def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor ok_for_period = PeriodIndex._datetimelike_ops ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq'] ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = [ 'to_period', 'to_pydatetime', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', 'floor', 'ceil', 'weekday_name' ] ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = [ 'components', 'to_pytimedelta', 'total_seconds', 'round', 'floor', 'ceil' ] def get_expected(s, name): result = getattr(Index(s._values), prop) if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype('int64') elif not is_list_like(result): return result return Series(result, index=s.index, name=s.name) def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): assert a == b else: tm.assert_series_equal(a, b) # datetimeindex cases = [ Series(date_range('20130101', periods=5), name='xxx'), Series(date_range('20130101', periods=5, freq='s'), name='xxx'), Series(date_range('20130101 00:00:00', periods=5, freq='ms'), name='xxx') ] for s in cases: for prop in ok_for_dt: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_localize('US/Eastern') exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern') expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == 'US/Eastern' freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq='infer').freq # let's localize, then convert result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') exp_values = (DatetimeIndex( s.values).tz_localize('UTC').tz_convert('US/Eastern')) expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) # round s = Series(pd.to_datetime([ '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00' ]), name='xxx') result = s.dt.round('D') expected = Series(pd.to_datetime( ['2012-01-02', '2012-01-02', '2012-01-01']), name='xxx') tm.assert_series_equal(result, expected) # round with tz result = ( s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.round('D')) exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', '2012-01-01']).tz_localize('US/Eastern') expected = Series(exp_values, name='xxx') tm.assert_series_equal(result, expected) # floor s = Series(pd.to_datetime([ '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00' ]), name='xxx') result = s.dt.floor('D') expected = Series(pd.to_datetime( ['2012-01-01', '2012-01-01', '2012-01-01']), name='xxx') tm.assert_series_equal(result, expected) # ceil s = Series(pd.to_datetime([ '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00' ]), name='xxx') result = s.dt.ceil('D') expected = Series(pd.to_datetime( ['2012-01-02', '2012-01-02', '2012-01-02']), name='xxx') tm.assert_series_equal(result, expected) # datetimeindex with tz s = Series(date_range('20130101', periods=5, tz='US/Eastern'), name='xxx') for prop in ok_for_dt: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_convert('CET') expected = Series(s._values.tz_convert('CET'), index=s.index, name='xxx') tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == 'CET' freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq='infer').freq # timedelta index cases = [ Series(timedelta_range('1 day', periods=5), index=list('abcde'), name='xxx'), Series(timedelta_range('1 day 01:23:45', periods=5, freq='s'), name='xxx'), Series(timedelta_range('2 days 01:23:45.012345', periods=5, freq='ms'), name='xxx') ] for s in cases: for prop in ok_for_td: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_td_methods: getattr(s.dt, prop) result = s.dt.components assert isinstance(result, DataFrame) tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.total_seconds() assert isinstance(result, pd.Series) assert result.dtype == 'float64' freq_result = s.dt.freq assert freq_result == TimedeltaIndex(s.values, freq='infer').freq # both index = date_range('20130101', periods=3, freq='D') s = Series(date_range('20140204', periods=3, freq='s'), index=index, name='xxx') exp = Series(np.array([2014, 2014, 2014], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.year, exp) exp = Series(np.array([2, 2, 2], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.month, exp) exp = Series(np.array([0, 1, 2], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.second, exp) exp = pd.Series([s[0]] * 3, index=index, name='xxx') tm.assert_series_equal(s.dt.normalize(), exp) # periodindex cases = [ Series(period_range('20130101', periods=5, freq='D'), name='xxx') ] for s in cases: for prop in ok_for_period: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_period_methods: getattr(s.dt, prop) freq_result = s.dt.freq assert freq_result == PeriodIndex(s.values).freq # test limited display api def get_dir(s): results = [r for r in s.dt.__dir__() if not r.startswith('_')] return list(sorted(set(results))) s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) s = Series( period_range('20130101', periods=5, freq='D', name='xxx').asobject) results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_period + ok_for_period_methods)))) # 11295 # ambiguous time error on the conversions s = Series(pd.date_range('2015-01-01', '2016-01-01', freq='T'), name='xxx') s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago') results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) exp_values = pd.date_range('2015-01-01', '2016-01-01', freq='T', tz='UTC').tz_convert('America/Chicago') expected = Series(exp_values, name='xxx') tm.assert_series_equal(s, expected) # no setting allowed s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') with tm.assert_raises_regex(ValueError, "modifications"): s.dt.hour = 5 # trying to set a copy with pd.option_context('chained_assignment', 'raise'): def f(): s.dt.hour[0] = 5 pytest.raises(com.SettingWithCopyError, f)
def test_value_counts_datetime64(self, index_or_series): klass = index_or_series # GH 3002, datetime64[ns] # don't test names though txt = "\n".join([ "xxyyzz20100101PIE", "xxyyzz20100101GUM", "xxyyzz20100101EGG", "xxyyww20090101EGG", "foofoo20080909PIE", "foofoo20080909GUM", ]) f = StringIO(txt) df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"]) s = klass(df["dt"].copy()) s.name = None idx = pd.to_datetime([ "2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00" ]) expected_s = Series([3, 2, 1], index=idx) tm.assert_series_equal(s.value_counts(), expected_s) expected = np_array_datetime64_compat( [ "2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00" ], dtype="datetime64[ns]", ) if isinstance(s, Index): tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) else: tm.assert_numpy_array_equal(s.unique(), expected) assert s.nunique() == 3 # with NaT s = df["dt"].copy() s = klass(list(s.values) + [pd.NaT]) result = s.value_counts() assert result.index.dtype == "datetime64[ns]" tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) expected_s[pd.NaT] = 1 tm.assert_series_equal(result, expected_s) unique = s.unique() assert unique.dtype == "datetime64[ns]" # numpy_array_equal cannot compare pd.NaT if isinstance(s, Index): exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT]) tm.assert_index_equal(unique, exp_idx) else: tm.assert_numpy_array_equal(unique[:3], expected) assert pd.isna(unique[3]) assert s.nunique() == 3 assert s.nunique(dropna=False) == 4 # timedelta64[ns] td = df.dt - df.dt + timedelta(1) td = klass(td, name="dt") result = td.value_counts() expected_s = Series([6], index=[Timedelta("1day")], name="dt") tm.assert_series_equal(result, expected_s) expected = TimedeltaIndex(["1 days"], name="dt") if isinstance(td, Index): tm.assert_index_equal(td.unique(), expected) else: tm.assert_numpy_array_equal(td.unique(), expected.values) td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2, name="dt") result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s)
def test_nat(self): self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT)
def test_maybe_cast_slice_duplicate_monotonic(self): # https://github.com/pandas-dev/pandas/issues/16515 idx = DatetimeIndex(["2017", "2017"]) result = idx._maybe_cast_slice_bound("2017-01-01", "left", "loc") expected = Timestamp("2017-01-01") assert result == expected
def test_subtraction_ops_with_tz(self): # check that dt/dti subtraction ops with tz are validated dti = date_range('20130101', periods=3) ts = Timestamp('20130101') dt = ts.to_pydatetime() dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') ts_tz = Timestamp('20130101').tz_localize('US/Eastern') ts_tz2 = Timestamp('20130101').tz_localize('CET') dt_tz = ts_tz.to_pydatetime() td = Timedelta('1 days') def _check(result, expected): assert result == expected assert isinstance(result, Timedelta) # scalars result = ts - ts expected = Timedelta('0 days') _check(result, expected) result = dt_tz - ts_tz expected = Timedelta('0 days') _check(result, expected) result = ts_tz - dt_tz expected = Timedelta('0 days') _check(result, expected) # tz mismatches pytest.raises(TypeError, lambda: dt_tz - ts) pytest.raises(TypeError, lambda: dt_tz - dt) pytest.raises(TypeError, lambda: dt_tz - ts_tz2) pytest.raises(TypeError, lambda: dt - dt_tz) pytest.raises(TypeError, lambda: ts - dt_tz) pytest.raises(TypeError, lambda: ts_tz2 - ts) pytest.raises(TypeError, lambda: ts_tz2 - dt) pytest.raises(TypeError, lambda: ts_tz - ts_tz2) # with dti pytest.raises(TypeError, lambda: dti - ts_tz) pytest.raises(TypeError, lambda: dti_tz - ts) pytest.raises(TypeError, lambda: dti_tz - ts_tz2) result = dti_tz - dt_tz expected = TimedeltaIndex(['0 days', '1 days', '2 days']) tm.assert_index_equal(result, expected) result = dt_tz - dti_tz expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) tm.assert_index_equal(result, expected) result = dti_tz - ts_tz expected = TimedeltaIndex(['0 days', '1 days', '2 days']) tm.assert_index_equal(result, expected) result = ts_tz - dti_tz expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) tm.assert_index_equal(result, expected) result = td - td expected = Timedelta('0 days') _check(result, expected) result = dti_tz - td expected = DatetimeIndex( ['20121231', '20130101', '20130102'], tz='US/Eastern') tm.assert_index_equal(result, expected)
sql1 = "select code from company" cur.execute(sql1) data1 = cur.fetchall() data2 = list(data1) print(data2) print(len(data2)) for a1 in data2: b=a1; print(b) query = "select Date,Open,High,Low,Last,Close,Total_Trade_Quantity,Turnover from stocks where Code = %(Code)s" cur.execute(query,{ 'Code' : b}) data = cur.fetchall() df = pd.DataFrame(list(data),columns=['Date','Open','High','Low','Last','Close','Total Trade Quantity','Turnover']) data3 = DatetimeIndex(df['Date']).day data3 = np.asarray(data3) dates = np.reshape(data3,(len(data3), 1)) prices = np.asarray(df['Close']) svr_poly = SVR(kernel= 'poly', C= 1e3, degree= 2) svr_poly.fit(dates, prices) length = len(prices) + 1 print(length) aaa = svr_poly.predict(length)[0] print("The next predicted price is:") print(aaa) query = "select Close from stocks where Code = %(Code)s" cur.execute(query, {'Code' : b}) data = cur.fetchall() df1 = pd.DataFrame(list(data),columns = ['Close']) df1 = df1.tail(1)
def test_dti_contains_with_duplicates(self): d = datetime(2011, 12, 5, 20, 30) ix = DatetimeIndex([d, d]) assert d in ix
def test_delete_slice(self): idx = date_range(start="2000-01-01", periods=10, freq="D", name="idx") # preserve freq expected_0_2 = date_range(start="2000-01-04", periods=7, freq="D", name="idx") expected_7_9 = date_range(start="2000-01-01", periods=7, freq="D", name="idx") # reset freq to None expected_3_5 = DatetimeIndex( [ "2000-01-01", "2000-01-02", "2000-01-03", "2000-01-07", "2000-01-08", "2000-01-09", "2000-01-10", ], freq=None, name="idx", ) cases = { (0, 1, 2): expected_0_2, (7, 8, 9): expected_7_9, (3, 4, 5): expected_3_5, } for n, expected in cases.items(): result = idx.delete(n) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq result = idx.delete(slice(n[0], n[-1] + 1)) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq for tz in [None, "Asia/Tokyo", "US/Pacific"]: ts = pd.Series( 1, index=pd.date_range("2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz), ) # preserve freq result = ts.drop(ts.index[:5]).index expected = pd.date_range("2000-01-01 14:00", periods=5, freq="H", name="idx", tz=tz) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq assert result.tz == expected.tz # reset freq to None result = ts.drop(ts.index[[1, 3, 5, 7, 9]]).index expected = DatetimeIndex( [ "2000-01-01 09:00", "2000-01-01 11:00", "2000-01-01 13:00", "2000-01-01 15:00", "2000-01-01 17:00", ], freq=None, name="idx", tz=tz, ) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq assert result.tz == expected.tz
def test_get_loc_nat(self): # GH#20464 index = DatetimeIndex(["1/3/2000", "NaT"]) assert index.get_loc(pd.NaT) == 1
def test_insert(self): idx = DatetimeIndex(["2000-01-04", "2000-01-01", "2000-01-02"], name="idx") result = idx.insert(2, datetime(2000, 1, 5)) exp = DatetimeIndex( ["2000-01-04", "2000-01-01", "2000-01-05", "2000-01-02"], name="idx") tm.assert_index_equal(result, exp) # insertion of non-datetime should coerce to object index result = idx.insert(1, "inserted") expected = Index( [ datetime(2000, 1, 4), "inserted", datetime(2000, 1, 1), datetime(2000, 1, 2), ], name="idx", ) assert not isinstance(result, DatetimeIndex) tm.assert_index_equal(result, expected) assert result.name == expected.name idx = date_range("1/1/2000", periods=3, freq="M", name="idx") # preserve freq expected_0 = DatetimeIndex( ["1999-12-31", "2000-01-31", "2000-02-29", "2000-03-31"], name="idx", freq="M", ) expected_3 = DatetimeIndex( ["2000-01-31", "2000-02-29", "2000-03-31", "2000-04-30"], name="idx", freq="M", ) # reset freq to None expected_1_nofreq = DatetimeIndex( ["2000-01-31", "2000-01-31", "2000-02-29", "2000-03-31"], name="idx", freq=None, ) expected_3_nofreq = DatetimeIndex( ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"], name="idx", freq=None, ) cases = [ (0, datetime(1999, 12, 31), expected_0), (-3, datetime(1999, 12, 31), expected_0), (3, datetime(2000, 4, 30), expected_3), (1, datetime(2000, 1, 31), expected_1_nofreq), (3, datetime(2000, 1, 2), expected_3_nofreq), ] for n, d, expected in cases: result = idx.insert(n, d) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq # reset freq to None result = idx.insert(3, datetime(2000, 1, 2)) expected = DatetimeIndex( ["2000-01-31", "2000-02-29", "2000-03-31", "2000-01-02"], name="idx", freq=None, ) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq is None # see gh-7299 idx = date_range("1/1/2000", periods=3, freq="D", tz="Asia/Tokyo", name="idx") with pytest.raises(ValueError): idx.insert(3, pd.Timestamp("2000-01-04")) with pytest.raises(ValueError): idx.insert(3, datetime(2000, 1, 4)) with pytest.raises(ValueError): idx.insert(3, pd.Timestamp("2000-01-04", tz="US/Eastern")) with pytest.raises(ValueError): idx.insert( 3, datetime(2000, 1, 4, tzinfo=pytz.timezone("US/Eastern"))) for tz in ["US/Pacific", "Asia/Singapore"]: idx = date_range("1/1/2000 09:00", periods=6, freq="H", tz=tz, name="idx") # preserve freq expected = date_range("1/1/2000 09:00", periods=7, freq="H", tz=tz, name="idx") for d in [ pd.Timestamp("2000-01-01 15:00", tz=tz), pytz.timezone(tz).localize(datetime(2000, 1, 1, 15)), ]: result = idx.insert(6, d) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq assert result.tz == expected.tz expected = DatetimeIndex( [ "2000-01-01 09:00", "2000-01-01 10:00", "2000-01-01 11:00", "2000-01-01 12:00", "2000-01-01 13:00", "2000-01-01 14:00", "2000-01-01 10:00", ], name="idx", tz=tz, freq=None, ) # reset freq to None for d in [ pd.Timestamp("2000-01-01 10:00", tz=tz), pytz.timezone(tz).localize(datetime(2000, 1, 1, 10)), ]: result = idx.insert(6, d) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.tz == expected.tz assert result.freq is None
def test_delete(self): idx = date_range(start="2000-01-01", periods=5, freq="M", name="idx") # preserve freq expected_0 = date_range(start="2000-02-01", periods=4, freq="M", name="idx") expected_4 = date_range(start="2000-01-01", periods=4, freq="M", name="idx") # reset freq to None expected_1 = DatetimeIndex( ["2000-01-31", "2000-03-31", "2000-04-30", "2000-05-31"], freq=None, name="idx", ) cases = { 0: expected_0, -5: expected_0, -1: expected_4, 4: expected_4, 1: expected_1, } for n, expected in cases.items(): result = idx.delete(n) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq with pytest.raises((IndexError, ValueError)): # either depending on numpy version idx.delete(5) for tz in [None, "Asia/Tokyo", "US/Pacific"]: idx = date_range(start="2000-01-01 09:00", periods=10, freq="H", name="idx", tz=tz) expected = date_range(start="2000-01-01 10:00", periods=9, freq="H", name="idx", tz=tz) result = idx.delete(0) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freqstr == "H" assert result.tz == expected.tz expected = date_range(start="2000-01-01 09:00", periods=9, freq="H", name="idx", tz=tz) result = idx.delete(-1) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freqstr == "H" assert result.tz == expected.tz
class TestDatetimeIndexOps: def test_ops_properties_basic(self, datetime_series): # sanity check that the behavior didn't change # GH#7206 for op in ["year", "day", "second", "weekday"]: msg = f"'Series' object has no attribute '{op}'" with pytest.raises(AttributeError, match=msg): getattr(datetime_series, op) # attribute access should still work! s = Series(dict(year=2000, month=1, day=10)) assert s.year == 2000 assert s.month == 1 assert s.day == 10 msg = "'Series' object has no attribute 'weekday'" with pytest.raises(AttributeError, match=msg): s.weekday def test_repeat_range(self, tz_naive_fixture): tz = tz_naive_fixture rng = date_range("1/1/2000", "1/1/2001") result = rng.repeat(5) assert result.freq is None assert len(result) == 5 * len(rng) index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz) exp = pd.DatetimeIndex( ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz ) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) assert res.freq is None index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz) exp = pd.DatetimeIndex( ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz ) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) assert res.freq is None index = pd.DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz) exp = pd.DatetimeIndex( [ "2001-01-01", "2001-01-01", "2001-01-01", "NaT", "NaT", "NaT", "2003-01-01", "2003-01-01", "2003-01-01", ], tz=tz, ) for res in [index.repeat(3), np.repeat(index, 3)]: tm.assert_index_equal(res, exp) assert res.freq is None def test_repeat(self, tz_naive_fixture): tz = tz_naive_fixture reps = 2 msg = "the 'axis' parameter is not supported" rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz) expected_rng = DatetimeIndex( [ Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"), ] ) res = rng.repeat(reps) tm.assert_index_equal(res, expected_rng) assert res.freq is None tm.assert_index_equal(np.repeat(rng, reps), expected_rng) with pytest.raises(ValueError, match=msg): np.repeat(rng, reps, axis=1) def test_resolution(self, tz_naive_fixture): tz = tz_naive_fixture for freq, expected in zip( ["A", "Q", "M", "D", "H", "T", "S", "L", "U"], [ "day", "day", "day", "day", "hour", "minute", "second", "millisecond", "microsecond", ], ): idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz) assert idx.resolution == expected def test_value_counts_unique(self, tz_naive_fixture): tz = tz_naive_fixture # GH 7735 idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10) # create repeated values, 'n'th element is repeated by n+1 times idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz) exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz) expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) expected = pd.date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz) tm.assert_index_equal(idx.unique(), expected) idx = DatetimeIndex( [ "2013-01-01 09:00", "2013-01-01 09:00", "2013-01-01 09:00", "2013-01-01 08:00", "2013-01-01 08:00", pd.NaT, ], tz=tz, ) exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz) expected = Series([3, 2], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz) expected = Series([3, 2, 1], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(dropna=False), expected) tm.assert_index_equal(idx.unique(), exp_idx) def test_nonunique_contains(self): # GH 9512 for idx in map( DatetimeIndex, ( [0, 1, 0], [0, 0, -1], [0, -1, -1], ["2015", "2015", "2016"], ["2015", "2015", "2014"], ), ): assert idx[0] in idx @pytest.mark.parametrize( "idx", [ DatetimeIndex( ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx" ), DatetimeIndex( ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H", name="tzidx", tz="Asia/Tokyo", ), ], ) def test_order_with_freq(self, idx): ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) assert ordered.freq == expected.freq assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False) assert ordered.freq == expected.freq assert ordered.freq.n == -1 @pytest.mark.parametrize( "index_dates,expected_dates", [ ( ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], ), ( ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"], ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"], ), ( [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT], [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"], ), ], ) def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture): tz = tz_naive_fixture # without freq index = DatetimeIndex(index_dates, tz=tz, name="idx") expected = DatetimeIndex(expected_dates, tz=tz, name="idx") ordered = index.sort_values() tm.assert_index_equal(ordered, expected) assert ordered.freq is None ordered = index.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None ordered, indexer = index.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None def test_drop_duplicates_metadata(self, freq_sample): # GH 10115 idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") result = idx.drop_duplicates() tm.assert_index_equal(idx, result) assert idx.freq == result.freq idx_dup = idx.append(idx) assert idx_dup.freq is None # freq is reset result = idx_dup.drop_duplicates() tm.assert_index_equal(idx, result) assert result.freq is None @pytest.mark.parametrize( "keep, expected, index", [ ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)), ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)), ( False, np.concatenate(([True] * 5, [False] * 5, [True] * 5)), np.arange(5, 10), ), ], ) def test_drop_duplicates(self, freq_sample, keep, expected, index): # to check Index/Series compat idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx") idx = idx.append(idx[:5]) tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected) expected = idx[~expected] result = idx.drop_duplicates(keep=keep) tm.assert_index_equal(result, expected) result = Series(idx).drop_duplicates(keep=keep) tm.assert_series_equal(result, Series(expected, index=index)) def test_infer_freq(self, freq_sample): # GH 11018 idx = pd.date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10) result = pd.DatetimeIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) assert result.freq == freq_sample def test_nat(self, tz_naive_fixture): tz = tz_naive_fixture assert pd.DatetimeIndex._na_value is pd.NaT assert pd.DatetimeIndex([])._na_value is pd.NaT idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, False])) assert idx.hasnans is False tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp)) idx = pd.DatetimeIndex(["2011-01-01", "NaT"], tz=tz) assert idx._can_hold_na tm.assert_numpy_array_equal(idx._isnan, np.array([False, True])) assert idx.hasnans is True tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp)) def test_equals(self): # GH 13107 idx = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"]) assert idx.equals(idx) assert idx.equals(idx.copy()) assert idx.equals(idx.astype(object)) assert idx.astype(object).equals(idx) assert idx.astype(object).equals(idx.astype(object)) assert not idx.equals(list(idx)) assert not idx.equals(pd.Series(idx)) idx2 = pd.DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific") assert not idx.equals(idx2) assert not idx.equals(idx2.copy()) assert not idx.equals(idx2.astype(object)) assert not idx.astype(object).equals(idx2) assert not idx.equals(list(idx2)) assert not idx.equals(pd.Series(idx2)) # same internal, different tz idx3 = pd.DatetimeIndex(idx.asi8, tz="US/Pacific") tm.assert_numpy_array_equal(idx.asi8, idx3.asi8) assert not idx.equals(idx3) assert not idx.equals(idx3.copy()) assert not idx.equals(idx3.astype(object)) assert not idx.astype(object).equals(idx3) assert not idx.equals(list(idx3)) assert not idx.equals(pd.Series(idx3)) # check that we do not raise when comparing with OutOfBounds objects oob = pd.Index([datetime(2500, 1, 1)] * 3, dtype=object) assert not idx.equals(oob) assert not idx2.equals(oob) assert not idx3.equals(oob) # check that we do not raise when comparing with OutOfBounds dt64 oob2 = oob.map(np.datetime64) assert not idx.equals(oob2) assert not idx2.equals(oob2) assert not idx3.equals(oob2) @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []]) @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)]) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) def test_freq_setter(self, values, freq, tz): # GH 20678 idx = DatetimeIndex(values, tz=tz) # can set to an offset, converting from string if necessary idx._data.freq = freq assert idx.freq == freq assert isinstance(idx.freq, DateOffset) # can reset to None idx._data.freq = None assert idx.freq is None def test_freq_setter_errors(self): # GH 20678 idx = DatetimeIndex(["20180101", "20180103", "20180105"]) # setting with an incompatible freq msg = ( "Inferred frequency 2D from passed values does not conform to " "passed frequency 5D" ) with pytest.raises(ValueError, match=msg): idx._data.freq = "5D" # setting with non-freq string with pytest.raises(ValueError, match="Invalid frequency"): idx._data.freq = "foo"
def setup(self): idx = DatetimeIndex(start=datetime(1990, 1, 1), end=datetime(2012, 1, 1), freq='D') df = DataFrame({'a': 0, 'b': 1, 'c': 2}, index=idx) self.data_frames = dict(enumerate([df] * 100))
def idx(self): naive = DatetimeIndex(["2013-1-1 13:00", "2013-1-2 14:00"], name="B") idx = naive.tz_localize("US/Pacific") return idx
class TestSeriesDatetimeValues: def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor ok_for_period = PeriodArray._datetimelike_ops ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"] ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = [ "to_period", "to_pydatetime", "tz_localize", "tz_convert", "normalize", "strftime", "round", "floor", "ceil", "day_name", "month_name", ] ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = [ "components", "to_pytimedelta", "total_seconds", "round", "floor", "ceil", ] def get_expected(s, name): result = getattr(Index(s._values), prop) if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype("int64") elif not is_list_like(result) or isinstance(result, pd.DataFrame): return result return Series(result, index=s.index, name=s.name) def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): assert a == b elif isinstance(a, pd.DataFrame): tm.assert_frame_equal(a, b) else: tm.assert_series_equal(a, b) # datetimeindex cases = [ Series(date_range("20130101", periods=5), name="xxx"), Series(date_range("20130101", periods=5, freq="s"), name="xxx"), Series(date_range("20130101 00:00:00", periods=5, freq="ms"), name="xxx"), ] for s in cases: for prop in ok_for_dt: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_localize("US/Eastern") exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern") expected = Series(exp_values, index=s.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "US/Eastern" freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq="infer").freq # let's localize, then convert result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern") exp_values = (DatetimeIndex( s.values).tz_localize("UTC").tz_convert("US/Eastern")) expected = Series(exp_values, index=s.index, name="xxx") tm.assert_series_equal(result, expected) # datetimeindex with tz s = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx") for prop in ok_for_dt: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_convert("CET") expected = Series(s._values.tz_convert("CET"), index=s.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "CET" freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq="infer").freq # timedelta index cases = [ Series(timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"), Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"), Series( timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"), name="xxx", ), ] for s in cases: for prop in ok_for_td: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_td_methods: getattr(s.dt, prop) result = s.dt.components assert isinstance(result, DataFrame) tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.total_seconds() assert isinstance(result, pd.Series) assert result.dtype == "float64" freq_result = s.dt.freq assert freq_result == TimedeltaIndex(s.values, freq="infer").freq # both index = date_range("20130101", periods=3, freq="D") s = Series(date_range("20140204", periods=3, freq="s"), index=index, name="xxx") exp = Series(np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.year, exp) exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.month, exp) exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.second, exp) exp = pd.Series([s[0]] * 3, index=index, name="xxx") tm.assert_series_equal(s.dt.normalize(), exp) # periodindex cases = [ Series(period_range("20130101", periods=5, freq="D"), name="xxx") ] for s in cases: for prop in ok_for_period: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_period_methods: getattr(s.dt, prop) freq_result = s.dt.freq assert freq_result == PeriodIndex(s.values).freq # test limited display api def get_dir(s): results = [r for r in s.dt.__dir__() if not r.startswith("_")] return sorted(set(results)) s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) s = Series( period_range("20130101", periods=5, freq="D", name="xxx").astype(object)) results = get_dir(s) tm.assert_almost_equal( results, sorted(set(ok_for_period + ok_for_period_methods))) # 11295 # ambiguous time error on the conversions s = Series(pd.date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) exp_values = pd.date_range("2015-01-01", "2016-01-01", freq="T", tz="UTC").tz_convert("America/Chicago") expected = Series(exp_values, name="xxx") tm.assert_series_equal(s, expected) # no setting allowed s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") with pytest.raises(ValueError, match="modifications"): s.dt.hour = 5 # trying to set a copy with pd.option_context("chained_assignment", "raise"): with pytest.raises(com.SettingWithCopyError): s.dt.hour[0] = 5 @pytest.mark.parametrize( "method, dates", [ ["round", ["2012-01-02", "2012-01-02", "2012-01-01"]], ["floor", ["2012-01-01", "2012-01-01", "2012-01-01"]], ["ceil", ["2012-01-02", "2012-01-02", "2012-01-02"]], ], ) def test_dt_round(self, method, dates): # round s = Series( pd.to_datetime([ "2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00" ]), name="xxx", ) result = getattr(s.dt, method)("D") expected = Series(pd.to_datetime(dates), name="xxx") tm.assert_series_equal(result, expected) def test_dt_round_tz(self): s = Series( pd.to_datetime([ "2012-01-01 13:00:00", "2012-01-01 12:01:00", "2012-01-01 08:00:00" ]), name="xxx", ) result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern").dt.round( "D") exp_values = pd.to_datetime(["2012-01-01", "2012-01-01", "2012-01-01"]).tz_localize("US/Eastern") expected = Series(exp_values, name="xxx") tm.assert_series_equal(result, expected) @pytest.mark.parametrize("method", ["ceil", "round", "floor"]) def test_dt_round_tz_ambiguous(self, method): # GH 18946 round near "fall back" DST df1 = pd.DataFrame( [ pd.to_datetime("2017-10-29 02:00:00+02:00", utc=True), pd.to_datetime("2017-10-29 02:00:00+01:00", utc=True), pd.to_datetime("2017-10-29 03:00:00+01:00", utc=True), ], columns=["date"], ) df1["date"] = df1["date"].dt.tz_convert("Europe/Madrid") # infer result = getattr(df1.date.dt, method)("H", ambiguous="infer") expected = df1["date"] tm.assert_series_equal(result, expected) # bool-array result = getattr(df1.date.dt, method)("H", ambiguous=[True, False, False]) tm.assert_series_equal(result, expected) # NaT result = getattr(df1.date.dt, method)("H", ambiguous="NaT") expected = df1["date"].copy() expected.iloc[0:2] = pd.NaT tm.assert_series_equal(result, expected) # raise with pytest.raises(pytz.AmbiguousTimeError): getattr(df1.date.dt, method)("H", ambiguous="raise") @pytest.mark.parametrize( "method, ts_str, freq", [ ["ceil", "2018-03-11 01:59:00-0600", "5min"], ["round", "2018-03-11 01:59:00-0600", "5min"], ["floor", "2018-03-11 03:01:00-0500", "2H"], ], ) def test_dt_round_tz_nonexistent(self, method, ts_str, freq): # GH 23324 round near "spring forward" DST s = Series([pd.Timestamp(ts_str, tz="America/Chicago")]) result = getattr(s.dt, method)(freq, nonexistent="shift_forward") expected = Series( [pd.Timestamp("2018-03-11 03:00:00", tz="America/Chicago")]) tm.assert_series_equal(result, expected) result = getattr(s.dt, method)(freq, nonexistent="NaT") expected = Series([pd.NaT]).dt.tz_localize(result.dt.tz) tm.assert_series_equal(result, expected) with pytest.raises(pytz.NonExistentTimeError, match="2018-03-11 02:00:00"): getattr(s.dt, method)(freq, nonexistent="raise") def test_dt_namespace_accessor_categorical(self): # GH 19468 dti = DatetimeIndex(["20171111", "20181212"]).repeat(2) s = Series(pd.Categorical(dti), name="foo") result = s.dt.year expected = Series([2017, 2017, 2018, 2018], name="foo") tm.assert_series_equal(result, expected) def test_dt_tz_localize_categorical(self, tz_aware_fixture): # GH 27952 tz = tz_aware_fixture datetimes = pd.Series(["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns]") categorical = datetimes.astype("category") result = categorical.dt.tz_localize(tz) expected = datetimes.dt.tz_localize(tz) tm.assert_series_equal(result, expected) def test_dt_tz_convert_categorical(self, tz_aware_fixture): # GH 27952 tz = tz_aware_fixture datetimes = pd.Series(["2019-01-01", "2019-01-01", "2019-01-02"], dtype="datetime64[ns, MET]") categorical = datetimes.astype("category") result = categorical.dt.tz_convert(tz) expected = datetimes.dt.tz_convert(tz) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("accessor", ["year", "month", "day"]) def test_dt_other_accessors_categorical(self, accessor): # GH 27952 datetimes = pd.Series(["2018-01-01", "2018-01-01", "2019-01-02"], dtype="datetime64[ns]") categorical = datetimes.astype("category") result = getattr(categorical.dt, accessor) expected = getattr(datetimes.dt, accessor) tm.assert_series_equal(result, expected) def test_dt_accessor_no_new_attributes(self): # https://github.com/pandas-dev/pandas/issues/10673 s = Series(date_range("20130101", periods=5, freq="D")) with pytest.raises(AttributeError, match="You cannot add any new attribute"): s.dt.xlabel = "a" @pytest.mark.parametrize("time_locale", [None] if tm.get_locales() is None else [None] + tm.get_locales()) def test_dt_accessor_datetime_name_accessors(self, time_locale): # Test Monday -> Sunday and January -> December, in that sequence if time_locale is None: # If the time_locale is None, day-name and month_name should # return the english attributes expected_days = [ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", ] expected_months = [ "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December", ] else: with tm.set_locale(time_locale, locale.LC_TIME): expected_days = calendar.day_name[:] expected_months = calendar.month_name[1:] s = Series( date_range(freq="D", start=datetime(1998, 1, 1), periods=365)) english_days = [ "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", ] for day, name, eng_name in zip(range(4, 11), expected_days, english_days): name = name.capitalize() assert s.dt.day_name(locale=time_locale)[day] == name s = s.append(Series([pd.NaT])) assert np.isnan(s.dt.day_name(locale=time_locale).iloc[-1]) s = Series(date_range(freq="M", start="2012", end="2013")) result = s.dt.month_name(locale=time_locale) expected = Series([month.capitalize() for month in expected_months]) # work around https://github.com/pandas-dev/pandas/issues/22342 result = result.str.normalize("NFD") expected = expected.str.normalize("NFD") tm.assert_series_equal(result, expected) for s_date, expected in zip(s, expected_months): result = s_date.month_name(locale=time_locale) expected = expected.capitalize() result = unicodedata.normalize("NFD", result) expected = unicodedata.normalize("NFD", expected) assert result == expected s = s.append(Series([pd.NaT])) assert np.isnan(s.dt.month_name(locale=time_locale).iloc[-1]) def test_strftime(self): # GH 10086 s = Series(date_range("20130101", periods=5)) result = s.dt.strftime("%Y/%m/%d") expected = Series([ "2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05" ]) tm.assert_series_equal(result, expected) s = Series(date_range("2015-02-03 11:22:33.4567", periods=5)) result = s.dt.strftime("%Y/%m/%d %H-%M-%S") expected = Series([ "2015/02/03 11-22-33", "2015/02/04 11-22-33", "2015/02/05 11-22-33", "2015/02/06 11-22-33", "2015/02/07 11-22-33", ]) tm.assert_series_equal(result, expected) s = Series(period_range("20130101", periods=5)) result = s.dt.strftime("%Y/%m/%d") expected = Series([ "2013/01/01", "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05" ]) tm.assert_series_equal(result, expected) s = Series( period_range("2015-02-03 11:22:33.4567", periods=5, freq="s")) result = s.dt.strftime("%Y/%m/%d %H-%M-%S") expected = Series([ "2015/02/03 11-22-33", "2015/02/03 11-22-34", "2015/02/03 11-22-35", "2015/02/03 11-22-36", "2015/02/03 11-22-37", ]) tm.assert_series_equal(result, expected) s = Series(date_range("20130101", periods=5)) s.iloc[0] = pd.NaT result = s.dt.strftime("%Y/%m/%d") expected = Series( [np.nan, "2013/01/02", "2013/01/03", "2013/01/04", "2013/01/05"]) tm.assert_series_equal(result, expected) datetime_index = date_range("20150301", periods=5) result = datetime_index.strftime("%Y/%m/%d") expected = Index( [ "2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05" ], dtype=np.object_, ) # dtype may be S10 or U10 depending on python version tm.assert_index_equal(result, expected) period_index = period_range("20150301", periods=5) result = period_index.strftime("%Y/%m/%d") expected = Index( [ "2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05" ], dtype="=U10", ) tm.assert_index_equal(result, expected) s = Series( [datetime(2013, 1, 1, 2, 32, 59), datetime(2013, 1, 2, 14, 32, 1)]) result = s.dt.strftime("%Y-%m-%d %H:%M:%S") expected = Series(["2013-01-01 02:32:59", "2013-01-02 14:32:01"]) tm.assert_series_equal(result, expected) s = Series(period_range("20130101", periods=4, freq="H")) result = s.dt.strftime("%Y/%m/%d %H:%M:%S") expected = Series([ "2013/01/01 00:00:00", "2013/01/01 01:00:00", "2013/01/01 02:00:00", "2013/01/01 03:00:00", ]) s = Series(period_range("20130101", periods=4, freq="L")) result = s.dt.strftime("%Y/%m/%d %H:%M:%S.%l") expected = Series([ "2013/01/01 00:00:00.000", "2013/01/01 00:00:00.001", "2013/01/01 00:00:00.002", "2013/01/01 00:00:00.003", ]) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "data", [ DatetimeIndex(["2019-01-01", pd.NaT]), PeriodIndex(["2019-01-01", pd.NaT], dtype="period[D]"), ], ) def test_strftime_nat(self, data): # GH 29578 s = Series(data) result = s.dt.strftime("%Y-%m-%d") expected = Series(["2019-01-01", np.nan]) tm.assert_series_equal(result, expected) def test_valid_dt_with_missing_values(self): from datetime import date, time # GH 8689 s = Series(date_range("20130101", periods=5, freq="D")) s.iloc[2] = pd.NaT for attr in [ "microsecond", "nanosecond", "second", "minute", "hour", "day" ]: expected = getattr(s.dt, attr).copy() expected.iloc[2] = np.nan result = getattr(s.dt, attr) tm.assert_series_equal(result, expected) result = s.dt.date expected = Series( [ date(2013, 1, 1), date(2013, 1, 2), np.nan, date(2013, 1, 4), date(2013, 1, 5), ], dtype="object", ) tm.assert_series_equal(result, expected) result = s.dt.time expected = Series([time(0), time(0), np.nan, time(0), time(0)], dtype="object") tm.assert_series_equal(result, expected) def test_dt_accessor_api(self): # GH 9322 from pandas.core.indexes.accessors import ( CombinedDatetimelikeProperties, DatetimeProperties, ) assert Series.dt is CombinedDatetimelikeProperties s = Series(date_range("2000-01-01", periods=3)) assert isinstance(s.dt, DatetimeProperties) @pytest.mark.parametrize("ser", [ Series(np.arange(5)), Series(list("abcde")), Series(np.random.randn(5)) ]) def test_dt_accessor_invalid(self, ser): # GH#9322 check that series with incorrect dtypes don't have attr with pytest.raises(AttributeError, match="only use .dt accessor"): ser.dt assert not hasattr(ser, "dt") def test_dt_accessor_updates_on_inplace(self): s = Series(pd.date_range("2018-01-01", periods=10)) s[2] = None s.fillna(pd.Timestamp("2018-01-01"), inplace=True) result = s.dt.date assert result[0] == result[2] def test_date_tz(self): # GH11757 rng = pd.DatetimeIndex( ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz="US/Eastern", ) s = Series(rng) expected = Series( [date(2014, 4, 4), date(2014, 7, 18), date(2015, 11, 22)]) tm.assert_series_equal(s.dt.date, expected) tm.assert_series_equal(s.apply(lambda x: x.date()), expected) def test_dt_timetz_accessor(self, tz_naive_fixture): # GH21358 tz = maybe_get_tz(tz_naive_fixture) dtindex = pd.DatetimeIndex( ["2014-04-04 23:56", "2014-07-18 21:24", "2015-11-22 22:14"], tz=tz) s = Series(dtindex) expected = Series([ time(23, 56, tzinfo=tz), time(21, 24, tzinfo=tz), time(22, 14, tzinfo=tz) ]) result = s.dt.timetz tm.assert_series_equal(result, expected) def test_setitem_with_string_index(self): # GH 23451 x = pd.Series([1, 2, 3], index=["Date", "b", "other"]) x["Date"] = date.today() assert x.Date == date.today() assert x["Date"] == date.today() def test_setitem_with_different_tz(self): # GH#24024 ser = pd.Series(pd.date_range("2000", periods=2, tz="US/Central")) ser[0] = pd.Timestamp("2000", tz="US/Eastern") expected = pd.Series( [ pd.Timestamp("2000-01-01 00:00:00-05:00", tz="US/Eastern"), pd.Timestamp("2000-01-02 00:00:00-06:00", tz="US/Central"), ], dtype=object, ) tm.assert_series_equal(ser, expected) @pytest.mark.parametrize( "input_series, expected_output", [ [["2020-01-01"], [[2020, 1, 3]]], [[pd.NaT], [[np.NaN, np.NaN, np.NaN]]], [["2019-12-31", "2019-12-29"], [[2020, 1, 2], [2019, 52, 7]]], [["2010-01-01", pd.NaT], [[2009, 53, 5], [np.NaN, np.NaN, np.NaN]] ], ], ) def test_isocalendar(self, input_series, expected_output): result = pd.to_datetime(pd.Series(input_series)).dt.isocalendar expected_frame = pd.DataFrame(expected_output, columns=["year", "week", "day"], dtype="UInt32") tm.assert_frame_equal(result, expected_frame)
def create_block(typestr, placement, item_shape=None, num_offset=0): """ Supported typestr: * float, f8, f4, f2 * int, i8, i4, i2, i1 * uint, u8, u4, u2, u1 * complex, c16, c8 * bool * object, string, O * datetime, dt, M8[ns], M8[ns, tz] * timedelta, td, m8[ns] * sparse (SparseArray with fill_value=0.0) * sparse_na (SparseArray with fill_value=np.nan) * category, category2 """ placement = BlockPlacement(placement) num_items = len(placement) if item_shape is None: item_shape = (N,) shape = (num_items,) + item_shape mat = get_numeric_mat(shape) if typestr in ( "float", "f8", "f4", "f2", "int", "i8", "i4", "i2", "i1", "uint", "u8", "u4", "u2", "u1", ): values = mat.astype(typestr) + num_offset elif typestr in ("complex", "c16", "c8"): values = 1.0j * (mat.astype(typestr) + num_offset) elif typestr in ("object", "string", "O"): values = np.reshape([f"A{i:d}" for i in mat.ravel() + num_offset], shape) elif typestr in ("b", "bool"): values = np.ones(shape, dtype=np.bool_) elif typestr in ("datetime", "dt", "M8[ns]"): values = (mat * 1e9).astype("M8[ns]") elif typestr.startswith("M8[ns"): # datetime with tz m = re.search(r"M8\[ns,\s*(\w+\/?\w*)\]", typestr) assert m is not None, f"incompatible typestr -> {typestr}" tz = m.groups()[0] assert num_items == 1, "must have only 1 num items for a tz-aware" values = DatetimeIndex(np.arange(N) * 1e9, tz=tz) elif typestr in ("timedelta", "td", "m8[ns]"): values = (mat * 1).astype("m8[ns]") elif typestr in ("category",): values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4]) elif typestr in ("category2",): values = Categorical(["a", "a", "a", "a", "b", "b", "c", "c", "c", "d"]) elif typestr in ("sparse", "sparse_na"): # FIXME: doesn't support num_rows != 10 assert shape[-1] == 10 assert all(s == 1 for s in shape[:-1]) if typestr.endswith("_na"): fill_value = np.nan else: fill_value = 0.0 values = SparseArray( [fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6], fill_value=fill_value, ) arr = values.sp_values.view() arr += num_offset - 1 else: raise ValueError(f'Unsupported typestr: "{typestr}"') return make_block(values, placement=placement, ndim=len(shape))
def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor ok_for_period = PeriodArray._datetimelike_ops ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"] ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = [ "to_period", "to_pydatetime", "tz_localize", "tz_convert", "normalize", "strftime", "round", "floor", "ceil", "day_name", "month_name", ] ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = [ "components", "to_pytimedelta", "total_seconds", "round", "floor", "ceil", ] def get_expected(s, name): result = getattr(Index(s._values), prop) if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype("int64") elif not is_list_like(result) or isinstance(result, pd.DataFrame): return result return Series(result, index=s.index, name=s.name) def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): assert a == b elif isinstance(a, pd.DataFrame): tm.assert_frame_equal(a, b) else: tm.assert_series_equal(a, b) # datetimeindex cases = [ Series(date_range("20130101", periods=5), name="xxx"), Series(date_range("20130101", periods=5, freq="s"), name="xxx"), Series(date_range("20130101 00:00:00", periods=5, freq="ms"), name="xxx"), ] for s in cases: for prop in ok_for_dt: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_localize("US/Eastern") exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern") expected = Series(exp_values, index=s.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "US/Eastern" freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq="infer").freq # let's localize, then convert result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern") exp_values = (DatetimeIndex( s.values).tz_localize("UTC").tz_convert("US/Eastern")) expected = Series(exp_values, index=s.index, name="xxx") tm.assert_series_equal(result, expected) # datetimeindex with tz s = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx") for prop in ok_for_dt: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_convert("CET") expected = Series(s._values.tz_convert("CET"), index=s.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "CET" freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq="infer").freq # timedelta index cases = [ Series(timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"), Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"), Series( timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"), name="xxx", ), ] for s in cases: for prop in ok_for_td: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_td_methods: getattr(s.dt, prop) result = s.dt.components assert isinstance(result, DataFrame) tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.total_seconds() assert isinstance(result, pd.Series) assert result.dtype == "float64" freq_result = s.dt.freq assert freq_result == TimedeltaIndex(s.values, freq="infer").freq # both index = date_range("20130101", periods=3, freq="D") s = Series(date_range("20140204", periods=3, freq="s"), index=index, name="xxx") exp = Series(np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.year, exp) exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.month, exp) exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.second, exp) exp = pd.Series([s[0]] * 3, index=index, name="xxx") tm.assert_series_equal(s.dt.normalize(), exp) # periodindex cases = [ Series(period_range("20130101", periods=5, freq="D"), name="xxx") ] for s in cases: for prop in ok_for_period: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_period_methods: getattr(s.dt, prop) freq_result = s.dt.freq assert freq_result == PeriodIndex(s.values).freq # test limited display api def get_dir(s): results = [r for r in s.dt.__dir__() if not r.startswith("_")] return sorted(set(results)) s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) s = Series( period_range("20130101", periods=5, freq="D", name="xxx").astype(object)) results = get_dir(s) tm.assert_almost_equal( results, sorted(set(ok_for_period + ok_for_period_methods))) # 11295 # ambiguous time error on the conversions s = Series(pd.date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) exp_values = pd.date_range("2015-01-01", "2016-01-01", freq="T", tz="UTC").tz_convert("America/Chicago") expected = Series(exp_values, name="xxx") tm.assert_series_equal(s, expected) # no setting allowed s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") with pytest.raises(ValueError, match="modifications"): s.dt.hour = 5 # trying to set a copy with pd.option_context("chained_assignment", "raise"): with pytest.raises(com.SettingWithCopyError): s.dt.hour[0] = 5
def test_ewm_with_nat_raises(halflife_with_times): # GH#38535 ser = Series(range(1)) times = DatetimeIndex(["NaT"]) with pytest.raises(ValueError, match="Cannot convert NaT values to integer"): ser.ewm(com=0.1, halflife=halflife_with_times, times=times)
def test_get_loc_reasonable_key_error(self): # GH#1062 index = DatetimeIndex(["1/3/2000"]) with pytest.raises(KeyError, match="2000"): index.get_loc("1/1/2000")
def setup(self): N = 10**5 B = N + 20000 self.datetime_left = DatetimeIndex(range(N)) self.datetime_right = DatetimeIndex(range(N, B))
def test_contains_nonunique(self, vals): # GH#9512 idx = DatetimeIndex(vals) assert idx[0] in idx
def test_datetimeindex_tz(self): rng = date_range('03/12/2012 00:00', periods=10, freq='W-FRI', tz='US/Eastern') rng2 = DatetimeIndex(data=rng, tz='US/Eastern') self.assertTrue(rng.equals(rng2))
def test_tdi_radd_timestamp(self): idx = TimedeltaIndex(['1 day', '2 day']) result = Timestamp('2011-01-01') + idx expected = DatetimeIndex(['2011-01-02', '2011-01-03']) tm.assert_index_equal(result, expected)
def test_nat(self): # GH 5546 dates = [NaT] idx = DatetimeIndex(dates) idx = idx.tz_localize('US/Pacific') self.assertTrue(idx.equals(DatetimeIndex(dates, tz='US/Pacific'))) idx = idx.tz_convert('US/Eastern') self.assertTrue(idx.equals(DatetimeIndex(dates, tz='US/Eastern'))) idx = idx.tz_convert('UTC') self.assertTrue(idx.equals(DatetimeIndex(dates, tz='UTC'))) dates = ['2010-12-01 00:00', '2010-12-02 00:00', NaT] idx = DatetimeIndex(dates) idx = idx.tz_localize('US/Pacific') self.assertTrue(idx.equals(DatetimeIndex(dates, tz='US/Pacific'))) idx = idx.tz_convert('US/Eastern') expected = ['2010-12-01 03:00', '2010-12-02 03:00', NaT] self.assertTrue(idx.equals(DatetimeIndex(expected, tz='US/Eastern'))) idx = idx + offsets.Hour(5) expected = ['2010-12-01 08:00', '2010-12-02 08:00', NaT] self.assertTrue(idx.equals(DatetimeIndex(expected, tz='US/Eastern'))) idx = idx.tz_convert('US/Pacific') expected = ['2010-12-01 05:00', '2010-12-02 05:00', NaT] self.assertTrue(idx.equals(DatetimeIndex(expected, tz='US/Pacific'))) if not _np_version_under1p7: idx = idx + np.timedelta64(3, 'h') expected = ['2010-12-01 08:00', '2010-12-02 08:00', NaT] self.assertTrue(idx.equals(DatetimeIndex(expected, tz='US/Pacific'))) idx = idx.tz_convert('US/Eastern') expected = ['2010-12-01 11:00', '2010-12-02 11:00', NaT] self.assertTrue(idx.equals(DatetimeIndex(expected, tz='US/Eastern')))
def test_datetimeindex_from_empty_datetime64_array(): for unit in ['ms', 'us', 'ns']: idx = DatetimeIndex(np.array([], dtype='datetime64[%s]' % unit)) assert (len(idx) == 0)
def test_static_tzinfo(self): # it works! index = DatetimeIndex([datetime(2012, 1, 1)], tz=self.tzstr('EST')) index.hour index[0]
def _convert_listlike_datetimes(arg, box, format, name=None, tz=None, unit=None, errors=None, infer_datetime_format=None, dayfirst=None, yearfirst=None, exact=None): """ Helper function for to_datetime. Performs the conversions of 1D listlike of dates Parameters ---------- arg : list, tuple, ndarray, Series, Index date to be parced box : boolean True boxes result as an Index-like, False returns an ndarray name : object None or string for the Index name tz : object None or 'utc' unit : string None or string of the frequency of the passed data errors : string error handing behaviors from to_datetime, 'raise', 'coerce', 'ignore' infer_datetime_format : boolean inferring format behavior from to_datetime dayfirst : boolean dayfirst parsing behavior from to_datetime yearfirst : boolean yearfirst parsing behavior from to_datetime exact : boolean exact format matching behavior from to_datetime Returns ------- ndarray of parsed dates Returns: - Index-like if box=True - ndarray of Timestamps if box=False """ from pandas import DatetimeIndex from pandas.core.arrays import DatetimeArray from pandas.core.arrays.datetimes import (maybe_convert_dtype, objects_to_datetime64ns) if isinstance(arg, (list, tuple)): arg = np.array(arg, dtype='O') # these are shortcutable if is_datetime64tz_dtype(arg): if not isinstance(arg, (DatetimeArray, DatetimeIndex)): return DatetimeIndex(arg, tz=tz, name=name) if tz == 'utc': arg = arg.tz_convert(None).tz_localize(tz) return arg elif is_datetime64_ns_dtype(arg): if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)): try: return DatetimeIndex(arg, tz=tz, name=name) except ValueError: pass return arg elif unit is not None: if format is not None: raise ValueError("cannot specify both format and unit") arg = getattr(arg, 'values', arg) result, tz_parsed = tslib.array_with_unit_to_datetime(arg, unit, errors=errors) if box: if errors == 'ignore': from pandas import Index result = Index(result, name=name) else: result = DatetimeIndex(result, name=name) # GH 23758: We may still need to localize the result with tz # GH 25546: Apply tz_parsed first (from arg), then tz (from caller) # result will be naive but in UTC try: result = result.tz_localize('UTC').tz_convert(tz_parsed) except AttributeError: # Regular Index from 'ignore' path return result if tz is not None: if result.tz is None: result = result.tz_localize(tz) else: result = result.tz_convert(tz) return result elif getattr(arg, 'ndim', 1) > 1: raise TypeError('arg must be a string, datetime, list, tuple, ' '1-d array, or Series') # warn if passing timedelta64, raise for PeriodDtype # NB: this must come after unit transformation orig_arg = arg arg, _ = maybe_convert_dtype(arg, copy=False) arg = ensure_object(arg) require_iso8601 = False if infer_datetime_format and format is None: format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst) if format is not None: # There is a special fast-path for iso8601 formatted # datetime strings, so in those cases don't use the inferred # format because this path makes process slower in this # special case format_is_iso8601 = _format_is_iso(format) if format_is_iso8601: require_iso8601 = not infer_datetime_format format = None tz_parsed = None result = None if format is not None: try: # shortcut formatting here if format == '%Y%m%d': try: # pass orig_arg as float-dtype may have been converted to # datetime64[ns] orig_arg = ensure_object(orig_arg) result = _attempt_YYYYMMDD(orig_arg, errors=errors) except (ValueError, TypeError, tslibs.OutOfBoundsDatetime): raise ValueError("cannot convert the input to " "'%Y%m%d' date format") # fallback if result is None: try: result, timezones = array_strptime(arg, format, exact=exact, errors=errors) if '%Z' in format or '%z' in format: return _return_parsed_timezone_results( result, timezones, box, tz, name) except tslibs.OutOfBoundsDatetime: if errors == 'raise': raise elif errors == 'coerce': result = np.empty(arg.shape, dtype='M8[ns]') iresult = result.view('i8') iresult.fill(tslibs.iNaT) else: result = arg except ValueError: # if format was inferred, try falling back # to array_to_datetime - terminate here # for specified formats if not infer_datetime_format: if errors == 'raise': raise elif errors == 'coerce': result = np.empty(arg.shape, dtype='M8[ns]') iresult = result.view('i8') iresult.fill(tslibs.iNaT) else: result = arg except ValueError as e: # Fallback to try to convert datetime objects if timezone-aware # datetime objects are found without passing `utc=True` try: values, tz = conversion.datetime_to_datetime64(arg) return DatetimeIndex._simple_new(values, name=name, tz=tz) except (ValueError, TypeError): raise e if result is None: assert format is None or infer_datetime_format utc = tz == 'utc' result, tz_parsed = objects_to_datetime64ns( arg, dayfirst=dayfirst, yearfirst=yearfirst, utc=utc, errors=errors, require_iso8601=require_iso8601, allow_object=True) if tz_parsed is not None: if box: # We can take a shortcut since the datetime64 numpy array # is in UTC return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed) else: # Convert the datetime64 numpy array to an numpy array # of datetime objects result = [ Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result ] return np.array(result, dtype=object) if box: utc = tz == 'utc' return _box_as_indexlike(result, utc=utc, name=name) return result
def test_tzaware_datetime_to_index(self): d = [datetime(2012, 8, 19, tzinfo=self.tz('US/Eastern'))] index = DatetimeIndex(d) self.assertTrue(self.cmptz(index.tz, self.tz('US/Eastern')))
def test_argmin_argmax(self): idx = DatetimeIndex(['2000-01-04', '2000-01-01', '2000-01-02']) self.assertEqual(idx.argmin(), 1) self.assertEqual(idx.argmax(), 0)
def create_block(typestr, placement, item_shape=None, num_offset=0): """ Supported typestr: * float, f8, f4, f2 * int, i8, i4, i2, i1 * uint, u8, u4, u2, u1 * complex, c16, c8 * bool * object, string, O * datetime, dt, M8[ns], M8[ns, tz] * timedelta, td, m8[ns] * sparse (SparseArray with fill_value=0.0) * sparse_na (SparseArray with fill_value=np.nan) * category, category2 """ placement = BlockPlacement(placement) num_items = len(placement) if item_shape is None: item_shape = (N, ) shape = (num_items, ) + item_shape mat = get_numeric_mat(shape) if typestr in ('float', 'f8', 'f4', 'f2', 'int', 'i8', 'i4', 'i2', 'i1', 'uint', 'u8', 'u4', 'u2', 'u1'): values = mat.astype(typestr) + num_offset elif typestr in ('complex', 'c16', 'c8'): values = 1.j * (mat.astype(typestr) + num_offset) elif typestr in ('object', 'string', 'O'): values = np.reshape(['A%d' % i for i in mat.ravel() + num_offset], shape) elif typestr in ('b', 'bool', ): values = np.ones(shape, dtype=np.bool_) elif typestr in ('datetime', 'dt', 'M8[ns]'): values = (mat * 1e9).astype('M8[ns]') elif typestr.startswith('M8[ns'): # datetime with tz m = re.search(r'M8\[ns,\s*(\w+\/?\w*)\]', typestr) assert m is not None, "incompatible typestr -> {0}".format(typestr) tz = m.groups()[0] assert num_items == 1, "must have only 1 num items for a tz-aware" values = DatetimeIndex(np.arange(N) * 1e9, tz=tz) elif typestr in ('timedelta', 'td', 'm8[ns]'): values = (mat * 1).astype('m8[ns]') elif typestr in ('category', ): values = Categorical([1, 1, 2, 2, 3, 3, 3, 3, 4, 4]) elif typestr in ('category2', ): values = Categorical(['a', 'a', 'a', 'a', 'b', 'b', 'c', 'c', 'c', 'd' ]) elif typestr in ('sparse', 'sparse_na'): # FIXME: doesn't support num_rows != 10 assert shape[-1] == 10 assert all(s == 1 for s in shape[:-1]) if typestr.endswith('_na'): fill_value = np.nan else: fill_value = 0.0 values = SparseArray([fill_value, fill_value, 1, 2, 3, fill_value, 4, 5, fill_value, 6], fill_value=fill_value) arr = values.sp_values.view() arr += (num_offset - 1) else: raise ValueError('Unsupported typestr: "%s"' % typestr) return make_block(values, placement=placement, ndim=len(shape))