def test_override_set_noconvert_columns():
    """Reading must not depend on the iteration order of ``usecols``.

    see gh-17351

    ``usecols`` has to be sorted inside ``_set_noconvert_columns``; the
    scenario mirrors ``test_usecols_with_parse_dates`` from test_usecols.py.
    """
    data = (
        "a,b,c,d,e\n"
        "0,1,20140101,0900,4\n"
        "0,1,20140102,1000,4"
    )
    parse_dates = [[1, 2]]
    expected = DataFrame(
        {
            "a": [0, 0],
            "c_d": [
                Timestamp("2014-01-01 09:00:00"),
                Timestamp("2014-01-02 10:00:00"),
            ],
        },
        columns=["c_d", "a"],
    )

    class MyTextFileReader(TextFileReader):
        # Skip the regular constructor; only set the attributes assigned here.
        def __init__(self):
            self._currow = 0
            self.squeeze = False

    class MyCParserWrapper(CParserWrapper):
        def _set_noconvert_columns(self):
            if self.usecols_dtype == "integer":
                # self.usecols is a set, which is documented as unordered,
                # although a CPython set of integers is sorted in practice.
                # Other implementations give no such guarantee, so a
                # different order is simulated here; before GH 17351 that
                # made parse_dates convert the wrong columns.
                self.usecols = list(self.usecols)[::-1]
            return super()._set_noconvert_columns()

    parser = MyTextFileReader()
    parser.options = {
        "usecols": [0, 2, 3],
        "parse_dates": parse_dates,
        "delimiter": ",",
    }
    parser.engine = "c"
    parser._engine = MyCParserWrapper(StringIO(data), **parser.options)

    result = parser.read()
    tm.assert_frame_equal(result, expected)
def test_override__set_noconvert_columns(self):
    """Reading must not depend on the iteration order of ``usecols``.

    GH 17351 - usecols needs to be sorted in _set_noconvert_columns; the
    scenario mirrors test_usecols_with_parse_dates from usecols.py.
    """
    from pandas.io.parsers import CParserWrapper, TextFileReader

    class MyTextFileReader(TextFileReader):
        # Skip the regular constructor; only set the attributes assigned here.
        def __init__(self):
            self._currow = 0
            self.squeeze = False

    class MyCParserWrapper(CParserWrapper):
        def _set_noconvert_columns(self):
            if self.usecols_dtype == 'integer':
                # self.usecols is a set, which is documented as unordered,
                # although a CPython set of integers is sorted in practice.
                # Other implementations give no such guarantee, so a
                # different order is simulated here; before GH 17351 that
                # made parse_dates convert the wrong columns.
                self.usecols = list(self.usecols)[::-1]
            return CParserWrapper._set_noconvert_columns(self)

    s = (
        "a,b,c,d,e\n"
        "0,1,20140101,0900,4\n"
        "0,1,20140102,1000,4"
    )
    parse_dates = [[1, 2]]
    stamps = [
        Timestamp('2014-01-01 09:00:00'),
        Timestamp('2014-01-02 10:00:00'),
    ]
    expected = DataFrame({'a': [0, 0], 'c_d': stamps}, columns=['c_d', 'a'])

    parser = MyTextFileReader()
    parser.options = {
        'usecols': [0, 2, 3],
        'parse_dates': parse_dates,
        'delimiter': ',',
    }
    parser._engine = MyCParserWrapper(StringIO(s), **parser.options)

    df = parser.read()
    tm.assert_frame_equal(df, expected)
def test_usecols_with_parse_dates_and_names(all_parsers, usecols, names):
    """Combine ``names``, ``usecols`` and a merged ``parse_dates`` column."""
    # see gh-9755
    parser = all_parsers
    s = "0,1,20140101,0900,4\n0,1,20140102,1000,4"
    parse_dates = [[1, 2]]

    merged_dates = [
        Timestamp("2014-01-01 09:00:00"),
        Timestamp("2014-01-02 10:00:00"),
    ]
    expected = DataFrame({"a": [0, 0], "c_d": merged_dates}, columns=["c_d", "a"])

    result = parser.read_csv(
        StringIO(s),
        names=names,
        parse_dates=parse_dates,
        usecols=usecols,
    )
    tm.assert_frame_equal(result, expected)
def test_multiple_date_col_timestamp_parse(self):
    """Merge columns 0 and 1 into one date column using ``Timestamp`` as parser."""
    data = (
        "05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25\n"
        "05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25"
    )
    read_kwargs = dict(
        sep=',',
        header=None,
        parse_dates=[[0, 1]],
        date_parser=Timestamp,
    )
    result = self.read_csv(StringIO(data), **read_kwargs)

    # Only the first element of the combined column is checked.
    ex_val = Timestamp('05/31/2012 15:30:00.029')
    assert result['0_1'][0] == ex_val
def create_event(self, port, time, type, value):
    """Create an event from a 'snow' mention, register it, and return it.

    ``port`` becomes the mention's ``airport_id``, ``time`` its
    ``datetime_happened`` and ``value`` its ``raw_description``; the
    reported time is ``Timestamp.today()``.
    """
    # NOTE(review): ``type`` is accepted but never read; the mention type is
    # hard-coded to 'snow'. Confirm with callers whether that is intended.
    mention = Mention(
        airport_id=port,
        type='snow',
        datetime_reported=Timestamp.today(),
        datetime_happened=time,
        raw_description=value,
    )
    event = Event(mention)
    self.add_event(event, time_area='01:00:00')
    return event
def _to_m8(key, tz=None):
    """
    Timestamp-like => dt64

    ``key`` is wrapped in ``Timestamp(key, tz=tz)`` unless it already is a
    ``Timestamp`` (in which case ``tz`` is not applied).
    """
    # The Timestamp constructor also takes care of string input.
    stamp = key if isinstance(key, Timestamp) else Timestamp(key, tz=tz)
    i8_value = np.int64(conversion.pydt_to_i8(stamp))
    return i8_value.view(_NS_DTYPE)
def _has_same_tz(self, other):
    """Return whether ``other`` resolves to the same timezone as ``self._timezone``."""
    own_zone = self._timezone

    if isinstance(other, np.datetime64):
        # np.datetime64 has no tz attribute, so go through Timestamp.
        other = Timestamp(other)

    # Objects without ``tzinfo`` fall back to the '__no_tz__' sentinel, so the
    # looked-up zone should never be None for non-datetime-like values.
    other_tzinfo = getattr(other, 'tzinfo', '__no_tz__')
    other_zone = timezones.get_timezone(other_tzinfo)
    return own_zone == other_zone
def _generate_regular_range(cls, start, end, periods, freq):
    """Generate range data for ``freq`` and wrap it with ``cls._simple_new``.

    For a ``Tick`` frequency the int64 values are produced by ``np.arange``
    with a stride of ``freq.nanos``; otherwise the values are collected from
    ``generate_range``.

    Raises
    ------
    ValueError
        For a ``Tick`` frequency when ``periods`` is given but both ``start``
        and ``end`` are None.
    """
    if not isinstance(freq, Tick):
        tz = None
        if isinstance(start, Timestamp):
            tz = start.tz
            start = start.to_pydatetime()

        if isinstance(end, Timestamp):
            tz = end.tz
            end = end.to_pydatetime()

        offsets_iter = generate_range(start=start, end=end,
                                      periods=periods, offset=freq)
        values = np.array([point.value for point in offsets_iter])
        return cls._simple_new(values, freq=freq, tz=tz)

    step = freq.nanos
    if periods is None:
        first = Timestamp(start).value
        # cannot just use stop = Timestamp(end) + 1 because arange breaks
        # when the stride is too large, see GH10887
        span = Timestamp(end).value - first
        stop = first + span // step * step + step // 2 + 1
        # end.tz == start.tz by this point due to _generate implementation
        tz = start.tz
    elif start is not None:
        first = Timestamp(start).value
        stop = first + np.int64(periods) * step
        tz = start.tz
    elif end is not None:
        stop = Timestamp(end).value + step
        first = stop - np.int64(periods) * step
        tz = end.tz
    else:
        raise ValueError("at least 'start' or 'end' should be specified "
                         "if a 'period' is given.")

    i8_values = np.arange(first, stop, step, dtype=np.int64)
    return cls._simple_new(i8_values.view(_NS_DTYPE), None, tz=tz)
def test_csvtext(self):
    """Round-trip CSV text through ``from_csvtext`` and ``to_csvtext``."""
    csvtext = (
        "2017-09-12,932.1\n"
        "2017-09-13,935.0\n"
        "2017-09-14,925.1\n"
        "2017-09-15,920.2\n"
    )
    series = from_csvtext(csvtext)

    # Index entries first, then the values, in row order.
    expected_days = ['2017-09-12', '2017-09-13', '2017-09-14', '2017-09-15']
    for pos, day in enumerate(expected_days):
        self.assertEqual(series.index[pos], Timestamp(day))

    expected_values = [932.1, 935.0, 925.1, 920.2]
    for pos, value in enumerate(expected_values):
        self.assertEqual(series[pos], value)

    # Serializing again must reproduce the input text exactly.
    self.assertEqual(to_csvtext(series), csvtext)
def test_multiple_date_col_timestamp_parse(all_parsers):
    """Merge columns 0 and 1 into one date column using ``Timestamp`` as parser."""
    parser = all_parsers
    data = (
        "05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25\n"
        "05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25"
    )

    result = parser.read_csv(
        StringIO(data),
        parse_dates=[[0, 1]],
        header=None,
        date_parser=Timestamp,
    )

    # Both rows carry the same combined timestamp; only column 3 differs.
    stamp = Timestamp("05/31/2012, 15:30:00.029")
    rows = [
        [stamp, 1306.25, 1, "E", 0, np.nan, 1306.25],
        [stamp, 1306.25, 8, "E", 0, np.nan, 1306.25],
    ]
    expected = DataFrame(rows, columns=["0_1", 2, 3, 4, 5, 6, 7])
    tm.assert_frame_equal(result, expected)
def test_parse_tz_aware(all_parsers):
    """A 'Z'-suffixed date index is parsed as tz-aware with ``pytz.utc``."""
    # See gh-1693
    parser = all_parsers
    data = "Date,x\n2012-06-13T01:39:00Z,0.5"

    result = parser.read_csv(StringIO(data), index_col=0, parse_dates=True)

    expected_index = Index([Timestamp("2012-06-13 01:39:00+00:00")], name="Date")
    expected = DataFrame({"x": [0.5]}, index=expected_index)

    tm.assert_frame_equal(result, expected)
    assert result.index.tz is pytz.utc
def test_usecols_with_parse_dates2(all_parsers):
    """Name-based ``usecols`` together with a parsed date index column."""
    # see gh-13604
    parser = all_parsers
    data = (
        "2008-02-07 09:40,1032.43\n"
        "2008-02-07 09:50,1042.54\n"
        "2008-02-07 10:00,1051.65"
    )

    names = ["date", "values"]
    usecols = names[:]
    parse_dates = [0]

    stamps = [
        Timestamp("2008-02-07 09:40"),
        Timestamp("2008-02-07 09:50"),
        Timestamp("2008-02-07 10:00"),
    ]
    expected = DataFrame(
        {"values": [1032.43, 1042.54, 1051.65]},
        index=Index(stamps, name="date"),
    )

    result = parser.read_csv(
        StringIO(data),
        parse_dates=parse_dates,
        index_col=0,
        usecols=usecols,
        header=None,
        names=names,
    )
    tm.assert_frame_equal(result, expected)
def test_nat_parse(all_parsers):
    """A frame with blanked-out rows survives a CSV round trip with parsed dates."""
    # see gh-3062
    parser = all_parsers

    frame_data = {
        "A": np.arange(10, dtype="float64"),
        "B": Timestamp("20010101"),
    }
    df = DataFrame(frame_data)
    # Blank out rows 3..5 in every column.
    df.iloc[3:6, :] = np.nan

    with tm.ensure_clean("__nat_parse_.csv") as path:
        df.to_csv(path)

        result = parser.read_csv(path, index_col=0, parse_dates=["B"])
        tm.assert_frame_equal(result, df)
def test_index_groupby(self):
    """``Index.groupby`` for several index kinds, with numeric and datetime keys."""
    candidates = [
        Index(range(6)),
        Index(np.arange(0, 0.6, 0.1)),
        Index('A B C D E F'.split()),
        pd.date_range('2013-01-01', freq='M', periods=6),
    ]

    # Grouping keys do not depend on the index under test. Positions 2 and 3
    # are missing (nan / NaT) and are absent from the expected groups.
    numeric_keys = np.array([1, 2, np.nan, np.nan, 2, 1])
    nov, dec = datetime(2011, 11, 1), datetime(2011, 12, 1)
    datetime_keys = Index([nov, dec, pd.NaT, pd.NaT, dec, nov],
                          tz='UTC').values
    ex_keys = [Timestamp('2011-11-01'), Timestamp('2011-12-01')]

    for idx in candidates:
        first_group = idx[[0, 5]]
        second_group = idx[[1, 4]]

        tm.assert_dict_equal(idx.groupby(numeric_keys),
                             {1.0: first_group, 2.0: second_group})

        expected = {ex_keys[0]: first_group, ex_keys[1]: second_group}
        tm.assert_dict_equal(idx.groupby(datetime_keys), expected)
def test_date_parser_int_bug(all_parsers):
    """A custom ``date_parser`` turns integer POSIX timestamps into the index.

    The parser callable converts each value with ``int`` and interprets it
    as seconds since the epoch in UTC, returning a naive datetime.
    """
    # see gh-3071
    # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12; the
    # tz-aware conversion below, with tzinfo stripped, gives the same value.
    from datetime import timezone

    def posix_to_naive_utc(value):
        aware = datetime.fromtimestamp(int(value), tz=timezone.utc)
        return aware.replace(tzinfo=None)

    parser = all_parsers
    data = (
        "posix_timestamp,elapsed,sys,user,queries,query_time,rows,"
        "accountid,userid,contactid,level,silo,method\n"
        "1343103150,0.062353,0,4,6,0.01690,3,"
        "12345,1,-1,3,invoice_InvoiceResource,search\n"
    )

    result = parser.read_csv(
        StringIO(data),
        index_col=0,
        parse_dates=[0],
        date_parser=posix_to_naive_utc,
    )
    expected = DataFrame(
        [
            [
                0.062353,
                0,
                4,
                6,
                0.01690,
                3,
                12345,
                1,
                -1,
                3,
                "invoice_InvoiceResource",
                "search",
            ]
        ],
        columns=[
            "elapsed",
            "sys",
            "user",
            "queries",
            "query_time",
            "rows",
            "accountid",
            "userid",
            "contactid",
            "level",
            "silo",
            "method",
        ],
        index=Index([Timestamp("2012-07-24 04:12:30")], name="posix_timestamp"),
    )
    tm.assert_frame_equal(result, expected)
def test_date_parser_usecols_thousands(all_parsers):
    """``thousands`` must not interfere with a date column chosen via ``usecols``."""
    # GH#39365
    parser = all_parsers
    data = (
        "A,B,C\n"
        "1,3,20-09-01-01\n"
        "2,4,20-09-01-01\n"
    )

    read_kwargs = {
        "parse_dates": [1],
        "usecols": [1, 2],
        "thousands": "-",
    }
    result = parser.read_csv(StringIO(data), **read_kwargs)

    stamp = Timestamp("20-09-2001 01:00:00")
    expected = DataFrame({"B": [3, 4], "C": [stamp, stamp]})
    tm.assert_frame_equal(result, expected)
def test_datetime_units(self):
    """``date_unit`` scales the encoded epoch value; unknown units raise."""
    val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
    stamp = Timestamp(val)

    # (unit, nanoseconds per unit): the decoded number must equal the
    # nanosecond value floor-divided down to that unit.
    unit_scales = (('s', 10**9), ('ms', 10**6), ('us', 10**3), ('ns', 1))
    for unit, nanos_per_unit in unit_scales:
        roundtrip = ujson.decode(ujson.encode(val, date_unit=unit))
        assert roundtrip == stamp.value // nanos_per_unit

    with pytest.raises(ValueError):
        ujson.encode(val, date_unit='foo')
def test_usecols_with_parse_dates3(all_parsers):
    """Select all ten columns by name and parse the first one as a date."""
    # see gh-14792
    parser = all_parsers
    data = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"

    usecols = list("abcdefghij")
    parse_dates = [0]

    # Column "a" is a scalar timestamp; "b".."j" are one-element int lists.
    cols = {"a": Timestamp("2016-09-21")}
    for label, number in zip("bcdefghij", [1, 1, 2, 3, 4, 5, 6, 7, 8]):
        cols[label] = [number]
    expected = DataFrame(cols, columns=usecols)

    result = parser.read_csv(
        StringIO(data), usecols=usecols, parse_dates=parse_dates
    )
    tm.assert_frame_equal(result, expected)
def test_datetime_units(self):
    """``date_unit`` scales the encoded epoch value; unknown units raise."""
    val = datetime.datetime(2013, 8, 17, 21, 17, 12, 215504)
    stamp = Timestamp(val)

    # (unit, nanoseconds per unit): the decoded number must equal the
    # nanosecond value floor-divided down to that unit.
    unit_scales = (("s", 10**9), ("ms", 10**6), ("us", 10**3), ("ns", 1))
    for unit, nanos_per_unit in unit_scales:
        roundtrip = ujson.decode(ujson.encode(val, date_unit=unit))
        assert roundtrip == stamp.value // nanos_per_unit

    msg = "Invalid value 'foo' for option 'date_unit'"
    with pytest.raises(ValueError, match=msg):
        ujson.encode(val, date_unit="foo")
def test_datetimeindex():
    """MultiIndex levels built from datetime-like inputs are DatetimeIndex."""
    tokyo_times = ["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"]
    idx1 = pd.DatetimeIndex(tokyo_times * 2, tz="Asia/Tokyo")
    idx2 = date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern")
    idx = MultiIndex.from_arrays([idx1, idx2])

    # The repeated first level collapses to its three distinct values.
    expected1 = pd.DatetimeIndex(tokyo_times, tz="Asia/Tokyo")
    tm.assert_index_equal(idx.levels[0], expected1)
    tm.assert_index_equal(idx.levels[1], idx2)

    # from datetime combos
    # GH 7888
    todays = [date.today(), datetime.today(), Timestamp.today()]
    for d1, d2 in itertools.product(todays, repeat=2):
        index = MultiIndex.from_product([[d1], [d2]])
        for level in (index.levels[0], index.levels[1]):
            assert isinstance(level, pd.DatetimeIndex)
import pandas as pd
import numpy as np
from pandas._libs.tslib import Timestamp

if __name__ == '__main__':
    # Load the drought records and parse both validity bounds as datetimes.
    df1 = pd.read_csv('pdroughts.csv')
    ddata = df1.copy()
    for bound in ('valid_start', 'valid_end'):
        ddata[bound] = pd.to_datetime(ddata[bound])

    # Keep rows whose start AND end both fall inside calendar year 2010.
    year_start = Timestamp('2010-01-01 00:00:00')
    year_end = Timestamp('2011-01-01 00:00:00')
    starts_in_year = ((ddata['valid_start'] >= year_start)
                      & (ddata['valid_start'] < year_end))
    ends_in_year = ((ddata['valid_end'] >= year_start)
                    & (ddata['valid_end'] < year_end))
    ddata = ddata[starts_in_year & ends_in_year]

    # Keep rows where the d2 + d3 + d4 columns sum to at least 50.
    severe_share = ddata['d2'] + ddata['d3'] + ddata['d4']
    ddata = ddata[severe_share >= 50]

    ddata = ddata.sort_values(['fips', 'valid_start'])
    fips = ddata.fips.unique()
    print(len(fips))
    ddata = ddata.reset_index(drop=True)

    # Load water usage and collect (fips, d_totaluse, population) for every
    # row whose fips also appears in the filtered drought data.
    df2 = pd.read_csv('water_usage.csv')
    wanted_columns = [
        'fips', 'state_fips', 'county_fips', 'population', 'd_totaluse'
    ]
    wdata = df2.copy()[wanted_columns]

    mapping = []
    for i in range(len(wdata)):
        row = wdata.iloc[i]
        if row['fips'] in fips:
            mapping.append((row['fips'], row['d_totaluse'],
                            row['population']))
def _box_func(self): return lambda x: Timestamp(x, freq=self.freq, tz=self.tz)
def test_constructor_invalid(self):
    """``Float64Index`` raises ``TypeError`` for each invalid input below."""
    # invalid
    invalid_inputs = (
        0.,
        ['a', 'b', 0.],
        [Timestamp('20130101')],
    )
    for bad_data in invalid_inputs:
        with pytest.raises(TypeError):
            Float64Index(bad_data)
def _generate_range(cls, start, end, periods, freq, tz=None,
                    normalize=False, ambiguous='raise', closed=None):
    """
    Build a range from exactly three of start, end, periods and freq.

    Parameters
    ----------
    cls : type
        Provides ``_use_cached_range``, ``_cached_range`` and
        ``_simple_new``; also called directly on localized values.
    start, end : Timestamp-like or None
        Endpoints; every non-None value is converted with ``Timestamp``.
    periods : int or None
    freq : offset-like or None
        Passed through ``to_offset``.
    tz : optional
        Requested timezone; reconciled with the endpoints by
        ``_infer_tz_from_endpoints``.
    normalize : bool, default False
        Forwarded to ``_maybe_normalize_endpoints``.
    ambiguous : default 'raise'
        Forwarded to ``conversion.tz_localize_to_utc``.
    closed : optional
        Forwarded to ``dtl.validate_endpoints``, which yields the
        left/right closed flags used to trim the endpoints.

    Returns
    -------
    The result of ``cls._simple_new(index.values, freq=freq, tz=tz)``.

    Raises
    ------
    ValueError
        If not exactly three of the four range parameters are given, or if
        ``closed`` is set while both ``start`` and ``end`` are None.
    """
    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError('Of the four parameters: start, end, periods, '
                         'and freq, exactly three must be specified')
    freq = to_offset(freq)

    if start is not None:
        start = Timestamp(start)

    if end is not None:
        end = Timestamp(end)

    if start is None and end is None:
        if closed is not None:
            # Fixed: the two literals used to join as "...startand end...".
            raise ValueError("Closed has to be None if not both of start "
                             "and end are defined")

    left_closed, right_closed = dtl.validate_endpoints(closed)

    start, end, _normalized = _maybe_normalize_endpoints(start, end,
                                                         normalize)

    tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz)

    if hasattr(freq, 'delta') and freq != Day():
        # sub-Day Tick
        if inferred_tz is None and tz is not None:
            # naive dates
            if start is not None and start.tz is None:
                start = start.tz_localize(tz, ambiguous=False)

            if end is not None and end.tz is None:
                end = end.tz_localize(tz, ambiguous=False)

        if start and end:
            # Give a naive endpoint the timezone of the aware one.
            if start.tz is None and end.tz is not None:
                start = start.tz_localize(end.tz, ambiguous=False)

            if end.tz is None and start.tz is not None:
                end = end.tz_localize(start.tz, ambiguous=False)

        if cls._use_cached_range(freq, _normalized, start, end):
            index = cls._cached_range(start, end, periods=periods,
                                      freq=freq)
        else:
            index = _generate_regular_range(cls, start, end, periods, freq)

    else:

        if tz is not None:
            # naive dates
            # Strip tzinfo so the range is generated in wall time; it is
            # localized again further down.
            if start is not None and start.tz is not None:
                start = start.replace(tzinfo=None)

            if end is not None and end.tz is not None:
                end = end.replace(tzinfo=None)

        if start and end:
            if start.tz is None and end.tz is not None:
                end = end.replace(tzinfo=None)

            if end.tz is None and start.tz is not None:
                start = start.replace(tzinfo=None)

        if freq is not None:
            if cls._use_cached_range(freq, _normalized, start, end):
                index = cls._cached_range(start, end, periods=periods,
                                          freq=freq)
            else:
                index = _generate_regular_range(cls, start, end,
                                                periods, freq)

            if tz is not None and getattr(index, 'tz', None) is None:
                arr = conversion.tz_localize_to_utc(
                    ensure_int64(index.values),
                    tz, ambiguous=ambiguous)

                index = cls(arr)

                # index is localized datetime64 array -> have to convert
                # start/end as well to compare
                if start is not None:
                    start = start.tz_localize(tz).asm8
                if end is not None:
                    end = end.tz_localize(tz).asm8
        else:
            # Create a linearly spaced date_range in local time
            start = start.tz_localize(tz)
            end = end.tz_localize(tz)
            arr = np.linspace(start.value, end.value, periods)
            index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz)

    # Drop an endpoint when that side of the interval is open.
    if not left_closed and len(index) and index[0] == start:
        index = index[1:]
    if not right_closed and len(index) and index[-1] == end:
        index = index[:-1]

    return cls._simple_new(index.values, freq=freq, tz=tz)
def actualizer(self, sch, after_date=None):
    """Return the rows of ``sch`` whose 'Arrival datatime' is after a cutoff.

    The cutoff is ``after_date``, or ``Timestamp.today()`` when it is None.
    The comparison is strict, so rows equal to the cutoff are excluded.
    """
    cutoff = Timestamp.today() if after_date is None else after_date
    arrives_later = sch['Arrival datatime'] > cutoff
    return sch[arrives_later]
def _generate_range(cls, start, end, periods, freq, tz=None,
                    normalize=False, ambiguous='raise', closed=None):
    """
    Build a range from exactly three of start, end, periods and freq.

    Parameters
    ----------
    cls : type
        Provides ``_use_cached_range``, ``_cached_range`` and
        ``_simple_new``; also called directly on localized values.
    start, end : Timestamp-like or None
        Endpoints; every non-None value is converted with ``Timestamp``.
    periods : int or None
    freq : offset-like or None
        Passed through ``to_offset``.
    tz : optional
        Requested timezone; reconciled with the endpoints by
        ``_infer_tz_from_endpoints``.
    normalize : bool, default False
        Forwarded to ``_maybe_normalize_endpoints``.
    ambiguous : default 'raise'
        Forwarded to ``conversion.tz_localize_to_utc``.
    closed : optional
        Forwarded to ``dtl.validate_endpoints``, which yields the
        left/right closed flags used to trim the endpoints.

    Returns
    -------
    The result of ``cls._simple_new(index.values, freq=freq, tz=tz)``.

    Raises
    ------
    ValueError
        If not exactly three of the four range parameters are given, or if
        ``closed`` is set while both ``start`` and ``end`` are None.
    """
    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError('Of the four parameters: start, end, periods, '
                         'and freq, exactly three must be specified')
    freq = to_offset(freq)

    if start is not None:
        start = Timestamp(start)

    if end is not None:
        end = Timestamp(end)

    if start is None and end is None:
        if closed is not None:
            # Fixed: the two literals used to join as "...startand end...".
            raise ValueError("Closed has to be None if not both of start "
                             "and end are defined")

    left_closed, right_closed = dtl.validate_endpoints(closed)

    start, end, _normalized = _maybe_normalize_endpoints(
        start, end, normalize)

    tz, inferred_tz = _infer_tz_from_endpoints(start, end, tz)

    if hasattr(freq, 'delta') and freq != Day():
        # sub-Day Tick
        if inferred_tz is None and tz is not None:
            # naive dates
            if start is not None and start.tz is None:
                start = start.tz_localize(tz, ambiguous=False)

            if end is not None and end.tz is None:
                end = end.tz_localize(tz, ambiguous=False)

        if start and end:
            # Give a naive endpoint the timezone of the aware one.
            if start.tz is None and end.tz is not None:
                start = start.tz_localize(end.tz, ambiguous=False)

            if end.tz is None and start.tz is not None:
                end = end.tz_localize(start.tz, ambiguous=False)

        if cls._use_cached_range(freq, _normalized, start, end):
            index = cls._cached_range(start, end, periods=periods,
                                      freq=freq)
        else:
            index = _generate_regular_range(cls, start, end, periods, freq)

    else:

        if tz is not None:
            # naive dates
            # Strip tzinfo so the range is generated in wall time; it is
            # localized again further down.
            if start is not None and start.tz is not None:
                start = start.replace(tzinfo=None)

            if end is not None and end.tz is not None:
                end = end.replace(tzinfo=None)

        if start and end:
            if start.tz is None and end.tz is not None:
                end = end.replace(tzinfo=None)

            if end.tz is None and start.tz is not None:
                start = start.replace(tzinfo=None)

        if freq is not None:
            if cls._use_cached_range(freq, _normalized, start, end):
                index = cls._cached_range(start, end, periods=periods,
                                          freq=freq)
            else:
                index = _generate_regular_range(cls, start, end,
                                                periods, freq)

            if tz is not None and getattr(index, 'tz', None) is None:
                arr = conversion.tz_localize_to_utc(ensure_int64(
                    index.values), tz, ambiguous=ambiguous)

                index = cls(arr)

                # index is localized datetime64 array -> have to convert
                # start/end as well to compare
                if start is not None:
                    start = start.tz_localize(tz).asm8
                if end is not None:
                    end = end.tz_localize(tz).asm8
        else:
            # Create a linearly spaced date_range in local time
            start = start.tz_localize(tz)
            end = end.tz_localize(tz)
            arr = np.linspace(start.value, end.value, periods)
            index = cls._simple_new(arr.astype('M8[ns]'), freq=None, tz=tz)

    # Drop an endpoint when that side of the interval is open.
    if not left_closed and len(index) and index[0] == start:
        index = index[1:]
    if not right_closed and len(index) and index[-1] == end:
        index = index[:-1]

    return cls._simple_new(index.values, freq=freq, tz=tz)
def test_usecols_with_parse_dates(self):
    """``usecols`` combined with ``parse_dates`` across four scenarios."""
    # See gh-9755
    s = (
        "a,b,c,d,e\n"
        "0,1,20140101,0900,4\n"
        "0,1,20140102,1000,4"
    )
    parse_dates = [[1, 2]]
    merged_dates = [
        Timestamp('2014-01-01 09:00:00'),
        Timestamp('2014-01-02 10:00:00'),
    ]
    expected = DataFrame({'a': [0, 0], 'c_d': merged_dates},
                         columns=['c_d', 'a'])

    # The order in which the positions are listed must not matter.
    for positions in ([0, 2, 3], [3, 0, 2]):
        df = self.read_csv(StringIO(s), usecols=positions,
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)

    # See gh-13604
    s = (
        "2008-02-07 09:40,1032.43\n"
        "2008-02-07 09:50,1042.54\n"
        "2008-02-07 10:00,1051.65\n"
    )
    parse_dates = [0]
    names = ['date', 'values']
    usecols = names[:]

    stamps = [Timestamp('2008-02-07 09:40'),
              Timestamp('2008-02-07 09:50'),
              Timestamp('2008-02-07 10:00')]
    expected = DataFrame({'values': [1032.43, 1042.54, 1051.65]},
                         index=Index(stamps, name='date'))

    df = self.read_csv(StringIO(s), parse_dates=parse_dates, index_col=0,
                       usecols=usecols, header=None, names=names)
    tm.assert_frame_equal(df, expected)

    # See gh-14792
    s = "a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"
    parse_dates = [0]
    usecols = list('abcdefghij')
    # Columns 'c'..'j' are shared by this scenario and the next one.
    tail_cols = {'c': [1], 'd': [2], 'e': [3], 'f': [4],
                 'g': [5], 'h': [6], 'i': [7], 'j': [8]}

    cols = {'a': Timestamp('2016-09-21'), 'b': [1]}
    cols.update(tail_cols)
    expected = DataFrame(cols, columns=usecols)

    df = self.read_csv(StringIO(s), usecols=usecols,
                       parse_dates=parse_dates)
    tm.assert_frame_equal(df, expected)

    # Columns 'a' and 'b' merged into 'a_b'; the expected value is a string.
    s = """a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"""
    parse_dates = [[0, 1]]
    usecols = list('abcdefghij')

    cols = {'a_b': '2016/09/21 1'}
    cols.update(tail_cols)
    expected = DataFrame(cols, columns=['a_b'] + list('cdefghij'))

    df = self.read_csv(StringIO(s), usecols=usecols,
                       parse_dates=parse_dates)
    tm.assert_frame_equal(df, expected)
def _generate_range(cls, start, end, periods, freq, tz=None,
                    normalize=False, ambiguous='raise', closed=None):
    """
    Build a range from exactly three of start, end, periods and freq.

    Parameters
    ----------
    cls : type
        Provides ``_use_cached_range``, ``_cached_range`` and
        ``_simple_new``; also called directly on localized values.
    start, end : Timestamp-like or None
        Endpoints; every non-None value is converted with ``Timestamp``.
    periods : int or None
    freq : offset-like or None
        Passed through ``to_offset``.
    tz : optional
        Requested timezone; reconciled with the endpoints by
        ``_infer_tz_from_endpoints``.
    normalize : bool, default False
        Forwarded to ``_maybe_normalize_endpoints``.
    ambiguous : default 'raise'
        Forwarded to ``conversion.tz_localize_to_utc``.
    closed : optional
        Forwarded to ``dtl.validate_endpoints``, which yields the
        left/right closed flags used to trim the endpoints.

    Returns
    -------
    The result of ``cls._simple_new(index.values, freq=freq, tz=tz)``.

    Raises
    ------
    ValueError
        If not exactly three of the four range parameters are given, or if
        ``closed`` is set while both ``start`` and ``end`` are None.
    """
    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError('Of the four parameters: start, end, periods, '
                         'and freq, exactly three must be specified')
    freq = to_offset(freq)

    if start is not None:
        start = Timestamp(start)

    if end is not None:
        end = Timestamp(end)

    if start is None and end is None:
        if closed is not None:
            # Fixed: the two literals used to join as "...startand end...".
            raise ValueError("Closed has to be None if not both of start "
                             "and end are defined")

    left_closed, right_closed = dtl.validate_endpoints(closed)

    start, end, _normalized = _maybe_normalize_endpoints(start, end,
                                                         normalize)

    tz, _ = _infer_tz_from_endpoints(start, end, tz)

    if tz is not None:
        # Localize the start and end arguments
        start = _maybe_localize_point(
            start, getattr(start, 'tz', None), start, freq, tz
        )
        end = _maybe_localize_point(
            end, getattr(end, 'tz', None), end, freq, tz
        )
    if start and end:
        # Make sure start and end have the same tz
        start = _maybe_localize_point(
            start, start.tz, end.tz, freq, tz
        )
        end = _maybe_localize_point(
            end, end.tz, start.tz, freq, tz
        )
    if freq is not None:
        if cls._use_cached_range(freq, _normalized, start, end):
            # Currently always False; never hit
            # Should be reimplemented as a part of GH 17914
            index = cls._cached_range(start, end, periods=periods,
                                      freq=freq)
        else:
            index = _generate_regular_range(cls, start, end, periods, freq)

        if tz is not None and getattr(index, 'tz', None) is None:
            arr = conversion.tz_localize_to_utc(
                ensure_int64(index.values),
                tz, ambiguous=ambiguous)

            index = cls(arr)

            # index is localized datetime64 array -> have to convert
            # start/end as well to compare
            if start is not None:
                start = start.tz_localize(tz).asm8
            if end is not None:
                end = end.tz_localize(tz).asm8
    else:
        # Create a linearly spaced date_range in local time
        arr = np.linspace(start.value, end.value, periods)
        index = cls._simple_new(
            arr.astype('M8[ns]', copy=False), freq=None, tz=tz
        )

    # Drop an endpoint when that side of the interval is open.
    if not left_closed and len(index) and index[0] == start:
        index = index[1:]
    if not right_closed and len(index) and index[-1] == end:
        index = index[:-1]

    return cls._simple_new(index.values, freq=freq, tz=tz)