def _get_range_edges(first, last, offset, closed='left', base=0):
    """
    Return the pair of outer edges enclosing ``first`` and ``last``.

    Parameters
    ----------
    first, last : timestamp-like
        Lower and upper bounds to be widened. They must support
        ``normalize()`` and arithmetic with ``offset``.
    offset : str or offset object
        A string is converted with ``to_offset`` before use.
    closed : str, default 'left'
        With ``'left'`` the lower edge is ``offset.rollback(first)``; with
        any other value it is ``first - offset``.
    base : int, default 0
        Only forwarded to ``_adjust_dates_anchored``.

    Returns
    -------
    tuple
        ``(first, last)`` edges. For the anchored Tick case this is whatever
        ``_adjust_dates_anchored`` returns; otherwise two ``Timestamp``
        objects.
    """
    if isinstance(offset, compat.string_types):
        offset = to_offset(offset)

    if isinstance(offset, Tick):
        one_day_nanos = delta_to_nanoseconds(timedelta(1))
        offset_is_day = isinstance(offset, Day)

        # #1165
        # Any non-Day tick, or a Day tick that divides one day evenly, is
        # handled by anchoring on the start day.
        if not offset_is_day or one_day_nanos % offset.nanos == 0:
            return _adjust_dates_anchored(first, last, offset,
                                          closed=closed, base=base)
    else:
        # hack! (non-Tick offsets work on day boundaries)
        first = first.normalize()
        last = last.normalize()

    if closed == 'left':
        lower = Timestamp(offset.rollback(first))
    else:
        lower = Timestamp(first - offset)
    upper = Timestamp(last + offset)

    return lower, upper
def test_usecols_with_parse_dates_and_usecol_names(self):
    """
    Combining ``names``, integer ``usecols`` and a nested ``parse_dates``
    must give the same frame regardless of the order of ``usecols``.
    """
    # See gh-9755
    # Rows are joined with an explicit "\n" so the fixture is always read as
    # two records (the expected frame below has two rows).
    s = ("0,1,20140101,0900,4\n"
         "0,1,20140102,1000,4")
    parse_dates = [[1, 2]]
    names = list('acd')

    cols = {
        'a': [0, 0],
        'c_d': [
            Timestamp('2014-01-01 09:00:00'),
            Timestamp('2014-01-02 10:00:00')
        ]
    }
    expected = DataFrame(cols, columns=['c_d', 'a'])

    # Same columns, sorted and unsorted selection order.
    for usecols in ([0, 2, 3], [3, 0, 2]):
        df = self.read_csv(StringIO(s), names=names,
                           usecols=usecols,
                           parse_dates=parse_dates)
        tm.assert_frame_equal(df, expected)
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
    """
    Widen ``first`` and ``last`` onto the grid of ``offset`` steps anchored
    at midnight of ``first``'s day (shifted by ``base``).

    Parameters
    ----------
    first, last : Timestamp
        Bounds to adjust. Their tzinfo and DST state are recorded, the
        computation is done on the tz-naive values, and the results are
        re-localized at the end.
    offset : offset object exposing ``n`` and ``nanos``
    closed : str, default 'right'
        ``'right'`` moves a ``first`` that already sits on the grid back by
        one full step and leaves an on-grid ``last`` untouched. Any other
        value is treated as ``'left'``: an on-grid ``first`` is kept and an
        on-grid ``last`` is moved forward by one full step.
    base : int, default 0
        Shifts the anchor by ``(base % offset.n)`` units of
        ``offset.nanos // offset.n``.

    Returns
    -------
    tuple of Timestamp
        ``(first_edge, last_edge)``.
    """
    # First and last offsets should be calculated from the start day to fix an
    # error caused by resampling across multiple days when a one day period is
    # not a multiple of the frequency.
    #
    # See https://github.com/pandas-dev/pandas/issues/8683

    # 14682 - Since we need to drop the TZ information to perform
    # the adjustment in the presence of a DST change,
    # save TZ Info and the DST state of the first and last parameters
    # so that we can accurately rebuild them at the end.
    tz_first = first.tzinfo
    tz_last = last.tzinfo
    dst_first = bool(first.dst())
    dst_last = bool(last.dst())

    first = first.tz_localize(None)
    last = last.tz_localize(None)

    step = offset.nanos
    base_shift = (base % offset.n) * step // offset.n
    anchor = first.normalize().value + base_shift

    # Distance of each bound past the nearest grid point at or before it.
    first_rem = (first.value - anchor) % step
    last_rem = (last.value - anchor) % step

    right_closed = closed == 'right'

    # Lower edge: off-grid values always roll back to the grid.
    if first_rem > 0:
        lower = first.value - first_rem
    elif right_closed:
        lower = first.value - step
    else:
        # start of the road
        lower = first.value

    # Upper edge: off-grid values always roll forward to the grid.
    if last_rem > 0:
        upper = last.value + (step - last_rem)
    elif right_closed:
        # already the end of the road
        upper = last.value
    else:
        upper = last.value + step

    return (Timestamp(lower).tz_localize(tz_first, ambiguous=dst_first),
            Timestamp(upper).tz_localize(tz_last, ambiguous=dst_last))
def test_override__set_noconvert_columns(self):
    """
    ``parse_dates`` must convert the right columns even when the parser's
    integer ``usecols`` are iterated in a non-sorted order.
    """
    # GH 17351 - usecols needs to be sorted in _set_noconvert_columns
    # based on the test_usecols_with_parse_dates test from usecols.py
    from pandas.io.parsers import CParserWrapper, TextFileReader

    # Rows are joined with an explicit "\n" so the fixture is always read as
    # a header plus two records (the expected frame below has two rows).
    s = ("a,b,c,d,e\n"
         "0,1,20140101,0900,4\n"
         "0,1,20140102,1000,4")

    parse_dates = [[1, 2]]
    cols = {
        'a': [0, 0],
        'c_d': [
            Timestamp('2014-01-01 09:00:00'),
            Timestamp('2014-01-02 10:00:00')
        ]
    }
    expected = DataFrame(cols, columns=['c_d', 'a'])

    class MyTextFileReader(TextFileReader):
        # Skips the parent constructor; only the attributes set here are
        # initialised, the engine and options are attached by the test.
        def __init__(self):
            self._currow = 0
            self.squeeze = False

    class MyCParserWrapper(CParserWrapper):
        def _set_noconvert_columns(self):
            if self.usecols_dtype == 'integer':
                # self.usecols is a set, which is documented as unordered
                # but in practice, a CPython set of integers is sorted.
                # In other implementations this assumption does not hold.
                # The following code simulates a different order, which
                # before GH 17351 would cause the wrong columns to be
                # converted via the parse_dates parameter
                self.usecols = list(self.usecols)
                self.usecols.reverse()
            return CParserWrapper._set_noconvert_columns(self)

    parser = MyTextFileReader()
    parser.options = {
        'usecols': [0, 2, 3],
        'parse_dates': parse_dates,
        'delimiter': ','
    }
    parser._engine = MyCParserWrapper(StringIO(s), **parser.options)
    df = parser.read()

    tm.assert_frame_equal(df, expected)
def test_multiple_date_col_timestamp_parse(self):
    """
    Columns 0 and 1 combined via ``parse_dates=[[0, 1]]`` and parsed with
    ``date_parser=Timestamp`` must keep the sub-second part of the time.
    """
    # Rows are joined with an explicit "\n" so the fixture is always read as
    # two 8-field records rather than one long row.
    data = ("05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25\n"
            "05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25")

    result = self.read_csv(StringIO(data), sep=',', header=None,
                           parse_dates=[[0, 1]], date_parser=Timestamp)

    ex_val = Timestamp('05/31/2012 15:30:00.029')
    assert result['0_1'][0] == ex_val
def test_index_groupby(self):
    """
    ``Index.groupby`` must drop NaN / NaT group keys and map each remaining
    key to the sub-index of matching positions, for several index types.
    """
    indexes = [
        Index(range(6)),
        Index(np.arange(0, 0.6, 0.1)),
        Index('A B C D E F'.split()),
        pd.date_range('2013-01-01', freq='M', periods=6),
    ]

    # Numeric grouper with missing values at positions 2 and 3.
    numeric_keys = np.array([1, 2, np.nan, np.nan, 2, 1])

    # Datetime grouper with NaT at positions 2 and 3.
    nov = datetime(2011, 11, 1)
    dec = datetime(2011, 12, 1)
    datetime_keys = Index([nov, dec, pd.NaT, pd.NaT, dec, nov],
                          tz='UTC').values
    ex_keys = [Timestamp('2011-11-01'), Timestamp('2011-12-01')]

    for idx in indexes:
        tm.assert_dict_equal(idx.groupby(numeric_keys),
                             {1.0: idx[[0, 5]], 2.0: idx[[1, 4]]})

        expected = {ex_keys[0]: idx[[0, 5]],
                    ex_keys[1]: idx[[1, 4]]}
        tm.assert_dict_equal(idx.groupby(datetime_keys), expected)
def test_constructor_invalid(self):
    """Float64Index must raise TypeError for each of the invalid inputs."""
    # invalid
    bad_inputs = [
        0.,
        ['a', 'b', 0.],
        [Timestamp('20130101')],
    ]
    for data in bad_inputs:
        with pytest.raises(TypeError):
            Float64Index(data)
def test_usecols_with_parse_dates(self):
    """
    ``usecols`` combined with ``parse_dates`` must select and convert the
    right columns for integer and named selections, with and without an
    index column, and for nested date-column specs.
    """
    # All multi-row fixtures below join their rows with an explicit "\n" so
    # each one is always read as separate records.

    # See gh-9755
    s = ("a,b,c,d,e\n"
         "0,1,20140101,0900,4\n"
         "0,1,20140102,1000,4")
    parse_dates = [[1, 2]]

    cols = {
        'a': [0, 0],
        'c_d': [
            Timestamp('2014-01-01 09:00:00'),
            Timestamp('2014-01-02 10:00:00')
        ]
    }
    expected = DataFrame(cols, columns=['c_d', 'a'])

    df = self.read_csv(StringIO(s), usecols=[0, 2, 3],
                       parse_dates=parse_dates)
    tm.assert_frame_equal(df, expected)

    df = self.read_csv(StringIO(s), usecols=[3, 0, 2],
                       parse_dates=parse_dates)
    tm.assert_frame_equal(df, expected)

    # See gh-13604
    s = ("2008-02-07 09:40,1032.43\n"
         "2008-02-07 09:50,1042.54\n"
         "2008-02-07 10:00,1051.65\n")
    parse_dates = [0]
    names = ['date', 'values']
    usecols = names[:]

    index = Index([
        Timestamp('2008-02-07 09:40'),
        Timestamp('2008-02-07 09:50'),
        Timestamp('2008-02-07 10:00')
    ], name='date')
    cols = {'values': [1032.43, 1042.54, 1051.65]}
    expected = DataFrame(cols, index=index)

    df = self.read_csv(StringIO(s), parse_dates=parse_dates, index_col=0,
                       usecols=usecols, header=None, names=names)
    tm.assert_frame_equal(df, expected)

    # See gh-14792
    s = ("a,b,c,d,e,f,g,h,i,j\n"
         "2016/09/21,1,1,2,3,4,5,6,7,8")
    parse_dates = [0]
    usecols = list('abcdefghij')
    cols = {
        'a': Timestamp('2016-09-21'),
        'b': [1], 'c': [1], 'd': [2],
        'e': [3], 'f': [4], 'g': [5],
        'h': [6], 'i': [7], 'j': [8]
    }
    expected = DataFrame(cols, columns=usecols)
    df = self.read_csv(StringIO(s), usecols=usecols,
                       parse_dates=parse_dates)
    tm.assert_frame_equal(df, expected)

    # Nested spec: columns 0 and 1 are merged into a single 'a_b' column.
    s = """a,b,c,d,e,f,g,h,i,j\n2016/09/21,1,1,2,3,4,5,6,7,8"""
    parse_dates = [[0, 1]]
    usecols = list('abcdefghij')
    cols = {
        'a_b': '2016/09/21 1',
        'c': [1], 'd': [2], 'e': [3],
        'f': [4], 'g': [5], 'h': [6],
        'i': [7], 'j': [8]
    }
    expected = DataFrame(cols, columns=['a_b'] + list('cdefghij'))
    df = self.read_csv(StringIO(s), usecols=usecols,
                       parse_dates=parse_dates)
    tm.assert_frame_equal(df, expected)
def test_constructor_invalid(self):
    """Float64Index must raise TypeError for each of the invalid inputs."""
    # invalid
    bad_inputs = [
        0.,
        ['a', 'b', 0.],
        [Timestamp('20130101')],
    ]
    for data in bad_inputs:
        with self.assertRaises(TypeError):
            Float64Index(data)