def test_between_time(self):
    """Exercise ``Series.between_time`` on a 5-minute grid.

    Two windows are checked -- 00:00-01:00 inside one day and 22:00-09:00
    wrapping past midnight -- under all four combinations of the start/end
    inclusion flags.  For each combination the number of selected rows and
    the time-of-day of every selected row are verified.
    """
    flags = [True, False]

    # Window contained in a single day.
    index = date_range('1/1/2000', '1/5/2000', freq='5min')
    series = Series(np.random.randn(len(index)), index=index)
    stime, etime = time(0, 0), time(1, 0)

    for inc_start, inc_end in product(flags, flags):
        filtered = series.between_time(stime, etime, inc_start, inc_end)

        # Excluding an endpoint removes one stamp for each day on which
        # that time-of-day is present in the index.
        missing = (0 if inc_start else 5) + (0 if inc_end else 4)
        assert len(filtered) == 13 * 4 + 1 - missing

        for stamp in filtered.index:
            t = stamp.time()
            assert (t >= stime) if inc_start else (t > stime)
            assert (t <= etime) if inc_end else (t < etime)

    # String endpoints must select the same rows as ``time`` objects.
    result = series.between_time('00:00', '01:00')
    expected = series.between_time(stime, etime)
    assert_series_equal(result, expected)

    # across midnight
    index = date_range('1/1/2000', '1/5/2000', freq='5min')
    series = Series(np.random.randn(len(index)), index=index)
    stime, etime = time(22, 0), time(9, 0)

    for inc_start, inc_end in product(flags, flags):
        filtered = series.between_time(stime, etime, inc_start, inc_end)

        missing = (0 if inc_start else 4) + (0 if inc_end else 4)
        assert len(filtered) == (12 * 11 + 1) * 4 + 1 - missing

        for stamp in filtered.index:
            t = stamp.time()
            if inc_start:
                assert (t >= stime) or (t <= etime)
            else:
                assert (t > stime) or (t <= etime)
            if inc_end:
                assert (t <= etime) or (t >= stime)
            else:
                assert (t < etime) or (t >= stime)
def test_between_time(self):
    """Exercise ``Series.between_time`` on a 5-minute grid (unittest style).

    A same-day window (00:00-01:00) and a window wrapping past midnight
    (22:00-09:00) are each checked under all four combinations of the
    start/end inclusion flags: first the row count, then the time-of-day
    of every selected row.
    """
    flags = [True, False]

    # Window contained in a single day.
    index = date_range('1/1/2000', '1/5/2000', freq='5min')
    series = Series(np.random.randn(len(index)), index=index)
    stime, etime = time(0, 0), time(1, 0)

    for inc_start, inc_end in product(flags, flags):
        filtered = series.between_time(stime, etime, inc_start, inc_end)

        # Excluding an endpoint removes one stamp for each day on which
        # that time-of-day is present in the index.
        missing = (0 if inc_start else 5) + (0 if inc_end else 4)
        self.assertEqual(len(filtered), 13 * 4 + 1 - missing)

        for stamp in filtered.index:
            t = stamp.time()
            self.assertTrue(t >= stime if inc_start else t > stime)
            self.assertTrue(t <= etime if inc_end else t < etime)

    # String endpoints must select the same rows as ``time`` objects.
    result = series.between_time('00:00', '01:00')
    expected = series.between_time(stime, etime)
    assert_series_equal(result, expected)

    # across midnight
    index = date_range('1/1/2000', '1/5/2000', freq='5min')
    series = Series(np.random.randn(len(index)), index=index)
    stime, etime = time(22, 0), time(9, 0)

    for inc_start, inc_end in product(flags, flags):
        filtered = series.between_time(stime, etime, inc_start, inc_end)

        missing = (0 if inc_start else 4) + (0 if inc_end else 4)
        self.assertEqual(len(filtered), (12 * 11 + 1) * 4 + 1 - missing)

        for stamp in filtered.index:
            t = stamp.time()
            if inc_start:
                self.assertTrue((t >= stime) or (t <= etime))
            else:
                self.assertTrue((t > stime) or (t <= etime))
            if inc_end:
                self.assertTrue((t <= etime) or (t >= stime))
            else:
                self.assertTrue((t < etime) or (t >= stime))
def test_between_time_axis(self):
    """``Series.between_time`` accepts ``axis=0`` and rejects ``axis=1``."""
    # issue 8839
    index = date_range('1/1/2000', periods=100, freq='10min')
    ser = Series(np.random.randn(len(index)), index=index)
    stime, etime = '08:00:00', '09:00:00'
    n_expected = 7

    # Default axis and explicit axis=0 select the same number of rows.
    default_axis = ser.between_time(stime, etime)
    assert len(default_axis) == n_expected
    explicit_axis = ser.between_time(stime, etime, axis=0)
    assert len(explicit_axis) == n_expected

    # axis=1 must be rejected for a Series.
    with pytest.raises(ValueError):
        ser.between_time(stime, etime, axis=1)
def test_between_time_axis(self):
    """``Series.between_time`` accepts ``axis=0`` and rejects ``axis=1``.

    The rejection is checked against the expected error message.
    """
    # issue 8839
    index = date_range('1/1/2000', periods=100, freq='10min')
    ser = Series(np.random.randn(len(index)), index=index)
    stime, etime = '08:00:00', '09:00:00'
    n_expected = 7

    # Default axis and explicit axis=0 select the same number of rows.
    default_axis = ser.between_time(stime, etime)
    assert len(default_axis) == n_expected
    explicit_axis = ser.between_time(stime, etime, axis=0)
    assert len(explicit_axis) == n_expected

    # axis=1 must be rejected for a Series.
    msg = r"No axis named 1 for object type <(class|type) 'type'>"
    with pytest.raises(ValueError, match=msg):
        ser.between_time(stime, etime, axis=1)
def test_between_time_axis(self):
    """``Series.between_time`` accepts ``axis=0`` and rejects ``axis=1``.

    The rejection is checked against the expected error message.
    """
    # issue 8839
    index = date_range("1/1/2000", periods=100, freq="10min")
    ser = Series(np.random.randn(len(index)), index=index)
    stime, etime = "08:00:00", "09:00:00"
    n_expected = 7

    # Default axis and explicit axis=0 select the same number of rows.
    default_axis = ser.between_time(stime, etime)
    assert len(default_axis) == n_expected
    explicit_axis = ser.between_time(stime, etime, axis=0)
    assert len(explicit_axis) == n_expected

    # axis=1 must be rejected for a Series.
    msg = "No axis named 1 for object type <class 'pandas.core.series.Series'>"
    with pytest.raises(ValueError, match=msg):
        ser.between_time(stime, etime, axis=1)
def test_between_time_types(self):
    """Full ``datetime`` objects are rejected wherever a time-of-day is expected.

    Checked on ``DatetimeIndex.indexer_between_time`` and on
    ``between_time`` of both a DataFrame and a Series.
    """
    # GH11818
    index = date_range("1/1/2000", "1/5/2000", freq="5min")
    bad_bounds = (datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
    msg = r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\] to a time"

    with pytest.raises(ValueError, match=msg):
        index.indexer_between_time(*bad_bounds)

    frame = DataFrame({"A": 0}, index=index)
    with pytest.raises(ValueError, match=msg):
        frame.between_time(*bad_bounds)

    series = Series(0, index=index)
    with pytest.raises(ValueError, match=msg):
        series.between_time(*bad_bounds)
def test_between_time_axis(self, frame_or_series):
    """``between_time`` accepts ``axis=0`` and rejects an out-of-range axis.

    Runs against a Series or, when ``frame_or_series`` is ``DataFrame``,
    against the single-column frame built from it.  The invalid axis used
    is the object's own ``ndim``.
    """
    # GH#8839
    index = date_range("1/1/2000", periods=100, freq="10min")
    obj = Series(np.random.randn(len(index)), index=index)
    if frame_or_series is DataFrame:
        obj = obj.to_frame()

    stime, etime = "08:00:00", "09:00:00"
    n_expected = 7

    # Default axis and explicit axis=0 select the same number of rows.
    default_axis = obj.between_time(stime, etime)
    assert len(default_axis) == n_expected
    explicit_axis = obj.between_time(stime, etime, axis=0)
    assert len(explicit_axis) == n_expected

    msg = f"No axis named {obj.ndim} for object type {type(obj).__name__}"
    with pytest.raises(ValueError, match=msg):
        obj.between_time(stime, etime, axis=obj.ndim)
def test_between_time_types(self):
    """Full ``datetime`` objects are rejected wherever a time-of-day is expected.

    Checked on ``DatetimeIndex.indexer_between_time`` and on
    ``between_time`` of both a DataFrame and a Series.
    """
    # GH11818
    index = date_range('1/1/2000', '1/5/2000', freq='5min')
    bad_bounds = (datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))

    with pytest.raises(ValueError):
        index.indexer_between_time(*bad_bounds)

    frame = DataFrame({'A': 0}, index=index)
    with pytest.raises(ValueError):
        frame.between_time(*bad_bounds)

    series = Series(0, index=index)
    with pytest.raises(ValueError):
        series.between_time(*bad_bounds)
def test_between_time(self):
    """Exercise ``Series.between_time`` for the 00:00-01:00 window.

    All four combinations of the start/end inclusion flags are checked on
    a 5-minute grid: first the number of selected rows, then that every
    selected time-of-day lies inside the requested (open or closed) window.

    Uses the specific ``assertEqual``/``assertGreater...`` methods rather
    than ``assert_``: that alias is deprecated and no longer exists on
    Python 3.12+, and the specific methods report both operands on failure.
    """
    rng = date_range('1/1/2000', '1/5/2000', freq='5min')
    ts = Series(np.random.randn(len(rng)), index=rng)
    stime = time(0, 0)
    etime = time(1, 0)

    close_open = itertools.product([True, False], [True, False])
    for inc_start, inc_end in close_open:
        filtered = ts.between_time(stime, etime, inc_start, inc_end)

        # 13 stamps per day on four days, plus the closing midnight stamp.
        exp_len = 13 * 4 + 1
        if not inc_start:
            exp_len -= 5  # 00:00 is present on five dates
        if not inc_end:
            exp_len -= 4  # 01:00 is present on four dates
        self.assertEqual(len(filtered), exp_len)

        for rs in filtered.index:
            t = rs.time()
            if inc_start:
                self.assertGreaterEqual(t, stime)
            else:
                self.assertGreater(t, stime)
            if inc_end:
                self.assertLessEqual(t, etime)
            else:
                self.assertLess(t, etime)
def test_between_time_types(self):
    """Full ``datetime`` objects are rejected wherever a time-of-day is expected.

    Checked on ``DatetimeIndex.indexer_between_time`` and on
    ``between_time`` of both a DataFrame and a Series.
    """
    # GH11818
    index = date_range('1/1/2000', '1/5/2000', freq='5min')
    bad_bounds = (datetime(2010, 1, 2, 1), datetime(2010, 1, 2, 5))
    msg = (r"Cannot convert arg \[datetime\.datetime\(2010, 1, 2, 1, 0\)\]"
           " to a time")

    with pytest.raises(ValueError, match=msg):
        index.indexer_between_time(*bad_bounds)

    frame = DataFrame({'A': 0}, index=index)
    with pytest.raises(ValueError, match=msg):
        frame.between_time(*bad_bounds)

    series = Series(0, index=index)
    with pytest.raises(ValueError, match=msg):
        series.between_time(*bad_bounds)
def test_localized_between_time(self, tzstr): tz = timezones.maybe_get_tz(tzstr) rng = date_range("4/16/2012", "5/1/2012", freq="H") ts = Series(np.random.randn(len(rng)), index=rng) ts_local = ts.tz_localize(tzstr) t1, t2 = time(10, 0), time(11, 0) result = ts_local.between_time(t1, t2) expected = ts.between_time(t1, t2).tz_localize(tzstr) tm.assert_series_equal(result, expected) assert timezones.tz_compare(result.index.tz, tz)
def test_localized_at_time_between_time(self):
    """``at_time``/``between_time`` on a tz-localized Series match the naive result.

    For both methods, filtering the US/Eastern-localized series must equal
    filtering the naive series and localizing afterwards, and the result
    index must report the US/Eastern zone.

    ``assertEqual`` is used instead of ``assert_``: that alias is
    deprecated and no longer exists on Python 3.12+, and ``assertEqual``
    shows both values on failure.
    """
    from datetime import time

    rng = date_range('4/16/2012', '5/1/2012', freq='H')
    ts = Series(np.random.randn(len(rng)), index=rng)

    ts_local = ts.tz_localize('US/Eastern')

    result = ts_local.at_time(time(10, 0))
    expected = ts.at_time(time(10, 0)).tz_localize('US/Eastern')
    assert_series_equal(result, expected)
    self.assertEqual(result.index.tz.zone, 'US/Eastern')

    t1, t2 = time(10, 0), time(11, 0)
    result = ts_local.between_time(t1, t2)
    expected = ts.between_time(t1, t2).tz_localize('US/Eastern')
    assert_series_equal(result, expected)
    self.assertEqual(result.index.tz.zone, 'US/Eastern')
def test_localized_at_time_between_time(self):
    """``at_time``/``between_time`` on a tz-localized Series match the naive result.

    The zone name is passed through ``self.tzstr`` and the expected
    tzinfo through ``self.tz``; timezones are compared with ``self.cmptz``.
    """
    from datetime import time

    zone = "US/Eastern"
    naive_index = date_range("4/16/2012", "5/1/2012", freq="H")
    naive = Series(np.random.randn(len(naive_index)), index=naive_index)
    localized = naive.tz_localize(self.tzstr(zone))

    # Single time-of-day.
    result = localized.at_time(time(10, 0))
    expected = naive.at_time(time(10, 0)).tz_localize(self.tzstr(zone))
    tm.assert_series_equal(result, expected)
    self.assertTrue(self.cmptz(result.index.tz, self.tz(zone)))

    # Time-of-day window.
    start, end = time(10, 0), time(11, 0)
    result = localized.between_time(start, end)
    expected = naive.between_time(start, end).tz_localize(self.tzstr(zone))
    tm.assert_series_equal(result, expected)
    self.assertTrue(self.cmptz(result.index.tz, self.tz(zone)))
def test_localized_at_time_between_time(self, tzstr): from datetime import time tz = timezones.maybe_get_tz(tzstr) rng = date_range('4/16/2012', '5/1/2012', freq='H') ts = Series(np.random.randn(len(rng)), index=rng) ts_local = ts.tz_localize(tzstr) result = ts_local.at_time(time(10, 0)) expected = ts.at_time(time(10, 0)).tz_localize(tzstr) tm.assert_series_equal(result, expected) assert timezones.tz_compare(result.index.tz, tz) t1, t2 = time(10, 0), time(11, 0) result = ts_local.between_time(t1, t2) expected = ts.between_time(t1, t2).tz_localize(tzstr) tm.assert_series_equal(result, expected) assert timezones.tz_compare(result.index.tz, tz)
# (closing part of a statement that begins before this excerpt)
index=pd.period_range('1984Q2', periods=7, freq='Q-SEP'))

# Four annual values on an A-DEC period index starting at 1982.
inf1 = Series([0.025, 0.045, 0.037, 0.04],
              index=pd.period_range('1982', periods=4, freq='A-DEC'))

# Re-express the annual periods at Q-SEP quarterly frequency (how='end').
inf1_q = inf1.asfreq('Q-SEP', how='end')
print(inf1_q)

# Align onto gdp's index, forward-filling the gaps.
# NOTE(review): gdp is presumably the object built by the truncated
# statement above -- confirm against the full file.
print(inf1_q.reindex(gdp.index, method='ffill'))

# One stamp per minute from 09:30 to 15:59 on 2012-06-01 ...
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')
# ... followed by the same minutes shifted by 1, 2 and 3 business days.
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])
ts = Series(np.arange(len(rng), dtype=float), index=rng)
print(ts)

# Three selections by time-of-day: plain indexing with a time object,
# at_time, and the window between two times.
print(ts[time(10, 0)])
print(ts.at_time(time(10, 0)))
print(ts.between_time(time(10, 0), time(10, 1)))

# Blank out every position except 700 randomly chosen ones, giving an
# irregular series.
indexer = np.sort(np.random.permutation(len(ts))[700:])
irr_ts = ts.copy()
irr_ts[indexer] = np.nan
print(irr_ts['2012-06-01 09:50':'2012-06-01 10:00'])

# asof lookup at 10:00 on four consecutive business days.
selection = pd.date_range('2012-06-01 10:00', periods=4, freq='B')
print(irr_ts.asof(selection))

# Two 6x3 constant frames (all ones / all twos) whose daily indexes start
# one day apart.
data1 = DataFrame(np.ones((6, 3), dtype=float), columns=['a', 'b', 'c'],
                  index=pd.date_range('6/12/2012', periods=6))
data2 = DataFrame(np.ones((6, 3), dtype=float) * 2, columns=['a', 'b', 'c'],
                  index=pd.date_range('6/13/2012', periods=6))
# Align the quarterly series onto gdp's index, forward-filling the gaps.
k = infl_q.reindex(gdp.index, method='ffill')
print(k)

# Time-of-day and "as of" (most recent) data selection.

# One sample per minute over the trading session (09:30-15:59) ...
rng = pd.date_range('2012-06-01 09:30', '2012-06-01 15:59', freq='T')
# ... then append the same minutes for the next three business days
# (range(1, 4) yields offsets 1, 2 and 3, i.e. four sessions in total).
rng = rng.append([rng + pd.offsets.BDay(i) for i in range(1, 4)])
ts = Series(np.arange(len(rng), dtype=float), index=rng)
print(ts.head())
print(ts.tail())

# Indexing with a datetime.time object extracts the values recorded at
# that time of day.
print(ts[time(10, 0)])  # the 10:00 values
print(ts.at_time(time(10, 0)))

# between_time selects the values lying between two time objects.
j = ts.between_time(time(10, 0), time(10, 1))  # narrow down to a time window
print(j)

# When no data point falls exactly on a given time, we want the last value
# seen before it (here: before 10:00).
# Take a random permutation of the positions of ts, keep those from offset
# 700 onward, and sort them.
indexer = np.sort(np.random.permutation(len(ts))[700:])
irr_ts = ts.copy()
# Use those random positions to set part of the observations to NA.
irr_ts[indexer] = np.nan
print(irr_ts['2012-06-01 09:50':'2012-06-01 10:00'])

# Passing a group of Timestamps to asof returns the valid (non-NA) value at
# or before each of them.
# 10:00 on four consecutive business days.
selection = pd.date_range('2012-06-01 10:00', periods=4, freq='B')
# Re-run the random step above a few times until some 10:00 value is NA;
# asof then fills it from the most recent valid observation.
# The result is printed: as a bare expression it is discarded when this
# runs as a script.
print(irr_ts.asof(selection))
class HisRecord():
    """
    A single history record read from a haystack server.

    Attributes set by ``__init__``:
        - hisId   : the haystack id of the trend
        - name    : the ``name`` entry of the point metadata (None if absent)
        - tz_name : the ``tz`` entry of the point metadata
        - tz      : timezone object built from ``tz_name``
        - data    : pandas Series of the values, indexed by timestamp
                    converted to ``session.timezone``; the index is named
                    after the record
    """

    def __init__(self, session, hisId, dateTimeRange='today'):
        """
        GET data from server and fill this object with historical info.

        session       : object giving access to the server; this method uses
                        its ``_log``, ``getHistMeta``, ``_get_grid`` and
                        ``timezone`` members
        hisId         : haystack id of the history to read
        dateTimeRange : either a range string passed to the server unchanged,
                        or a ``(start, end)`` tuple whose members are
                        converted by ``_to_native``

        Any failure while building ``data`` is logged and re-raised.
        """
        # Grab logger child from session
        self._log = session._log.getChild('hisRecord.%s' % hisId)

        # Grab metadata
        self._meta = session.getHistMeta(hisId)
        self.tz_name = self._meta['tz']
        self.tz = zoneinfo.timezone(self.tz_name)

        # A tuple is a (start, end) pair that has to be rendered as text.
        if isinstance(dateTimeRange, tuple):
            (dtStart, dtEnd) = dateTimeRange
            dateTimeRange = '%s,%s' % (self._to_native(dtStart),
                                       self._to_native(dtEnd))

        self.hisId = hisId
        self.name = self._meta.get('name')

        result = session._get_grid('hisRead',
                                   id='@%s' % self.hisId,
                                   range=dateTimeRange)
        self._log.debug('Received result set: %s', result)

        try:
            if result:
                # Convert the list of {ts: foo, val: bar} dicts to a pair
                # of sequences.
                (index, values) = zip(*(
                    (row['ts'], self._strip_unit(row['val']))
                    for row in result))
                series = Series(values, index=index)
            else:
                # No data: a plain empty list would yield a non-datetime
                # index on which tz_convert always raises, so start from an
                # empty timezone-aware DatetimeIndex instead.
                series = Series([], index=pd.DatetimeIndex([], tz='UTC'),
                                dtype='float64')

            # Declare Series converted to local time for session
            self.data = series.tz_convert(session.timezone)

            # Renaming index so the name will be part of the series.
            # The name is passed as a scalar: a list is unhashable and is
            # rejected as an index name by pandas >= 1.0.
            self.data = self.data.reindex(self.data.index.rename(self.name))
        except Exception:
            self._log.error('%s is an Unknown history type', self.hisId)
            raise

    def _to_native(self, dt):
        """Render one end of a date range in the form sent to the server.

        datetimes are localised (when naive) or converted (when aware) to
        this record's timezone and returned as ``'<isoformat> <tz name>'``;
        dates are returned in ISO format; anything else is returned as is.
        """
        self._log.debug('Converting %s to native time', dt)
        # datetime must be tested first: it is a subclass of date.
        if isinstance(dt, datetime.datetime):
            if dt.tzinfo is None:
                # Assume time is already local
                self._log.debug('Localise to timezone %s', self.tz_name)
                dt = self.tz.localize(dt)
            else:
                self._log.debug('Convert to timezone %s', self.tz_name)
                dt = dt.astimezone(self.tz)
            return '%s %s' % (dt.isoformat(), self.tz_name)
        elif isinstance(dt, datetime.date):
            return dt.isoformat()
        else:
            return dt

    @staticmethod
    def _strip_unit(v):
        """Return the bare value of a Quantity; other values pass through."""
        return v.value if isinstance(v, Quantity) else v

    def plot(self):
        """
        Draw a graph of the data
        """
        self.data.plot()

    def breakdownPlot(self, startTime='08:00', endTime='17:00',
                      bins=np.array([0, 0.5, 1, 18.0, 18.5, 19.0, 19.5,
                                     20.0, 20.5, 21.0, 21.5, 22.0, 22.5,
                                     23.0, 23.5, 24.0, 24.5, 25.0])):
        """
        By default, creates a breakdown plot of temperature distribution
        between 18 and 25.
        bins (distribution) can be passed as argument.
        By default, takes values between 8:00 and 17:00.

        startTime = string representation of time (ex. '08:00')
        endTime = string representation of time (ex. '17:00')
        bins = np.array representing distribution
        """
        x = self.data.between_time(startTime, endTime)
        # Assign each non-missing value to its bin, then plot bin sizes.
        barplot = pd.cut(x.dropna(), bins)
        x.groupby(barplot).size().plot(kind='bar')

    def simpleStats(self):
        """
        Shortcut for describe() pandas version
        """
        return self.data.describe()

    def __str__(self):
        return 'History Record of %s' % self.name

    def __repr__(self):
        return 'pyhaystack History Record of %s' % self.name
# Time-of-day and "as of" (most recent) data selection.
# For a long-running market data series: pick the prices at a particular
# time every day.
# Irregular data has to be regularized first.
#
# print is called with a single parenthesised argument throughout, which
# behaves the same under Python 2 and Python 3.
rng = pd.date_range('2012-06-01 09:30', '2012-06-06 15:59', freq='T')
ts = Series(np.arange(len(rng), dtype=float), index=rng)
print(ts)

# The 10:00 values.
print(ts[time(10, 0)])

# Equivalent: the at_time instance method can be used instead.
print(ts.at_time(time(10, 0)))

# Select the values lying between two time objects:
# here, from 10:00 to 10:01.
print(ts.between_time(time(10, 0), time(10, 1)))

# Getting the last value that occurred before 10:00.
# Keep 700 randomly chosen positions and set all the others to NA.
indexer = np.sort(np.random.permutation(len(ts))[700:])
irr_ts = ts.copy()
irr_ts[indexer] = np.nan
print(irr_ts['2012-06-01 09:50': '2012-06-01 10:00'])

# Passing a group of Timestamps to asof returns the valid (non-NA) value
# at or before each of those points in time.
selection = pd.date_range('2012-06-01 10:00', periods=4, freq='B')
print(irr_ts.asof(selection))
class HisRecord():
    """
    A single history record read from a haystack server.

    Attributes set by ``__init__``:
        - hisId : the haystack id of the trend
        - name  : the name found for that id (see ``getHisNameFromId``)
        - data  : pandas Series of the values, indexed by timestamp
                  localized to ``session.timezone``; the index is named
                  after the record.  Left unset if the Series cannot be
                  built.
    """

    # First number inside a value such as '21.8381°C'.
    _NUMBER = re.compile(r"[-+]?\d*\.*\d+")

    def __init__(self, session, hisId, dateTimeRange='today'):
        """
        GET data from server and fill this object with historical info.

        session       : object used to query the server; this method uses
                        its ``read`` method and ``timezone`` attribute
        hisId         : haystack id of the history to read
        dateTimeRange : range string appended to the ``hisRead`` request
        """
        self.hisId = hisId
        self.name = self.getHisNameFromId(session, self.hisId)
        index = []
        values = []

        for eachRows in session.read('hisRead?id=' + self.hisId + '&range=' + dateTimeRange)['rows']:
            # Split the first space-separated token of 'ts' on every
            # non-digit and build a datetime from all but the last two
            # numeric fields.
            # NOTE(review): the two dropped fields are presumably the UTC
            # offset (hh, mm) -- confirm against the server's format.
            stamp_fields = re.split(r'[^\d]', eachRows['ts'].split(' ')[0])[:-2]
            index.append(pd.Timestamp(pd.to_datetime(
                datetime.datetime(*map(int, stamp_fields)))))

            raw = eachRows['val']
            # This will allow conversion of Enum value to float so Pandas
            # will work
            if raw == 'F':
                values.append(False)
            elif raw == 'T':
                values.append(True)
            else:
                # Extract the float value when units are part of the value
                # (ex. 21.8381°C).  A value containing no number at all is
                # kept as is; indexing an empty findall() result here used
                # to raise IndexError before that fallback could run.
                number = self._NUMBER.search(raw)
                if number is not None and tools.isfloat(number.group()):
                    values.append(float(number.group()))
                else:
                    values.append(raw)

        try:
            # Declare Series and localize using Site Timezone
            self.data = Series(values, index=index).tz_localize(session.timezone)
            # Renaming index so the name will be part of the series.
            # The name is passed as a scalar: a list is unhashable and is
            # rejected as an index name by pandas >= 1.0.
            self.data = self.data.reindex(self.data.index.rename(self.name))
        except Exception:
            # Best effort: report the failure and carry on.
            print('%s is an Unknown history type' % self.hisId)

    def getHisNameFromId(self, session, pointId):
        """
        Retrieve name from id of an history.

        Each row's 'id' is split at its first space: the part before it is
        compared with pointId and the part after it is returned as the
        name.  Returns 'Id Not found' when no row matches.
        """
        for each in session.read("read?filter=his")['rows']:
            if each['id'].split(' ', 1)[0] == pointId:
                return (each['id'].split(' ', 1)[1])
        return 'Id Not found'

    def plot(self):
        """
        Draw a graph of the data
        """
        self.data.plot()

    def breakdownPlot(self, startTime = '08:00', endTime = '17:00',
                      bins=np.array([0, 0.5, 1, 18.0, 18.5, 19.0, 19.5,
                                     20.0, 20.5, 21.0, 21.5, 22.0, 22.5,
                                     23.0, 23.5, 24.0, 24.5, 25.0])):
        """
        By default, creates a breakdown plot of temperature distribution
        between 18 and 25.
        bins (distribution) can be passed as argument.
        By default, takes values between 8:00 and 17:00.

        startTime = string representation of time (ex. '08:00')
        endTime = string representation of time (ex. '17:00')
        bins = np.array representing distribution
        """
        x = self.data.between_time(startTime, endTime)
        # Assign each non-missing value to its bin, then plot bin sizes.
        barplot = pd.cut(x.dropna(), bins)
        x.groupby(barplot).size().plot(kind='bar')

    def simpleStats(self):
        """
        Shortcut for describe() pandas version
        """
        return self.data.describe()

    def __str__(self):
        return 'History Record of %s' % self.name
def test_between_time_raises(self):
    """``between_time`` on a Series without a DatetimeIndex raises TypeError."""
    # GH20725
    labels = "a b c".split()
    ser = Series(labels)

    with pytest.raises(TypeError, match="Index must be DatetimeIndex"):
        ser.between_time(start_time="00:00", end_time="12:00")