def setUp(self):
    """Build the fixture series (TS1-TS4), frame (DF1) and dict (DICT1)
    used by the tests: business-day indexed series of lengths 3 and 5."""
    anchor = datetime(2009, 12, 11)
    three_days = DateRange(anchor, periods=3, offset=datetools.bday)
    five_days = DateRange(anchor, periods=5, offset=datetools.bday)

    self.TS1 = Series([3, 1, 4], index=three_days)
    self.TS2 = Series([1, 5, 9, 2, 6], index=five_days)
    self.TS3 = Series([5, np.nan, 3], index=three_days)
    self.TS4 = Series([np.nan, 5, 8, 9, 7], index=five_days)

    self.DF1 = DataFrame(data={'x1': self.TS2, 'x2': self.TS4})
    self.DICT1 = {'x1': self.TS2, 'x2': self.TS4}
def get_metric_statistics(self, project_id, end_time, metric_name,
                          namespace, period, start_time, statistics,
                          unit=None, dimensions=None):
    """
    Return the list of statistics datapoints for the metric matching the
    given conditions, together with the unit.

    Parameters
    ----------
    project_id : project identifier used for all storage lookups
    end_time, start_time : datetime bounds of the query window
    metric_name, namespace, dimensions : metric identity
    period : aggregation period in seconds (converted to minutes below)
    statistics : sequence of statistic names (keys of ROLLING_FUNC_MAP)
    unit : optional unit override; falls back to the stored metric unit

    Returns
    -------
    (datapoints, unit) where datapoints is a filtered iterable of
    (timestamp, non-NaN statistics row) pairs.
    """
    def to_datapoint(df, idx):
        # Keep only rows that have at least one non-NaN statistic;
        # returning None lets filter(None, ...) drop empty rows below.
        datapoint = df.ix[idx].dropna()
        if len(datapoint):
            return idx, datapoint

    # Align the query window to whole minutes.
    end_idx = end_time.replace(second=0, microsecond=0)
    start_idx = start_time.replace(second=0, microsecond=0)
    # Extend the analysis window backwards by one period so rolling
    # aggregates at the window start have enough history.
    # NOTE(review): `period / 60` relies on integer division — assumes
    # Python 2 (or integral minutes); verify before porting to Python 3.
    start_ana_idx = start_idx - datetools.Minute() * (period / 60)
    daterange = DateRange(start_idx, end_idx, offset=datetools.Minute())
    daterange_ana = DateRange(start_ana_idx, end_idx,
                              offset=datetools.Minute())

    # load default unit for metric from database
    if unit == "None" or not unit:
        metric_key = self.cass.get_metric_key(project_id=project_id,
                                              namespace=namespace,
                                              metric_name=metric_name,
                                              dimensions=dimensions)
        if metric_key:
            unit = self.cass.get_metric_unit(metric_key)
        else:
            unit = "None"

    # load statistics data from database
    stats = self.cass.get_metric_statistics(project_id=project_id,
                                            namespace=namespace,
                                            metric_name=metric_name,
                                            start_time=start_ana_idx,
                                            end_time=end_time,
                                            period=period,
                                            statistics=statistics,
                                            dimensions=dimensions)

    period = period / 60  # convert sec to min

    stat = DataFrame(index=daterange)
    for statistic, series in zip(statistics, stats):
        func = self.ROLLING_FUNC_MAP[statistic]
        ts = TimeSeries(series, index=daterange_ana)
        rolled_ts = func(ts, period, min_periods=0)
        # Sample the rolled series every `period` minutes onto the
        # requested window.
        stat[statistic] = rolled_ts.ix[::period]
        LOG.debug("stat %s\n%s" % (statistic, stat[statistic]))

    ret = filter(None, (to_datapoint(stat, i) for i in stat.index))
    return ret, unit
def rountrip_archive(N, K=50, iterations=10):
    """Benchmark pandas round-trip serialization (HDF5 and pickle).

    Parameters
    ----------
    N : int
        Number of rows of random data.
    K : int
        Number of columns of random data.
    iterations : int
        Number of timed repetitions; per-iteration averages are printed.
    """
    # Create data
    arr = np.random.randn(N, K)
    dma = pandas.DataFrame(arr,
        DateRange('1/1/2000', periods=N, offset=datetools.Minute()))
    dma[201] = 'bar'

    # filenames
    filename_numpy = '/Users/wesm/tmp/numpy.npz'
    filename_larry = '/Users/wesm/tmp/archive.hdf5'
    filename_pandas = '/Users/wesm/tmp/pandas_tmp'

    def _cleanup():
        # Best-effort removal of stale files.  Only swallow OS-level
        # errors (missing file, permissions); the original bare
        # ``except:`` also hid KeyboardInterrupt/SystemExit.
        for path in (filename_numpy, filename_larry, filename_pandas):
            try:
                os.unlink(path)
            except OSError:
                pass

    # Delete old files
    _cleanup()

    # Time a round trip save and load
    pandas_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
    pandas_time = timeit(pandas_f, iterations) / iterations
    print('pandas (HDF5) %7.4f seconds' % pandas_time)

    # NOTE(review): this calls the same pandas_roundtrip as above even
    # though the label says "pickle" — looks like it should call a
    # pickle-based round-trip helper; confirm against the original repo.
    pickle_f = lambda: pandas_roundtrip(filename_pandas, dma, dma)
    pickle_time = timeit(pickle_f, iterations) / iterations
    print('pandas (pickle) %7.4f seconds' % pickle_time)

    # Delete old files
    _cleanup()
def _make_predict_dates(self):
    """Build the index labels for the prediction span and store them on
    ``self.data.predict_dates``.

    Three cases are handled:
    1. a known frequency -> build a date index from predict_start to
       predict_end (DatetimeIndex, or DateRange on older pandas);
    2. no frequency but integer endpoints -> a plain integer Index;
    3. otherwise -> in-sample prediction, slice the existing dates.
    """
    data = self.data
    dtstart = data.predict_start
    dtend = data.predict_end
    freq = data.freq

    if freq is not None:
        pandas_freq = _freq_to_pandas[freq]
        try:
            from pandas import DatetimeIndex
            dates = DatetimeIndex(start=dtstart, end=dtend,
                                  freq=pandas_freq)
        except ImportError as err:
            # Older pandas without DatetimeIndex: fall back to DateRange.
            from pandas import DateRange
            dates = DateRange(dtstart, dtend, offset = pandas_freq).values
    # handle
    elif freq is None and (isinstance(dtstart, int) and
                           isinstance(dtend, int)):
        from pandas import Index
        # endpoint is inclusive, hence dtend+1
        dates = Index(lrange(dtstart, dtend+1))
    # if freq is None and dtstart and dtend aren't integers, we're
    # in sample
    else:
        dates = self.data.dates
        start = self._get_dates_loc(dates, dtstart)
        end = self._get_dates_loc(dates, dtend)
        dates = dates[start:end+1] # is this index inclusive?
    self.data.predict_dates = dates
def setUp(self):
    """Create the sparse frame fixtures: NaN-holed frame, integer-kind
    frame, zero-filled and 2-filled variants, and an empty frame."""
    columns = ['A', 'B', 'C', 'D']
    self.data = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'C': np.arange(10),
        'D': [0, 1, 2, 3, 4, 5, nan, nan, nan, nan],
    }
    self.dates = DateRange('1/1/2011', periods=10)

    self.frame = SparseDataFrame(self.data, index=self.dates)
    self.iframe = SparseDataFrame(self.data, index=self.dates,
                                  default_kind='integer')

    def _filled(fill):
        # Replace NaNs with `fill` and build a frame whose default
        # fill value matches.
        vals = self.frame.values.copy()
        vals[np.isnan(vals)] = fill
        return SparseDataFrame(vals, columns=columns,
                               default_fill_value=fill,
                               index=self.dates)

    self.zframe = _filled(0)
    self.fill_frame = _filled(2)

    self.empty = SparseDataFrame()
def parse_lutkepohl_data(path): # pragma: no cover
    """
    Parse data files from Lutkepohl (2005) book

    Source for data files: www.jmulti.de

    Returns
    -------
    (data, date_range)
        ``data`` is the structured array read by ``np.genfromtxt`` and
        ``date_range`` is a pandas date index aligned with its rows.
    """
    from collections import deque
    from datetime import datetime
    import pandas
    import pandas.core.datetools as dt
    import re

    from statsmodels.compatnp.py3k import asbytes

    # Raw string literal: '\w' and '\d' in a plain literal are invalid
    # escape sequences (DeprecationWarning, SyntaxError in newer Pythons).
    regex = re.compile(asbytes(r'<(.*) (\w)([\d]+)>.*'))

    # Read all lines up front inside a context manager so the file
    # handle is not leaked (the original passed open() straight to deque).
    with open(path, 'rb') as f:
        lines = deque(f)

    to_skip = 0
    # skip the comment header terminated by '*/'
    while asbytes('*/') not in lines.popleft():
        to_skip += 1

    # find the '<year freq start>' marker line, e.g. '<1960 Q1>'
    while True:
        to_skip += 1
        line = lines.popleft()
        m = regex.match(line)
        if m:
            year, freq, start_point = m.groups()
            break

    data = np.genfromtxt(path, names=True, skip_header=to_skip+1)

    n = len(data)

    # generate the corresponding date range (using pandas for now)
    start_point = int(start_point)
    year = int(year)

    offsets = {
        asbytes('Q') : dt.BQuarterEnd(),
        asbytes('M') : dt.BMonthEnd(),
        asbytes('A') : dt.BYearEnd()
    }

    # create an instance
    offset = offsets[freq]

    inc = offset * (start_point - 1)
    start_date = offset.rollforward(datetime(year, 1, 1)) + inc

    try:
        # modern pandas: DatetimeIndex supersedes DateRange
        from pandas import DatetimeIndex   # pylint: disable=E0611
        date_range = DatetimeIndex(start=start_date, freq=offset, periods=n)
    except ImportError:
        from pandas import DateRange
        date_range = DateRange(start_date, offset=offset, periods=n)

    return data, date_range
def _get_range(self):
    """Return a minute-frequency DateRange spanning
    [now - left_offset, now + right_offset], anchored to the
    current UTC minute (seconds and microseconds zeroed)."""
    anchor = datetime.utcnow().replace(second=0, microsecond=0)
    window_start = anchor - timedelta(seconds=self.left_offset)
    window_end = anchor + timedelta(seconds=self.right_offset)
    return DateRange(window_start, window_end, offset=datetools.Minute())
def _make_predict_dates(self):
    """Compute the prediction date index from the model data's
    predict_start/predict_end/freq and store it on
    ``self._data.predict_dates``."""
    from pandas import DateRange
    meta = self._data
    offset = _freq_to_pandas[meta.freq]
    span = DateRange(meta.predict_start, meta.predict_end, offset=offset)
    meta.predict_dates = span.values
def test_predict_freq():
    # test that predicted dates have same frequency
    x = np.arange(1, 36.)

    if _pandas_08x:
        from pandas import date_range
        # there's a bug in pandas up to 0.10.2 for YearBegin
        #dates = date_range("1972-4-1", "2007-4-1", freq="AS-APR")
        dates = date_range("1972-4-30", "2006-4-30", freq="A-APR")
        series = Series(x, index=dates)
        model = TimeSeriesModel(series)
        #npt.assert_(model.data.freq == "AS-APR")
        npt.assert_(model.data.freq == "A-APR")

        # prediction window extends 10 years past the sample end
        start = model._get_predict_start("2006-4-30")
        end = model._get_predict_end("2016-4-30")
        model._make_predict_dates()

        predict_dates = model.data.predict_dates

        #expected_dates = date_range("2006-12-31", "2016-12-31",
        #                            freq="AS-APR")
        expected_dates = date_range("2006-4-30", "2016-4-30", freq="A-APR")
        npt.assert_equal(predict_dates, expected_dates)
        #ptesting.assert_series_equal(predict_dates, expected_dates)
    else:
        # pre-0.8 pandas: DateRange instead of date_range / DatetimeIndex
        from pandas import DateRange, datetools
        dates = DateRange("1972-1-1", "2007-1-1", offset=datetools.yearEnd)
        series = Series(x, index=dates)
        model = TimeSeriesModel(series)
        npt.assert_(model.data.freq == "A")

        start = model._get_predict_start("2006-12-31")
        end = model._get_predict_end("2016-12-31")
        model._make_predict_dates()

        predict_dates = model.data.predict_dates

        expected_dates = DateRange("2006-12-31", "2016-12-31",
                                   offset=datetools.yearEnd)
        npt.assert_array_equal(predict_dates, expected_dates)
def test_timeseries_preepoch(self):
    """Round-trip a pre-1970 (pre-epoch) time series.

    Skipped on Python < 2.7 and on platforms where pre-epoch
    timestamps overflow."""
    if sys.version_info[0] == 2 and sys.version_info[1] < 7:
        raise nose.SkipTest

    index = DateRange('1/1/1940', '1/1/1960')
    series = Series(np.random.randn(len(index)), index=index)
    try:
        self._check_roundtrip(series, tm.assert_series_equal)
    except OverflowError:
        raise nose.SkipTest('known failer on some windows platforms')
def panel_data2():
    """Return a 9-row DataFrame of staggered NaN patterns indexed by
    daily dates starting 1/1/2011."""
    dates = DateRange('1/1/2011', periods=9)
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5],
        'B': [0, 1, 2, 3, 4, 5, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4, 5],
        'D': [nan, 0, 1, nan, 2, 3, 4, 5, nan],
    }
    return DataFrame(columns, index=dates)
def panel_data3():
    """Return a 10-row DataFrame of staggered NaN patterns whose index
    is shifted back two periods from 1/1/2011."""
    dates = DateRange('1/1/2011', periods=10).shift(-2)
    columns = {
        'A': [nan, nan, nan, 0, 1, 2, 3, 4, 5, 6],
        'B': [0, 1, 2, 3, 4, 5, 6, nan, nan, nan],
        'C': [0, 1, 2, nan, nan, nan, 3, 4, 5, 6],
        'D': [nan, 0, 1, nan, 2, 3, 4, 5, 6, nan],
    }
    return DataFrame(columns, index=dates)
def _make_predict_dates(self):
    """Compute the prediction date index and store it on
    ``self._data.predict_dates``.

    Uses DatetimeIndex on modern pandas, falling back to DateRange on
    older versions.

    Fixes
    -----
    * ``except ImportError, err`` is Python-2-only syntax; replaced with
      ``except ImportError`` (the bound exception was unused anyway).
    * the computed ``dates`` were previously discarded; they are now
      stored on ``self._data.predict_dates`` for consistency with the
      sibling implementations of this method.
    """
    data = self._data
    dtstart = data.predict_start
    dtend = data.predict_end
    pandas_freq = _freq_to_pandas[data.freq]
    try:
        from pandas import DatetimeIndex
        dates = DatetimeIndex(start=dtstart, end=dtend,
                              freq=pandas_freq)
    except ImportError:
        # older pandas without DatetimeIndex
        from pandas import DateRange
        dates = DateRange(dtstart, dtend, offset=pandas_freq).values
    self._data.predict_dates = dates
def test_infer_freq():
    """_infer_freq recovers the generating frequency from date arrays,
    including short prefixes; a short business-day prefix is read as
    daily because B and D are indistinguishable within one week."""
    from pandas import DateRange
    d1 = datetime(2008, 12, 31)
    d2 = datetime(2012, 9, 30)

    ranges = dict((code, DateRange(d1, d2,
                                   offset=_freq_to_pandas[code]).values)
                  for code in ('B', 'D', 'W', 'M', 'A', 'Q'))

    # business-day specifics
    b = ranges['B']
    assert _infer_freq(b[2:5]) == 'B'
    assert _infer_freq(b[:3]) == 'D'  # too short to tell B from D
    assert _infer_freq(b) == 'B'

    # full ranges and 3-element prefixes for the unambiguous codes
    for code in ('D', 'W', 'M', 'A', 'Q'):
        assert _infer_freq(ranges[code]) == code
        assert _infer_freq(ranges[code][:3]) == code
def test_infer_freq():
    """_infer_freq recovers the generating frequency from full ranges
    and from 2-element prefixes (a 2-element business-day prefix reads
    as daily)."""
    from pandas import DateRange
    d1 = datetime(2008, 12, 31)
    d2 = datetime(2012, 9, 30)

    ranges = {}
    for code in ('B', 'D', 'W', 'M', 'A', 'Q'):
        ranges[code] = DateRange(d1, d2,
                                 offset=_freq_to_pandas[code]).values

    # full ranges infer their own code
    for code in ('B', 'D', 'W', 'M', 'A', 'Q'):
        npt.assert_string_equal(_infer_freq(ranges[code]), code)

    b = ranges['B']
    npt.assert_string_equal(_infer_freq(b[2:4]), 'B')
    npt.assert_string_equal(_infer_freq(b[:2]), 'D')  # B vs D ambiguous

    # 2-element prefixes for the unambiguous codes
    for code in ('D', 'W', 'M', 'A', 'Q'):
        npt.assert_string_equal(_infer_freq(ranges[code][:2]), code)
def _idx_from_dates(d1, d2, freq):
    """
    Returns an index offset from datetimes d1 and d2. d1 is expected to be
    the last date in a date series and d2 is the out of sample date.

    Notes
    -----
    Rounds down the index if the end date is before the next date at freq.
    Does not check the start date to see whether it is on the offset but
    assumes that it is.
    """
    from pandas import DateRange
    # Number of periods strictly after d1 up to and including d2:
    # the range includes d1 itself, hence the -1.
    return len(DateRange(d1, d2, offset=_freq_to_pandas[freq])) - 1
def test_setitem_ndarray(self):
    """Assigning a raw 2-D ndarray as a panel item stores it unchanged."""
    from pandas import DateRange, datetools

    month_ends = DateRange(start=datetime(2009, 1, 1),
                           end=datetime(2009, 12, 31),
                           offset=datetools.MonthEnd())
    lons_coarse = np.linspace(-177.5, 177.5, 72)
    lats_coarse = np.linspace(-87.5, 87.5, 36)
    panel = Panel(items=month_ends, major_axis=lons_coarse,
                  minor_axis=lats_coarse)

    values = np.random.randn(72 * 36).reshape((72, 36))
    when = datetime(2009, 2, 28)
    panel[when] = values

    assert_almost_equal(panel[when].values, values)
def test_keyerror_start_date():
    """Asking for a prediction start before the sample begins must
    raise ValueError."""
    x = np.arange(1, 36.)

    if _pandas_08x:
        from pandas import date_range
        # pandas up to 0.10.2 has a YearBegin bug, so use year-end dates
        index = date_range("1972-4-30", "2006-4-30", freq="A-APR")
    else:
        from pandas import DateRange, datetools
        index = DateRange("1972-1-1", "2007-1-1", offset=datetools.yearEnd)

    model = TimeSeriesModel(Series(x, index=index))

    # 1970 predates both sample starts above
    npt.assert_raises(ValueError, model._get_predict_start, "1970-4-30")
def test_shift(self):
    """Sparse shift matches dense shift; shift(0) returns a copy."""
    sparse = SparseSeries([nan, 1., 2., 3., nan, nan],
                          index=np.arange(6))

    unshifted = sparse.shift(0)
    self.assert_(unshifted is not sparse)  # must be a copy, not self
    assert_sp_series_equal(unshifted, sparse)

    # forward and backward shifts agree with the dense equivalent
    for periods in (1, -2):
        _dense_series_compare(sparse,
                              lambda s, n=periods: s.shift(n))

    dated = SparseSeries([nan, 1., 2., 3., nan, nan],
                         index=DateRange('1/1/2000', periods=6))
    _dense_series_compare(dated,
                          lambda s: s.shift(2, timeRule='WEEKDAY'))
    _dense_series_compare(dated,
                          lambda s: s.shift(2, offset=datetools.bday))
def test_constructor(self):
    """Exercise the SparseSeries constructor paths: from SparseSeries,
    from dense Series, from raw values + sparse_index, and the
    copy/no-copy semantics."""
    # test setup guys
    self.assert_(np.isnan(self.bseries.fill_value))
    self.assert_(isinstance(self.bseries.sp_index, BlockIndex))
    self.assert_(np.isnan(self.iseries.fill_value))
    self.assert_(isinstance(self.iseries.sp_index, IntIndex))

    self.assertEquals(self.zbseries.fill_value, 0)
    # zero-filled series densifies to the NaN series with NaNs -> 0
    assert_equal(self.zbseries.values,
                 self.bseries.to_dense().fillna(0))

    # pass SparseSeries
    s2 = SparseSeries(self.bseries)
    s3 = SparseSeries(self.iseries)
    s4 = SparseSeries(self.zbseries)
    assert_sp_series_equal(s2, self.bseries)
    assert_sp_series_equal(s3, self.iseries)
    assert_sp_series_equal(s4, self.zbseries)

    # Sparse time series works
    date_index = DateRange('1/1/2000', periods=len(self.bseries))
    s5 = SparseSeries(self.bseries, index=date_index)
    self.assert_(isinstance(s5, SparseTimeSeries))

    # pass Series
    bseries2 = SparseSeries(self.bseries.to_dense())
    assert_equal(self.bseries.sp_values, bseries2.sp_values)

    # pass dict?

    # don't copy the data by default: mutating sp_values must be
    # visible through the original array
    values = np.ones(len(self.bseries.sp_values))
    sp = SparseSeries(values, sparse_index=self.bseries.sp_index)
    sp.sp_values[:5] = 97
    self.assert_(values[0] == 97)

    # but can make it copy! mutating the copy leaves `values` at 97
    sp = SparseSeries(values, sparse_index=self.bseries.sp_index,
                      copy=True)
    sp.sp_values[:5] = 100
    self.assert_(values[0] == 97)
def setUp(self):
    """Create the sparse series fixtures: block/integer pairs for two
    datasets, with NaN and zero fill-value variants, plus one
    date-indexed block series."""
    def both_kinds(data, idx, **kwds):
        # build the block-kind / integer-kind pair over the same data
        return (SparseSeries(data, index=idx, kind='block', **kwds),
                SparseSeries(data, index=idx, kind='integer', **kwds))

    arr, index = _test_data1()
    self.bseries, self.iseries = both_kinds(arr, index)
    self.bseries.name = 'bseries'
    self.ts = self.bseries
    date_index = DateRange('1/1/2011', periods=len(index))
    self.btseries = SparseSeries(arr, index=date_index, kind='block')

    arr, index = _test_data2()
    self.bseries2, self.iseries2 = both_kinds(arr, index)

    arr, index = _test_data1_zero()
    self.zbseries, self.ziseries = both_kinds(arr, index, fill_value=0)

    arr, index = _test_data2_zero()
    self.zbseries2, self.ziseries2 = both_kinds(arr, index, fill_value=0)
def test_infer_freq():
    """_infer_freq recovers the generating frequency on both the
    pandas >= 0.8 (DatetimeIndex) and older (DateRange) code paths.
    Note the inferred spellings differ: 0.8+ returns anchored codes
    like 'W-SUN'/'A-DEC', older pandas returns bare 'W'/'A'."""
    d1 = datetime(2008, 12, 31)
    d2 = datetime(2012, 9, 30)

    if _pandas_08x:
        b = DatetimeIndex(start=d1, end=d2,
                          freq=_freq_to_pandas['B']).values
        d = DatetimeIndex(start=d1, end=d2,
                          freq=_freq_to_pandas['D']).values
        w = DatetimeIndex(start=d1, end=d2,
                          freq=_freq_to_pandas['W']).values
        m = DatetimeIndex(start=d1, end=d2,
                          freq=_freq_to_pandas['M']).values
        a = DatetimeIndex(start=d1, end=d2,
                          freq=_freq_to_pandas['A']).values
        q = DatetimeIndex(start=d1, end=d2,
                          freq=_freq_to_pandas['Q']).values
        assert _infer_freq(w) == 'W-SUN'
        assert _infer_freq(a) == 'A-DEC'
        assert _infer_freq(q) == 'Q-DEC'
        assert _infer_freq(w[:3]) == 'W-SUN'
        assert _infer_freq(a[:3]) == 'A-DEC'
        assert _infer_freq(q[:3]) == 'Q-DEC'
    else:
        from pandas import DateRange
        b = DateRange(d1, d2, offset=_freq_to_pandas['B']).values
        d = DateRange(d1, d2, offset=_freq_to_pandas['D']).values
        w = DateRange(d1, d2, offset=_freq_to_pandas['W']).values
        m = DateRange(d1, d2, offset=_freq_to_pandas['M']).values
        a = DateRange(d1, d2, offset=_freq_to_pandas['A']).values
        q = DateRange(d1, d2, offset=_freq_to_pandas['Q']).values
        assert _infer_freq(w) == 'W'
        assert _infer_freq(a) == 'A'
        assert _infer_freq(q) == 'Q'
        assert _infer_freq(w[:3]) == 'W'
        assert _infer_freq(a[:3]) == 'A'
        assert _infer_freq(q[:3]) == 'Q'

    # codes spelled the same on both paths
    assert _infer_freq(b[2:5]) == 'B'
    # a 3-element business-day prefix is indistinguishable from daily
    assert _infer_freq(b[:3]) == 'D'
    assert _infer_freq(b) == 'B'
    assert _infer_freq(d) == 'D'
    assert _infer_freq(m) == 'M'
    assert _infer_freq(d[:3]) == 'D'
    assert _infer_freq(m[:3]) == 'M'
def test_can_serialize_dates(self):
    """A frame indexed by datetime.date objects survives a round trip."""
    date_index = [stamp.date() for stamp in DateRange('1/1/2000',
                                                      '1/30/2000')]
    frame = DataFrame(np.random.randn(len(date_index), 4),
                      index=date_index)
    self._check_roundtrip(frame, tm.assert_frame_equal)
def daysLeft(self, date):
    """Return the number of business days from `date` to the
    expiration date."""
    # DateRange was removed in pandas 0.14+ in favour of DatetimeIndex,
    # so this import will break on modern pandas.
    from pandas import DateRange
    span = DateRange(date, self.expirationDate())
    return len(span)
def daysLeft(self, date):
    """Return the number of business days from `date` to the
    expiration date."""
    return len(DateRange(date, self.expirationDate()))