def test_resample_5minute(self):
    """Resampling a minutely period series to '5min' must equal resampling
    its timestamp conversion."""
    minutes = period_range('1/1/2000', '1/5/2000', freq='T')
    values = np.random.randn(len(minutes))
    ts = TimeSeries(values, index=minutes)

    # Reference path: convert to timestamps first, then resample.
    via_timestamps = ts.to_timestamp().resample('5min')
    # Path under test: resample the period-indexed series directly.
    via_periods = ts.resample('5min')

    assert_series_equal(via_periods, via_timestamps)
def save2csv(r, csvpath=None, fileNamePrefix=''):
    """
    Parse and save to csv

    r must provide json() returning a sequence of (timestamp, value) items.
    The values are written to
    <fileNamePrefix>_FROM_<first index>_TO_<last index>.csv inside csvpath;
    the current working directory is used when csvpath is None.

    Any exception raised while parsing is reported on stdout and re-raised.
    """
    # Create the time series: one entry per item, keyed by the datetime
    # obtained from dt.fromtimestamp.
    try:
        d = {dt.fromtimestamp(tup[0]): tup[1] for tup in r.json()}
        Ts = TimeSeries(data=d)
        # The 'nan' entries are kept on purpose: filtering them with
        # Ts[Ts != 'nan'] gave an error and still has to be checked.
    except Exception:
        # Single-argument call form prints the same text on Python 2 and 3.
        print("-------> Problem with Flukso data parsing <-------")
        raise

    # save to file
    if csvpath is None:
        csvpath = os.getcwd()
    stamp_format = "%Y-%m-%d_%H-%M-%S"
    s = strftime(stamp_format, Ts.index[0].timetuple())
    e = strftime(stamp_format, Ts.index[-1].timetuple())
    filename = fileNamePrefix + '_FROM_' + s + '_TO_' + e + '.csv'
    Ts.to_csv(os.path.join(csvpath, filename))
def save2csv(r, csvpath=None, fileNamePrefix=''):
    """
    Parse and save to csv

    The items returned by r.json() are read as (timestamp, value) and stored
    in a TimeSeries, which is then written to a csv file named
    <fileNamePrefix>_FROM_<first index>_TO_<last index>.csv. The file goes
    into csvpath, or into the current working directory when csvpath is None.

    Parsing problems are announced on stdout and the exception is re-raised.
    """
    # Create the time series
    try:
        d = {}
        for tup in r.json():
            d[dt.fromtimestamp(tup[0])] = tup[1]
        Ts = TimeSeries(data=d)
        # Dropping the 'nan' values (Ts = Ts[Ts != 'nan']) gives an error.
        # Should be checked, but for now the nan's are kept.
    except Exception:
        # Call syntax with one argument behaves identically on Python 2 and
        # Python 3, unlike the print statement.
        print("-------> Problem with Flukso data parsing <-------")
        raise

    # save to file
    if csvpath is None:
        csvpath = os.getcwd()
    s = strftime("%Y-%m-%d_%H-%M-%S", Ts.index[0].timetuple())
    e = strftime("%Y-%m-%d_%H-%M-%S", Ts.index[-1].timetuple())
    target = os.path.join(csvpath,
                          fileNamePrefix + '_FROM_' + s + '_TO_' + e + '.csv')
    Ts.to_csv(target)
def test_pad_nan(self):
    """fillna(method='pad') propagates the last valid value forward and
    leaves a leading NaN untouched."""
    labels = ['z', 'a', 'b', 'c', 'd']
    x = TimeSeries([np.nan, 1., np.nan, 3., np.nan], labels, dtype=float)
    x = x.fillna(method='pad')

    expected = TimeSeries([np.nan, 1.0, 1.0, 3.0, 3.0], labels, dtype=float)
    assert_series_equal(x[1:], expected[1:])

    # The first element is compared separately from the slice above.
    # Each condition gets its own assertion: a second positional argument
    # would be used as the failure message, not checked as a condition.
    self.assertTrue(np.isnan(x[0]))
    self.assertTrue(np.isnan(expected[0]))
def test_median(self):
    """median() of the series agrees with np.median for the plain fixture,
    for a copy with NaN holes, and for integer data."""
    self.assertAlmostEqual(np.median(self.ts), self.ts.median())

    # Blank out every second point; the reference value is computed on the
    # remaining valid points only.
    holed = self.ts.copy()
    holed[::2] = np.NaN
    self.assertAlmostEqual(np.median(holed.valid()), holed.median())

    # test with integers, test failure
    ones = TimeSeries(np.ones(10, dtype=int), index=range(10))
    self.assertAlmostEqual(np.median(ones), ones.median())
def __init__(self, data, dtime,**kwargs):
    """
    Time series w/ specific IO methods

    data and dtime become the values and the index of the series; every
    keyword argument is stored as an attribute of the instance.
    """
    # Keyword arguments are copied onto the instance first. self.baseyear,
    # read below, is not assigned anywhere else in this method.
    self.__dict__.update(kwargs)

    TimeSeries.__init__(self, data, index=dtime)

    # Time coordinates
    self.nt = self.index.shape
    origin = pd.datetime(self.baseyear, 1, 1)
    self.tsec = othertime.SecondsSince(self.index, basetime=origin)
def __init__(self, data, dtime, **kwargs):
    """
    Time series w/ specific IO methods

    Builds the underlying TimeSeries from data indexed by dtime and keeps
    all keyword arguments as instance attributes.
    """
    # Attributes supplied by the caller (self.baseyear is read further down
    # and is not set by this method itself).
    self.__dict__.update(kwargs)

    TimeSeries.__init__(self, data, index=dtime)

    # Time coordinates
    self.nt = self.index.shape
    first_day_of_baseyear = pd.datetime(self.baseyear, 1, 1)
    self.tsec = othertime.SecondsSince(
        self.index, basetime=first_day_of_baseyear)
def _split(self, frame):
    """Divide frame['adjclose'] in place by the split ratio.

    Does nothing when self.share_afterward equals 1. Otherwise rows of
    frame dated on or before the day preceding self.ex_date are divided by
    self.share_afterward, and later rows up to today are divided by 1.0.
    """
    if self.share_afterward == 1:
        return

    day_before_ex = self.ex_date - datetime.timedelta(days=1)
    today = datetime.date.today()

    # Two anchor points only; back-filling onto frame.index gives every row
    # the ratio of the next anchor at or after its own date.
    ratios = TimeSeries([self.share_afterward, 1.0],
                        index=[day_before_ex, today])
    aligned_ratios = ratios.reindex(frame.index, method='backfill')
    frame['adjclose'] = frame['adjclose'] / aligned_ratios
def _divide(self, frame):
    """Subtract the cash amount from frame['adjclose'] in place.

    Does nothing when self.cash_afterward equals 0. Otherwise rows of frame
    dated on or before the day preceding self.ex_date are reduced by
    self.cash_afterward, and later rows up to today are reduced by 0.0.
    """
    if self.cash_afterward == 0:
        return

    day_before_ex = self.ex_date - datetime.timedelta(days=1)
    today = datetime.date.today()

    # Two anchor points only; back-filling onto frame.index gives every row
    # the amount of the next anchor at or after its own date.
    amounts = TimeSeries([self.cash_afterward, 0.0],
                         index=[day_before_ex, today])
    aligned_amounts = amounts.reindex(frame.index, method='backfill')
    frame['adjclose'] = frame['adjclose'] - aligned_amounts
def test_resample_fill_missing(self):
    """Annual resampling of a gappy period series matches the timestamp
    path, both without and with forward filling."""
    years = PeriodIndex([2000, 2005, 2007, 2009], freq='A')
    s = TimeSeries(np.random.randn(4), index=years)
    stamps = s.to_timestamp()

    # First pass uses the defaults, second pass forward-fills the gaps.
    for options in ({}, {'fill_method': 'ffill'}):
        filled = s.resample('A', **options)
        expected = stamps.resample('A', **options).to_period('A')
        assert_series_equal(filled, expected)
def setUp(self):
    """ Building test case scaffolding. """
    # Swap the forecast and graphics attributes of fore for Mock objects.
    for collaborator in ('forecast', 'graphics'):
        setattr(fore, collaborator, Mock())
    self.config_file = Mock()

    # Ten-point series with values 0..9 on a period index.
    ten_periods = PeriodIndex(start=ctime(10000), periods=10)
    self.tseries = TimeSeries(data=range(10), index=ten_periods)
def test():
    """DataFrame editor test"""
    from numpy import nan

    def edit_and_report(obj):
        # Open obj in the editor, print what it gives back and pass it on.
        out = test_edit(obj)
        print("out:", out)
        return out

    # Small frame mixing value types, with NaN among the row and column labels.
    rows = [
        [True, "bool"],
        [1+1j, "complex"],
        ['test', "string"],
        [1.11, "float"],
        [1, "int"],
        [np.random.rand(3, 3), "Unkown type"],
        ["Large value", 100],
        ["áéí", "unicode"],
    ]
    row_labels = ['a', 'b', nan, nan, nan, 'c', "Test global max", 'd']
    mixed = DataFrame(rows, index=row_labels, columns=[nan, 'Type'])
    edit_and_report(mixed)
    edit_and_report(mixed.iloc[0])

    big = DataFrame(np.random.rand(100001, 10))
    # Sorting large DataFrame takes time
    big.sort(columns=[0, 1], inplace=True)
    edit_and_report(big)

    return edit_and_report(TimeSeries(np.arange(10)))
def get_metric_statistics(self, project_id, end_time, metric_name, namespace,
                          period, start_time, statistics, unit=None,
                          dimensions=None):
    """
    Return the list of statistics for the metric that matches the given
    conditions.

    start_time and end_time are truncated to the minute. period is given in
    seconds. Raw values are loaded from self.cass starting one period before
    start_time, rolled with the function registered in
    self.ROLLING_FUNC_MAP for each requested statistic, and sampled every
    period minutes.

    Returns a tuple (datapoints, unit). Each datapoint is a pair
    (index value, non-empty row of statistics). When unit is empty or
    "None", the unit stored for the metric is used if the metric key is
    found, otherwise "None".
    """
    def non_empty_row(table, stamp):
        # Rows whose statistics are all missing yield None and are filtered
        # out by the caller.
        row = table.ix[stamp].dropna()
        if len(row):
            return stamp, row

    window = period / 60  # convert sec to min

    last_minute = end_time.replace(second=0, microsecond=0)
    first_minute = start_time.replace(second=0, microsecond=0)
    # The analysis range starts one window earlier than the reported range.
    first_minute_ana = first_minute - datetools.Minute() * window
    report_range = DateRange(first_minute, last_minute,
                             offset=datetools.Minute())
    analysis_range = DateRange(first_minute_ana, last_minute,
                               offset=datetools.Minute())

    # load default unit for metric from database
    if not unit or unit == "None":
        metric_key = self.cass.get_metric_key(project_id=project_id,
                                              namespace=namespace,
                                              metric_name=metric_name,
                                              dimensions=dimensions)
        unit = self.cass.get_metric_unit(metric_key) if metric_key else "None"

    # load statistics data from database
    stats = self.cass.get_metric_statistics(project_id=project_id,
                                            namespace=namespace,
                                            metric_name=metric_name,
                                            start_time=first_minute_ana,
                                            end_time=end_time,
                                            period=period,
                                            statistics=statistics,
                                            dimensions=dimensions)

    stat = DataFrame(index=report_range)
    for statistic, raw_values in zip(statistics, stats):
        roll = self.ROLLING_FUNC_MAP[statistic]
        raw_ts = TimeSeries(raw_values, index=analysis_range)
        rolled = roll(raw_ts, window, min_periods=0)
        stat[statistic] = rolled.ix[::window]
        LOG.debug("stat %s\n%s" % (statistic, stat[statistic]))

    ret = filter(None, (non_empty_row(stat, stamp) for stamp in stat.index))
    return ret, unit
def attach_ynames(self, result):
    """Label result with self.ynames.

    Returns a TimeSeries named self.ynames when the squeezed result has
    fewer than two dimensions, otherwise a DataFrame of the unsqueezed
    result with self.ynames as columns.
    """
    squeezed = result.squeeze()
    if squeezed.ndim >= 2:
        return DataFrame(result, columns=self.ynames)
    # May be zero-dim, for example in the case of forecast one step in tsa
    return TimeSeries(squeezed, name=self.ynames)
def test_ar_select_order():
    """AIC order selection with maxlag=12 on a seeded sample must pick 2."""
    # 2118
    np.random.seed(12345)
    sample = sm.tsa.arma_generate_sample([1, -.75, .3], [1], 100)
    monthly = DatetimeIndex(start='1/1/1990', periods=100, freq='M')
    model = AR(TimeSeries(sample, index=monthly))
    chosen = model.select_order(maxlag=12, ic='aic')
    assert_(chosen == 2)
def create_data():
    """ create the pickle data """
    import numpy as np
    import pandas
    from pandas import (Series, TimeSeries, DataFrame, Panel,
                        SparseSeries, SparseTimeSeries, SparseDataFrame,
                        SparsePanel, Index, MultiIndex, PeriodIndex,
                        date_range, bdate_range, Timestamp)

    def paired_index(first, second, names):
        # Pair the two label lists element-wise into a MultiIndex.
        return MultiIndex.from_tuples(tuple(zip(first, second)), names=names)

    # Raw columns shared by the series and frames below.
    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
    }

    index = {
        'int': Index(np.arange(10)),
        'date': date_range('20130101', periods=10),
    }

    mi = {
        'reg2': paired_index(
            ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
            ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
            ['first', 'second']),
    }

    series = {
        'float': Series(data['A']),
        'int': Series(data['B']),
        'mixed': Series(data['E']),
        'ts': TimeSeries(np.arange(10).astype(np.int64),
                         index=date_range('20130101', periods=10)),
        'mi': Series(np.arange(5).astype(np.float64),
                     index=paired_index([1, 1, 2, 2, 2], [3, 4, 3, 4, 5],
                                        ['one', 'two'])),
    }

    frame = {
        'float': DataFrame({'A': series['float'],
                            'B': series['float'] + 1}),
        'int': DataFrame({'A': series['int'], 'B': series['int'] + 1}),
        'mixed': DataFrame({k: data[k] for k in ['A', 'B', 'C', 'D']}),
        'mi': DataFrame({'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)},
                        index=paired_index(
                            ['bar', 'bar', 'baz', 'baz', 'baz'],
                            ['one', 'two', 'one', 'two', 'three'],
                            ['first', 'second'])),
    }

    panel = {
        'float': Panel({'ItemA': frame['float'],
                        'ItemB': frame['float'] + 1}),
    }

    return {
        'series': series,
        'frame': frame,
        'panel': panel,
        'index': index,
        'mi': mi,
        'sp_series': {'float': _create_sp_series(),
                      'ts': _create_sp_tsseries()},
        'sp_frame': {'float': _create_sp_frame()},
    }
def test_period_index():
    """A monthly PeriodIndex is reported as freq "M" by the model data,
    whether the values are wrapped in a Series or a TimeSeries."""
    # test 1285
    from pandas import PeriodIndex, TimeSeries
    dates = PeriodIndex(start="1/1/1990", periods=20, freq="M")
    x = np.arange(1, 21.)

    for container in (Series, TimeSeries):
        model = TimeSeriesModel(container(x, index=dates))
        npt.assert_(model.data.freq == "M")
def _divide(self, frame):
    """subtract the cash amount from the adjclose column

    Does nothing when self.cash_afterward equals 0. Otherwise rows of frame
    dated on or before the day preceding self.ex_date are reduced by
    self.cash_afterward, and later rows up to today are reduced by 0.0.
    Both anchor dates are passed through self.d2t before being used as
    index values.

    WARNING
    =======
    frame should be chronological ordered otherwise wrong backfill.
    """
    if self.cash_afterward == 0:
        return

    day_before_ex = self.ex_date - datetime.timedelta(days=1)
    anchors = [self.d2t(day_before_ex), self.d2t(datetime.date.today())]

    # Two anchor points only; back-filling onto frame.index gives every row
    # the amount of the next anchor at or after its own date.
    amounts = TimeSeries([self.cash_afterward, 0.0], index=anchors)
    aligned_amounts = amounts.reindex(frame.index, method='backfill')
    frame['adjclose'] = frame['adjclose'] - aligned_amounts
# NOTE(review): this span is a fragment of a larger script. The first three
# statements read and update tradeSummary / tradeSummaryList, which are
# created outside the visible code; they presumably sit inside a loop over
# trades -- confirm the intended nesting against the full script.

# Add the 'DailyPNLChange' column and zero it on rows whose Action is 'none'.
tradeSummary = addDailyPNLChange(tradeSummary, 'DynamicDollarPNL', 'RealizedDollarPNL', 'DailyPNLChange')
tradeSummary.DailyPNLChange[tradeSummary.Action == 'none'] = 0
tradeSummaryList.append(tradeSummary)
#cd "C:\Gary Yang\Dropbox\seasonal_Report\Test"
#tradeSummary.to_csv('test.csv')
################################################################################################
# Everything below runs with this hard-coded directory as working directory.
os.chdir("C:\\Users\\GYANG\\Google Drive\\Historical Data\\Sensonal_Fu_D")
#""" Generate a total Summary from all trades depends on # of Trades vs. PNL """
#totalSummary = mergeResultList(tradeSummaryList)
#totalSummary.to_csv('totalSummary.csv')
""" Generate time-series based data """
# Daily dates from January 1st of the year before backTestYearStart up to now.
t_start = stringDate_toDatetime(str(backTestYearStart-1) + "0101")
t_end = datetime.now()
ts = TimeSeries(pd.date_range(t_start, t_end))
''' For plotting '''
t_code = getDateCode(ts)
t_int = getDateInt(ts)
timeFrame_index = 'DateTime'
tradeSummaryList = set_timeFrameIndex(tradeSummaryList, timeFrame_index)
#''' get a certain tradeSummary to fit in timeFrame '''
#x = tradeEnd - tradeStart
#tf_tradeSummary = tradeSummary_fitTimeFrame(x, tradeSummaryList, ts, 'TradeID', 'Year', 'Symbol', 'StartTrading', 'EndTrading', 'Date', \
# 'Close', 'IsPeriod', 'Action', 'PosSize', 'PosDir', 'PosPrc', 'DynamicPNL', 'RealizedPNL', 'DynamicDollarPNL', 'RealizedDollarPNL', 'DailyPNLChange')
#tf_tradeSummary.to_csv(str(x)+'.csv')
""" Generate Lists that fill the timeFrame """
#startTradingList = []
#endTradingList = []
def test_cant_fill_missing_dups(self):
    """Resampling a period series whose index contains duplicates raises."""
    years_with_dups = PeriodIndex([2000, 2005, 2005, 2007, 2007], freq='A')
    s = TimeSeries(np.random.randn(5), index=years_with_dups)
    with self.assertRaises(Exception):
        s.resample('A')
def _simple_pts(start, end, freq='D'):
    """Return a TimeSeries of standard-normal random values on the period
    range from start to end at the given frequency."""
    periods = period_range(start, end, freq=freq)
    noise = np.random.randn(len(periods))
    return TimeSeries(noise, index=periods)
def create_data():
    """ create the pickle data """
    from distutils.version import LooseVersion
    import numpy as np
    import pandas
    from pandas import (Series, TimeSeries, DataFrame, Panel,
                        SparseSeries, SparseTimeSeries, SparseDataFrame,
                        SparsePanel, Index, MultiIndex, PeriodIndex,
                        date_range, period_range, bdate_range, Timestamp,
                        Categorical)

    def paired_index(first, second, names):
        # Pair the two label lists element-wise into a MultiIndex.
        return MultiIndex.from_tuples(tuple(zip(first, second)), names=names)

    # Raw columns shared by the series and frames below.
    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.],
    }

    index = {
        'int': Index(np.arange(10)),
        'date': date_range('20130101', periods=10),
        'period': period_range('2013-01-01', freq='M', periods=10),
    }

    mi = {
        'reg2': paired_index(
            ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
            ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
            ['first', 'second']),
    }

    series = {
        'float': Series(data['A']),
        'int': Series(data['B']),
        'mixed': Series(data['E']),
        'ts': TimeSeries(np.arange(10).astype(np.int64),
                         index=date_range('20130101', periods=10)),
        'mi': Series(np.arange(5).astype(np.float64),
                     index=paired_index([1, 1, 2, 2, 2], [3, 4, 3, 4, 5],
                                        ['one', 'two'])),
        'dup': Series(np.arange(5).astype(np.float64),
                      index=['A', 'B', 'C', 'D', 'A']),
        'cat': Series(Categorical(['foo', 'bar', 'baz'])),
    }

    frame = {
        'float': DataFrame({'A': series['float'],
                            'B': series['float'] + 1}),
        'int': DataFrame({'A': series['int'], 'B': series['int'] + 1}),
        'mixed': DataFrame({k: data[k] for k in ['A', 'B', 'C', 'D']}),
        'mi': DataFrame({'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)},
                        index=paired_index(
                            ['bar', 'bar', 'baz', 'baz', 'baz'],
                            ['one', 'two', 'one', 'two', 'three'],
                            ['first', 'second'])),
        'dup': DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                         columns=['A', 'B', 'A']),
        'cat_onecol': DataFrame({'A': Categorical(['foo', 'bar'])}),
        'cat_and_float': DataFrame({
            'A': Categorical(['foo', 'bar', 'baz']),
            'B': np.arange(3).astype(np.int64)}),
    }

    panel = {
        'float': Panel({'ItemA': frame['float'],
                        'ItemB': frame['float'] + 1}),
        'dup': Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64),
                     items=['A', 'B', 'A']),
    }

    if LooseVersion(pandas.__version__) >= '0.14.1':
        # Pre-0.14.1 versions generated non-unpicklable mixed-type frames and
        # panels if their columns/items were non-unique.
        mixed_dup_df = DataFrame(data)
        mixed_dup_df.columns = list("ABCDA")
        mixed_dup_panel = Panel({'ItemA': frame['float'],
                                 'ItemB': frame['int']})
        mixed_dup_panel.items = ['ItemA', 'ItemA']

        frame['mixed_dup'] = mixed_dup_df
        panel['mixed_dup'] = mixed_dup_panel

    return {
        'series': series,
        'frame': frame,
        'panel': panel,
        'index': index,
        'mi': mi,
        'sp_series': {'float': _create_sp_series(),
                      'ts': _create_sp_tsseries()},
        'sp_frame': {'float': _create_sp_frame()},
    }
def attach_dates(self, result):
    """Return result as a TimeSeries indexed by self.predict_dates."""
    dates = self.predict_dates
    return TimeSeries(result, index=dates)
def test_median(self):
    """median() passes the shared stat-op check against np.median and
    agrees with np.median on integer data."""
    self._check_stat_op('median', np.median)

    # test with integers, test failure
    ones = TimeSeries(np.ones(10, dtype=int), index=range(10))
    reference = np.median(ones)
    self.assertAlmostEqual(reference, ones.median())
def create_data():
    """ create the pickle/msgpack data """
    def paired_index(first, second, names):
        # Pair the two label lists element-wise into a MultiIndex.
        return MultiIndex.from_tuples(tuple(zip(first, second)), names=names)

    # Period scalars and Period-valued series are only added from 0.17.0 on.
    has_period = LooseVersion(pandas.__version__) >= '0.17.0'

    # Raw columns shared by the series and frames below.
    data = {
        'A': [0., 1., 2., 3., np.nan],
        'B': [0, 1, 0, 1, 0],
        'C': ['foo1', 'foo2', 'foo3', 'foo4', 'foo5'],
        'D': date_range('1/1/2009', periods=5),
        'E': [0., 1, Timestamp('20100101'), 'foo', 2.]
    }

    scalars = {'timestamp': Timestamp('20130101')}
    if has_period:
        scalars['period'] = Period('2012', 'M')

    index = {
        'int': Index(np.arange(10)),
        'date': date_range('20130101', periods=10),
        'period': period_range('2013-01-01', freq='M', periods=10),
    }

    mi = {
        'reg2': paired_index(
            ['bar', 'bar', 'baz', 'baz', 'foo', 'foo', 'qux', 'qux'],
            ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two'],
            ['first', 'second']),
    }

    series = {
        'float': Series(data['A']),
        'int': Series(data['B']),
        'mixed': Series(data['E']),
        'ts': TimeSeries(np.arange(10).astype(np.int64),
                         index=date_range('20130101', periods=10)),
        'mi': Series(np.arange(5).astype(np.float64),
                     index=paired_index([1, 1, 2, 2, 2], [3, 4, 3, 4, 5],
                                        ['one', 'two'])),
        'dup': Series(np.arange(5).astype(np.float64),
                      index=['A', 'B', 'C', 'D', 'A']),
        'cat': Series(Categorical(['foo', 'bar', 'baz'])),
    }
    if has_period:
        series['period'] = Series([Period('2000Q1')] * 5)

    # Mixed-type frame whose column labels are not unique ('A' twice).
    mixed_dup_df = DataFrame(data)
    mixed_dup_df.columns = list("ABCDA")

    frame = {
        'float': DataFrame({'A': series['float'],
                            'B': series['float'] + 1}),
        'int': DataFrame({'A': series['int'], 'B': series['int'] + 1}),
        'mixed': DataFrame({k: data[k] for k in ['A', 'B', 'C', 'D']}),
        'mi': DataFrame({'A': np.arange(5).astype(np.float64),
                         'B': np.arange(5).astype(np.int64)},
                        index=paired_index(
                            ['bar', 'bar', 'baz', 'baz', 'baz'],
                            ['one', 'two', 'one', 'two', 'three'],
                            ['first', 'second'])),
        'dup': DataFrame(np.arange(15).reshape(5, 3).astype(np.float64),
                         columns=['A', 'B', 'A']),
        'cat_onecol': DataFrame({'A': Categorical(['foo', 'bar'])}),
        'cat_and_float': DataFrame({
            'A': Categorical(['foo', 'bar', 'baz']),
            'B': np.arange(3).astype(np.int64)}),
        'mixed_dup': mixed_dup_df,
    }

    # Panel whose item labels are not unique.
    mixed_dup_panel = Panel({'ItemA': frame['float'],
                             'ItemB': frame['int']})
    mixed_dup_panel.items = ['ItemA', 'ItemA']

    panel = {
        'float': Panel({'ItemA': frame['float'],
                        'ItemB': frame['float'] + 1}),
        'dup': Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64),
                     items=['A', 'B', 'A']),
        'mixed_dup': mixed_dup_panel,
    }

    return {
        'series': series,
        'frame': frame,
        'panel': panel,
        'index': index,
        'scalars': scalars,
        'mi': mi,
        'sp_series': {'float': _create_sp_series(),
                      'ts': _create_sp_tsseries()},
        'sp_frame': {'float': _create_sp_frame()},
    }