def _collect_and_plot(files):
    TS = []
    location = []
    for f in files:
        temperatures = [section[1] for section in parse.parse_file(f)[1:-1]
                        if section[1]['Plant'] == ['tmp']]
        for t in temperatures:
            if t['Step'][0] != '0000-00-00.01:00:00':
                print 'Not hourly readings of temperature. Abort.'
                break
            dates = ts.date_array(start_date=ts.Date('H', t['Start'][0]),
                                  length=len(t['Value']))
            data = [float(value.rsplit('/')[0]) for value in t['Value']]
            TS.append(ts.TimeSeries(data=data, dates=dates))
            if location and t['Installation'][0] != location:
                print 'Location changed during reading of gs2 files. ' \
                      'Probably some bad grouping of gs2 files.'
            location = t['Installation'][0]
    if TS:
        path = '/Users/tidemann/Documents/NTNU/devel/data/eklima/Telemark/'
        for file in os.listdir(path):
            try:
                series = xml.parse(path + file)
                sg.utils.plot_time_series([ts.concatenate((TS)), series],
                                          ['b-', 'r-'], [location, file])
            except:
                print file, 'had no data.'
    else:
        print 'No temperature data.'
def __init__(self, **kwargs):
    """
    kwargs = {'basinName': 'Mackenzie',
              'start_date': '2000-06-01',
              'end_date': '2010-06-30',
              'info_fl': r'C:\00_Work\02_Sim\00_Mackenzie\01_Data\01_Selected_RiverDischarge\RiverGages_description.csv',
              'pthIn': r'C:\00_Work\02_Sim\00_Mackenzie\01_Data\01_Selected_RiverDischarge'}
    """
    for key in kwargs:
        setattr(self, key, kwargs[key])
    self._load_rivergages()
    self._idate = ts.Date('D', self.start_date)
    self._idateHStr = self._idate.strfmt('%Y/%m/%d') + ' 00:00'
    self._fdate = ts.Date('D', self.end_date)
    self._dates = ts.date_array(start_date=self._idate,
                                end_date=self._fdate, freq='d')
    self.nr_days = self._fdate - self._idate + 1
    self._get_matrix()
def test_tsfromtxt(self):
    "Tests reading from a text file."
    fcontent = """#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
"""
    import os
    from datetime import datetime
    import tempfile
    (tmp_fd, tmp_fl) = tempfile.mkstemp()
    os.write(tmp_fd, fcontent)
    os.close(tmp_fd)
    mrectxt = tsfromtxt(tmp_fl, delimiter=',', names=tuple('ABCDEFG'),
                        datecols=0, skip_header=2, asrecarray=True)
    os.remove(tmp_fl)
    #
    dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
    self.failUnless(isinstance(mrectxt, TimeSeriesRecords))
    assert_equal(mrectxt._dates, date_array(dlist, 'M'))
    assert_equal(mrectxt.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
    assert_equal(mrectxt.F, [1, 1, 1, 1])
    assert_equal(mrectxt.E._mask, [1, 1, 1, 1])
    assert_equal(mrectxt.C, [1, 2, 300000, -1e-10])
def test_sorted(self):
    dates = [ts.Date('D', string='2007-01-%02i' % i) for i in (3, 2, 1)]
    (a, b) = zip(*[(3., 30), (2., 20), (1., 10), ])
    ndtype = [('a', np.float), ('b', np.int)]
    controldates = date_array(dates, freq='D')
    controldates.sort_chronologically()
    series = time_series(zip(*(a, b)), dates, freq='D', dtype=ndtype)
    assert_equal(series._data.tolist(), [(1., 10), (2., 20), (3., 30)])
    assert_equal(series._dates, controldates)
    #
    trec = time_records(zip(*(a, b)), dates, freq='D', dtype=ndtype)
    assert_equal(trec._data.tolist(), [(1., 10), (2., 20), (3., 30)])
    assert_equal(trec._dates, controldates)
    assert_equal(trec['a'], [1., 2., 3.])
    assert_equal(trec.a, [1., 2., 3.])
    #
    trec = fromrecords(zip(a, b), dates, names=('a', 'b'))
    assert_equal(trec._data.tolist(), [(1., 10), (2., 20), (3., 30)])
    assert_equal(trec._dates, controldates)
    assert_equal(trec['a'], [1., 2., 3.])
    assert_equal(trec.a, [1., 2., 3.])
    #
    trec = fromarrays([a, b], dates, names=('a', 'b'))
    assert_equal(trec._data.tolist(), [(1., 10), (2., 20), (3., 30)])
    assert_equal(trec._dates, controldates)
    assert_equal(trec['a'], [1., 2., 3.])
    assert_equal(trec.a, [1., 2., 3.])
def _get_year(year, d, n=1):
    """
    Return a time-series with the same frequency as the input time-series,
    holding n complete years starting from the input year, with values
    taken from the input series.

    :param year: base year
    :type year: integer
    :param d: time-series object
    :type d: time-series
    :param n: number of periods to take
    :type n: integer
    :return: output time-series
    :rtype: time-series
    """
    f = d.freqstr        # input frequency
    nels = _ts_nels(f)   # number of elements in one year (M=12, Q=4, A=1)
    N = n * nels         # total number of elements to take
    startd = d.start_date
    endd = d.end_date
    if f[0] == 'M':
        starty = ts.Date(f, year=year, month=1)
        endy = ts.Date(f, year=year, month=N)
    elif f[0] == 'Q':
        starty = ts.Date(f, year=year, quarter=1)
        endy = ts.Date(f, year=year, quarter=N)
    elif f[0] == 'A':
        starty = ts.Date(f, year=year)
        endy = ts.Date(f, year=year + N - 1)
    else:
        raise UnknownFrequencyError, f
    # Create a time series of N np.nan elements from starty with frequency f
    s = ts.time_series([np.nan for i in range(0, N)],
                       start_date=starty, freq=f)
    # create the date range
    da = ts.date_array(start_date=starty, end_date=endy, freq=f)
    d.fill_missing_dates()
    d.adjust_endpoints()
    # copy values from d to s
    d.mask = False
    for _d in da:
        s[_d] = np.nan
        if _d <= d.end_date:
            s[_d] = d[_d]
        else:
            s[_d] = np.nan
    return s
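# Usage sketch for _get_year (hypothetical data; assumes `_get_year` and its
# helper `_ts_nels` from above are in scope): extract one complete calendar
# year from a monthly series, with months outside the series left as NaN.
import numpy as np
import scikits.timeseries as ts

d = ts.time_series(np.arange(36, dtype=float),
                   start_date=ts.Date('M', year=2004, month=7))
s = _get_year(2005, d)   # 12 monthly values, Jan 2005 - Dec 2005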
def test_convert_to_annual(self):
    "Test convert_to_annual"
    base = dict(D=1, H=24, T=24 * 60, S=24 * 3600)
    #for fq in ('D', 'H', 'T', 'S'):
    # Don't test for minuTe and Second frequency, too time consuming.
    for fq in ('D', 'H'):
        dates = date_array(start_date=Date(fq, '2001-01-01 00:00:00'),
                           end_date=Date(fq, '2004-12-31 23:59:59'))
        bq = base[fq]
        series = time_series(range(365 * bq) * 3 + range(366 * bq),
                             dates=dates)
        control = ma.masked_all((4, 366 * bq), dtype=series.dtype)
        control[0, :58 * bq] = range(58 * bq)
        control[0, 59 * bq:] = range(58 * bq, 365 * bq)
        control[[1, 2]] = control[0]
        control[3] = range(366 * bq)
        test = convert_to_annual(series)
        assert_equal(test, control)
    #
    series = time_series(range(59, 365) + range(366) + range(365),
                         start_date=Date('D', '2003-03-01'))
    test = convert_to_annual(series)
    assert_equal(test[:, 59:62],
                 ma.masked_values([[-1, 59, 60],
                                   [59, 60, 61],
                                   [-1, 59, 60]], -1))
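# The masked control above encodes the leap-day convention: each row is one
# year padded to 366 slots, with Feb 29 masked in non-leap years. A compact
# sketch (assumes convert_to_annual is importable as in this test module):
import numpy as np
import scikits.timeseries as ts

series = ts.time_series(np.arange(365 + 366 + 365),
                        start_date=ts.Date('D', '2003-01-01'))
annual = convert_to_annual(series)  # shape (3, 366); slot 59 masked in 2003/2005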
def add_diurnal(tseries, sine_period, peak_mag):
    """
    Modulates a time series with a sine wave of amplitude peak_mag and
    period sine_period.

    Input: tseries, sine_period (float, hrs), peak_mag (float)
    Output: tseries with the sine factor applied (array-like)
    """
    # Convert sine_period to the same frequency as tseries:
    # create a time delta of magnitude sine_period, then convert
    # that time delta into the same frequency units as tseries.
    zero_date = ts.now('H')
    second_date = zero_date + sine_period
    time_delta = ts.date_array([zero_date, second_date])
    time_delta = time_delta.asfreq(tseries.freq)
    sine_period = float(time_delta[1] - time_delta[0])
    angular_freq = (2. * np.pi) / sine_period
    for i in range(len(tseries) - 1):
        passed_time = float(tseries.dates[i] - tseries.start_date)
        sine_factor = peak_mag * np.sin(angular_freq * passed_time)
        tseries[i] = tseries[i] + tseries[i] * sine_factor
    return tseries


# Generate power density function (pdf) to create synthetic TPM from
# mean, stdev, autocorr, npointsx
#
def gen_pdf(desired_mean, desired_stdev, bin_width):
    ## TODO
    return 0
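# Usage sketch (hypothetical data; assumes add_diurnal from above is in
# scope): superimpose a 24-hour cycle with 10% peak amplitude on a flat
# hourly series, so each day traces one full sine period.
import numpy as np
import scikits.timeseries as ts

flat = ts.time_series(np.ones(10 * 24), start_date=ts.now('H'))
wavy = add_diurnal(flat, sine_period=24, peak_mag=0.1)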
def _get_tseriesD(freq, date_values, kw):
    v = [xlrd.xldate_as_tuple(int(d), 0)
         for i, d in enumerate(date_values.flatten()) if not np.isnan(d)]
    D = [ts.Date(freq=str(freq), year=_v[0], month=_v[1], day=_v[2])
         for _v in v]
    date_array = ts.date_array(D)
    return date_array
def request(self, reqs, **kw):
    """Random Request

    >>> ds = Random()
    >>> xs = ds.request([{'Instrument':'NAME=PIPPO~:2012-12-31~2007-01-01~M',
    ...                   'Source': 'FRED'},])
    >>> ts = xs['PIPPO']._data
    >>> print ts.freqstr
    M
    """
    logger.debug('request')
    rx = {}
    kw = udict(kw)
    if 'SEED' in kw:
        seed = int(kw['SEED'])
        rand.seed(seed)
    for rq in reqs:
        logger.debug("Request: %s", rq)
        s = parse_instrument(rq['Instrument'])
        logger.debug(s)
        params = ldict(mk_params(s['TICKER']))
        if 'name' in params:
            name = params['name'].upper()
            del params['name']
        else:
            name = rq['Source'].upper()
        if 'NAME' in kw and kw['NAME']:
            name = kw['NAME'].upper()
        kw['NAME'] = name
        if s['FREQ'] != '0':
            dr = ts.date_array(freq=s['FREQ'], start_date=s['START'],
                               end_date=s['END'])
            L = len(dr)
        else:
            L = s['END'] - s['START']
        params['size'] = L
        dd = np.zeros(L)
        logger.debug(params)
        try:
            if re.match('^constant$', rq['Source'], re.I):
                name = 'CONSTANT'
                v = float(name)
                params['LOW'] = v
                params['HIGH'] = v
                dd = rand.uniform(**params)
            elif re.match('^walk$', rq['Source'], re.I):
                dd = RandomWalk(**params)
            elif re.match('^uniform|beta|binomial|chisquare|exponential|gamma|geometric|gumbel|hypergeometric|laplace|logistic|lognormal|logseries|multinomial|multivariate_normal|negative_binomial|noncentral_chisquare|noncentral_f|normal|pareto|poisson|power|rayleigh|standard_cauchy|standard_exponential|standard_gamma|standard_normal|standard_t|triangular|uniform|vonmises|wald|weibull|zipf$', rq['Source'], re.I):
                generator = rq['Source'].lower()
                if hasattr(rand, generator):
                    f = getattr(rand, generator)
                    dd = f(**params)
                ## Add other distributions here
                else:
                    raise TypeError, 'Unknown generator'
        except TypeError, exc:
            logging.error('%s generator - %s', rq['Source'], exc.args[0])
        except:
def _make_predict_dates(self):
    data = self._data
    dtstart = data.predict_start
    dtend = data.predict_end
    freq = data.freq
    #pandas_freq = _freq_to_pandas[freq]
    dates = date_array(start_date=dtstart, end_date=dtend,
                       freq=freq).toordinal().astype(int)
    self._data.predict_dates = asarray(
        [datetime.datetime.fromordinal(i) for i in dates])
def _get_tseriesQ(freq, date_values, kw):
    # print date_values
    by = 0
    if kw.has_key('YEAR'):
        by = eval(kw['YEAR']) - 1
    v = [(int(d) - 1) % 4 + 1 + (int(_i / 4) * 4) + by * 4
         for _i, d in enumerate(date_values.flatten()) if not np.isnan(d)]
    D = [ts.Date(freq=str(freq), value=_v) for _v in v]
    date_array = ts.date_array(D)
    return date_array
def setup(self):
    "Generic setup"
    d = np.arange(5)
    m = ma.make_mask([1, 0, 0, 1, 1])
    base_d = np.r_[d, d[::-1]].reshape(2, -1).T
    base_m = np.r_[[m, m[::-1]]].T
    base = ma.array(base_d, mask=base_m)
    mrec = mr.fromarrays(base.T,)
    dlist = ['2007-%02i' % (i + 1) for i in d]
    dates = date_array(dlist)
    mts = time_series(mrec, dates)
    rts = time_records(mrec, dates)
    self.data = [d, m, mrec, dlist, dates, mts, rts]
def test_dates_on_several_columns(self):
    "Test tsfromtxt when the date spans several columns."
    datatxt = """
2001, 01, 0.0, 10.
2001, 02, 1.1, 11.
2001, 02, 2.2, 12.
"""
    data = StringIO.StringIO(datatxt)
    dateconverter = lambda y, m: Date('M', year=int(y), month=int(m))
    test = tsfromtxt(data, delimiter=',', dtype=float, datecols=(0, 1),
                     dateconverter=dateconverter)
    assert_equal(test, [[0., 10.], [1.1, 11.], [2.2, 12.]])
    assert_equal(test.dates,
                 date_array(['2001-01', '2001-02', '2001-02'], freq='M'))
def _make_predict_dates(self):
    try:
        from scikits.timeseries import date_array
    except ImportError:
        self._data.predict_dates = None
        return  # without scikits.timeseries there is nothing more to do
    data = self._data
    dtstart = data.predict_start
    dtend = data.predict_end
    freq = data.freq
    #pandas_freq = _freq_to_pandas[freq]
    dates = date_array(start_date=dtstart, end_date=dtend,
                       freq=freq).toordinal().astype(int)
    self._data.predict_dates = asarray(
        [datetime.datetime.fromordinal(i) for i in dates])
def test_with_datecols(self):
    "Test two datecols"
    fcontent = StringIO.StringIO("""
year, month, A, B
2009, 01, 1, 1.
2009, 03, 3, 3.
""")
    dateconv = lambda y, m: Date("M", year=int(y), month=int(m))
    test = tsfromtxt(fcontent, delimiter=",", skip_header=1, names=True,
                     converters={'dates': dateconv}, datecols=(0, 1))
    dates = date_array(['2009-01', '2009-03'], freq='M')
    assert_equal(test.dates.tovalue(), dates)
    assert_equal(test['A'], [1, 3])
    assert_equal(test['B'], [1., 3.])
    assert_equal(test.dtype, np.dtype([('A', int), ('B', float)]))
def _get_tseriesM(freq, date_values, kw):
    dformat = 'INT'
    if kw.has_key('DFORMAT'):
        dformat = kw['DFORMAT']
    if dformat == 'INT':
        v = [(int(d) - 1) * 12 + i % 12 + 1
             for i, d in enumerate(date_values.flatten()) if not np.isnan(d)]
        D = [ts.Date(freq=str(freq), value=_v) for _v in v]
    elif dformat == 'XL_DATE':
        v = [xlrd.xldate_as_tuple(d, 0)
             for i, d in enumerate(date_values.flatten()) if not np.isnan(d)]
        D = [ts.Date(freq=str(freq), year=_v[0], month=_v[1]) for _v in v]
    else:
        logger.error('DATE FORMAT NOT SUPPORTED ON EXCEL READING')
        raise ValueError, dformat
    # print "FREQ=|%s|"%freq,D
    date_array = ts.date_array(D)
    return date_array
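# Usage sketch for the _get_tseries* helpers above (hypothetical cell
# values): map a flat array of Excel serial dates onto a monthly DateArray.
import numpy as np

cells = np.array([39814.0, 39845.0, 39873.0])  # Jan-Mar 2009 as Excel dates
dates = _get_tseriesM('M', cells, {'DFORMAT': 'XL_DATE'})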
def setUp(self):
    "Initializes"
    ndtype = [('lin', float), ('rand', float)]
    dlin = np.linspace(0, 10, 120)
    drnd = np.random.rand(120)
    data = np.array(zip(dlin, drnd), dtype=ndtype)
    dates = ts.date_array(start_date=ts.now('M') - 120, length=120, freq='M')
    enso = ENSOIndicator(np.random.rand(120) + np.linspace(-1, 1, 120),
                         dates=dates, thresholds=(-0.5, 0.5),
                         full_year='False', refseason='NDH', minsize=5)
    cdat = data.view(ClimateRecords)
    cdat._dates = dates
    cdat.ensoindicator = enso
    self.dlin = dlin
    self.cdat = cdat
    self.enso = enso
def test_with_names(self):
    "Tests w/ names"
    fcontent = StringIO.StringIO("""#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
""")
    test = tsfromtxt(fcontent, delimiter=",", datecols=0, skip_header=2,
                     names="A,B,C,D,E,F", freq='M')
    assert(isinstance(test, TimeSeries))
    dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
    assert_equal(test.dates.tovalue(),
                 date_array(dlist, freq='M').tovalue())
    assert_equal(test.dtype.names, ['A', 'B', 'C', 'D', 'E', 'F'])
    assert_equal(test['F'], [1, 1, 1, 1])
    assert_equal(test['E'].mask, [1, 1, 1, 1])
    assert_equal(test['C'], [1, 2, 300000, -1e-10])
def test_without_names(self):
    "Test w/o names"
    fcontent = StringIO.StringIO("""#
'Dates', 'One (S)','Two (I)','Three (F)','Four (M)','Five (-)','Six (C)'
'2007-01', 'strings',1,1.0,'mixed column',,1
'2007-02', 'with embedded "double quotes"',2,2.0,1.0,,1
'2007-03', 'strings',3,3.0E5,3,,1
'2007-05','strings',4,-1e-10,,,1
""")
    test = tsfromtxt(fcontent, delimiter=",", skip_header=1, names=True,
                     freq='M')
    assert(isinstance(test, TimeSeries))
    dlist = ['2007-%02i' % i for i in (1, 2, 3, 5)]
    assert_equal(test.dates.tovalue(),
                 date_array(dlist, freq='M').tovalue())
    assert_equal(test.dtype.names,
                 ['One_S', 'Two_I', 'Three_F', 'Four_M', 'Five_', 'Six_C'])
    assert_equal(test['Six_C'], [1, 1, 1, 1])
    assert_equal(test['Five_'].mask, [1, 1, 1, 1])
    assert_equal(test['Three_F'], [1, 2, 300000, -1e-10])
import statsmodels.api as sm
import numpy as np
import pandas

# Getting started
# ---------------

data = sm.datasets.sunspots.load()

# Right now an annual date series must be datetimes at the end of the year.
# We can use scikits.timeseries and datetime to create this array.

import datetime
import scikits.timeseries as ts
dates = ts.date_array(start_date=1700, length=len(data.endog), freq="A")

# To make an array of datetime types, we need an integer array of ordinals

#.. from datetime import datetime
#.. dt_dates = dates.toordinal().astype(int)
#.. dt_dates = np.asarray([datetime.fromordinal(i) for i in dt_dates])
dt_dates = dates.tolist()

# Using Pandas
# ------------

# Make a pandas TimeSeries or DataFrame
endog = pandas.Series(data.endog, index=dt_dates)

# and instantiate the model
ar_model = sm.tsa.AR(endog, freq='A')
* both la and pandas handle datetime objects as object arrays
* tabular requires conversion to structured dtype, but easy helper
  functions or methods are available in scikits.timeseries and tabular

* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""
import numpy as np
import scikits.timeseries as ts

s = ts.time_series([1, 2, 3, 4, 5],
                   dates=ts.date_array(["2001-01", "2001-01", "2001-02",
                                        "2001-03", "2001-03"], freq="M"))

print '\nUsing la'
import la
dta = la.larry(s.data, label=[range(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[range(len(s.data))])
s2 = ts.time_series(dta.group_mean(dat).x,
                    dates=ts.date_array(dat.x, freq="M"))
s2u = ts.remove_duplicated_dates(s2)
print repr(s)
print dat
print repr(s2)
print repr(s2u)

print '\nUsing pandas'
""" Look at some macro plots, then do some VARs and IRFs. """ import numpy as np import statsmodels.api as sm import scikits.timeseries as ts import scikits.timeseries.lib.plotlib as tplt data = sm.datasets.macrodata.load(as_pandas=False) data = data.data ### Create Timeseries Representations of a few vars dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1), end_date=ts.Date('Q', year=2009, quarter=3)) ts_data = data[['realgdp', 'realcons', 'cpi']].view(float).reshape(-1, 3) ts_data = np.column_stack((ts_data, (1 - data['unemp'] / 100) * data['pop'])) ts_series = ts.time_series(ts_data, dates) fig = tplt.tsfigure() fsp = fig.add_tsplot(221) fsp.tsplot(ts_series[:, 0], '-') fsp.set_title("Real GDP") fsp = fig.add_tsplot(222) fsp.tsplot(ts_series[:, 1], 'r-') fsp.set_title("Real Consumption") fsp = fig.add_tsplot(223) fsp.tsplot(ts_series[:, 2], 'g-') fsp.set_title("CPI")
def getHappinessStats(fromFunc=False, groupId=None, userId=None, endDateIn=None):
    condFilter = [1 == 1]
    companyId = request.form.get("companyId") and request.form.get("companyId") \
                or session.get("companyId")
    if companyId:
        condFilter.append(Happiness.companyId == companyId)
    else:
        return None
    if groupId:
        condFilter.append(Happiness.groupId == groupId)
    elif userId:
        condFilter.append(Happiness.userId == userId)
    if request.form.get('endDate'):
        endDateIn = request.form['endDate']
    elif fromFunc == True and not endDateIn:
        endDateIn = datetime.now().strftime("%Y-%m-%d")
    if not endDateIn:
        return None
    if request.form.get("startDate"):
        condFilter.append(Happiness.rdate > request.form['startDate'])

    # cols = func.date(Happiness.rdate).label("rdate"), func.avg(Happiness.happyVal)
    # if request.form.get('type')=='all' and request.form.get('groupId') and request.form.get('userId'):
    #     cols = cols + ( func.avg(func.IF(Happiness.groupId==groupId, Happiness.happyVal, None)) )
    #     cols = cols + ( func.avg(func.IF(Happiness.userId==userId, Happiness.happyVal, None)) )
    cols = func.date(Happiness.rdate).label("rdate"), \
           func.avg(Happiness.happyVal), \
           func.avg(func.IF(Happiness.groupId == request.form.get('groupId'),
                            Happiness.happyVal, None)), \
           func.avg(func.IF(Happiness.userId == request.form.get('userId'),
                            Happiness.happyVal, None))
    db_result = db_session.query(*cols)\
        .group_by(func.date(Happiness.rdate))\
        .filter(func.date(Happiness.rdate) <= endDateIn)\
        .filter(*condFilter).all()

    db_dates = list()
    db_vals1 = list()
    db_vals2 = list()
    db_vals3 = list()
    for row in db_result:
        db_dates.append(row[0])
        db_vals1.append(row[1])
        if len(row) > 2:
            db_vals2.append(row[2])
            db_vals3.append(row[3])

    stats = list()
    dateAry = ts.date_array(db_dates, freq='D')
    timeSrz1 = ts.time_series(db_vals1, dateAry)
    fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
    fillVals1 = fillVals1.filled(0)
    timeSrz2 = ts.time_series(db_vals2, dateAry)
    fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)
    fillVals2 = fillVals2.filled(0)
    timeSrz3 = ts.time_series(db_vals3, dateAry)
    fillVals3 = timeSrz3.fill_missing_dates(fill_value=0)
    fillVals3 = fillVals3.filled(0)

    if request.form.get('startDate'):
        startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
    else:
        startDate = db_dates[0]
    endDate = datetime.strptime(endDateIn, '%Y-%m-%d')
    fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

    firstDayIdx = 0
    for day in fillDateAry:
        if len(dateAry) == 0 or dateAry[0] != fillDateAry[firstDayIdx]:
            stats.append({"date": fillDateAry[firstDayIdx].strftime("%m/%d"),
                          "val": 0, "gr": 0, "me": 0})
            firstDayIdx += 1
            continue
        else:
            break

    for idx, val in enumerate(fillVals1):
        gr = fillVals2[idx]
        me = fillVals3[idx]
        rowDict = {"date": fillDateAry[firstDayIdx + idx].strftime("%m/%d"),
                   "val": "{0:.2f}".format(float(val or 0))}
        rowDict["gr"] = "{0:.2f}".format(float(gr or 0))
        rowDict["me"] = "{0:.2f}".format(float(me or 0))
        stats.append(rowDict)

    statsLen = len(stats)
    for i in range(len(fillDateAry) - len(stats)):
        stats.append({"date": fillDateAry[statsLen + i].strftime("%m/%d"),
                      "val": 0, "gr": 0, "me": 0})

    if fromFunc == True:
        return stats
    else:
        return jsonify({"stats": stats})
def load_oni(mode='standard', **options):
    """
    Loads the ONI 3-m averaged monthly SST anomalies over the Niño-3.4
    region and returns a :class:`~scikits.hydroclimpy.enso.ENSOIndicator`
    object.

    Two modes are accepted as arguments:

    - in the ``standard`` mode, the SSTs are retrieved from the original
      CPC website_.
      Data are available from Jan. 1950 to present.
    - in the ``backup`` mode, the SSTs are retrieved from the CPC
      `ftp site <ftpsite>`_.
      Data are available from Jan. 1900 to present.

    .. _website : http://www.cpc.noaa.gov/products/analysis_monitoring/ensostuff/ensoyears.shtml
    .. _ftpsite : ftp://eclipse.ncdc.noaa.gov/pub/ersst/pdo/el_nino_v3.dat.

    Parameters
    ----------
    mode : {'standard','backup'}, optional
        Mode describing the data to download.
    options : dictionary
        Optional parameters to parse to the ENSOIndicator for the
        definition of ENSO indices.

        thresholds : tuple of floats, optional
            Low and high temperature thresholds for the definition of
            El Niño and La Niña conditions.
            By default, the CPC uses -0.5oC and +0.5oC.
        minimum_size : int, optional
            Minimum number of consecutive months in El Niño / La Niña
            conditions required for the definition of an episode.
            By default, the CPC uses 5 consecutive months.
        reference_season : string or tuple, optional
            Months that must be in an episode for it to be valid.
            By default, the CPC uses None (no restriction on the months).
        full_year : boolean, optional
            The CPC uses ``full_year=False``.

    References
    ----------
    Xue, Y., T. M. Smith, and R. W. Reynolds, 2003: Interdecadal changes of
    30-yr SST normals during 1871-2000. *J. Climate*, 16, 1601-1612.
    """
    # Initialization .......................
    ensoarchive = dict(config.items('ENSO'))['ensoarchive']
    if ensoarchive[-4:].lower() != '.zip':
        ensoarchive += '.zip'
    #
    mode = mode.lower()
    cfg = dict(config.items('ENSO.ONI'))
    cfg.update(options)
    try:
        from BeautifulSoup import BeautifulSoup, SoupStrainer
    except ImportError:
        warnings.warn("The module 'BeautifulSoup' is unavailable.\n"\
                      "Reverting to backup mode")
        mode = 'backup'
    #
    datadir = cfg['datadir']
    if mode == 'standard':
        netfile = cfg['netfile']
        archive = cfg['archive']
    else:
        netfile = cfg['netfile_backup']
        archive = cfg['archive_backup']
    # Try to open an existing ENSOIndicator
    ensoarchive = dict(config.items('ENSO'))['ensoarchive']
    if ensoarchive[-4:].lower() != '.zip':
        ensoarchive += '.zip'
    #
    try:
        zipf = zipfile.ZipFile(ensoarchive, 'r')
        ensoi = cPickle.loads(zipf.read(archive))
        ensologger.info("... Loading from existing archived file")
    except IOError:
        zipf = zipfile.ZipFile(ensoarchive, 'w')
        ensologger.info("... Creating archive")
    except KeyError:
        zipf = zipfile.ZipFile(ensoarchive, 'a')
        ensologger.info("... Appending to archive")
    else:
        if isinstance(ensoi, enso.ENSOIndicator):
            return ensoi
    #
    sourcedir = np.lib._datasource.DataSource(datadir)
    dfile = sourcedir.open(netfile)
    #
    if mode == 'standard':
        # Load the file as a tree, but only take the SST table (border=1)
        table = BeautifulSoup(dfile.read(),
                              parseOnlyThese=SoupStrainer("table", border=1))
        # Separate it by rows, but skip the first one (the header)
        years = []
        data = []
        indices = []
        color = {'red': +1, 'white': 0, 'blue': -1}
        deft = [(None, 'color:white')]
        for row in table.findAll("tr")[1:]:
            cols = row.findAll('td')
            years.append(int(cols.pop(0).strong.string))
            data.append([float(_.fetchText()[-1].string.replace(' ', '99.9'))
                         for _ in cols])
            indices.append([color[getattr(_.span, 'attrs', deft)[0][-1].split(':')[-1]]
                            for _ in cols])
        #
        start_date = Date('M', year=years[0], month=1)
        ensoi = enso.ENSOIndicator(ma.masked_values(data, 99.9).ravel(),
                                   start_date=start_date,)
        # oni.set_indices(full_year=False, minsize=5, refseason=None)
        indices = time_series(np.array(indices).ravel(),
                              start_date=start_date)
    else:
        rawdata = np.loadtxt(dfile)
        dates = date_array([Date('M', year=yy, month=mm)
                            for (yy, mm) in rawdata[:, :2]],
                           freq='M')
        ensoi = enso.ENSOIndicator(cmov_mean(rawdata[:, -1], 3).round(2),
                                   dates,)
    #
    _set_ensoindicator_options(ensoi, **cfg)
    ensoi.set_indices()
    #
    # Store in the archive
    zipf.writestr(archive, cPickle.dumps(ensoi))
    zipf.close()
    return ensoi
# t = timer()
# mod_tb = tb.lpc(y, 2)
# t_end = timer()
# print str(t_end - t) + " seconds for talkbox.lpc"
# print """For higher lag lengths ours quickly fills up memory and starts
#thrashing the swap. Should we include talkbox C code or Cythonize the
#Levinson recursion algorithm?"""

## Try with a pandas series
import pandas
import scikits.timeseries as ts
d1 = ts.Date(year=1700, freq='A')
#NOTE: have to have yearBegin offset for annual data until parser rewrite
#should this be up to the user, or should it be done in TSM init?
#NOTE: not anymore, it's end of year now
ts_dr = ts.date_array(start_date=d1, length=len(sunspots.endog))
pandas_dr = pandas.DateRange(start=d1.datetime,
                             periods=len(sunspots.endog), timeRule='A@DEC')
#pandas_dr = pandas_dr.shift(-1, pandas.datetools.yearBegin)

dates = np.arange(1700, 1700 + len(sunspots.endog))
dates = ts.date_array(dates, freq='A')
#sunspots = pandas.Series(sunspots.endog, index=dates)

#NOTE: pandas only does business days for dates it looks like
import datetime
dt_dates = np.asarray(lmap(datetime.datetime.fromordinal,
                           ts_dr.toordinal().astype(int)))
sunspots = pandas.Series(sunspots.endog, index=dt_dates)

#NOTE: pandas can't handle pre-1900 dates
rearranged
1.00796791  0.24449867(-0.00521004)  0.50554663

garch11:
[ 1.01258264  0.24149155  0.50479994] -2056.3877404

R include_constant=False
Final Estimate:
 LLH:  2056.397    norm LLH:  2.056397
    omega    alpha1     beta1
1.0123560 0.2409589 0.5049154
'''

erro, ho, etaxo = generate_gjrgarch(20, ar, ma, mu=0.04, scale=0.01,
                                    varinnovation=np.ones(20))

if 'sp500' in examples:
    import tabular as tb
    import scikits.timeseries as ts
    a = tb.loadSV(r'C:\Josef\work-oth\gspc_table.csv')
    s = ts.time_series(a[0]['Close'][::-1],
                       dates=ts.date_array(a[0]['Date'][::-1], freq="D"))
    sp500 = a[0]['Close'][::-1]
    sp500r = np.diff(np.log(sp500))

plt.show()
                    dd = f(**params)
                ## Add other distributions here
                else:
                    raise TypeError, 'Unknown generator'
            except TypeError, exc:
                logging.error('%s generator - %s', rq['Source'], exc.args[0])
            except:
                raise
            if s['FREQ'] != '0':
                data = ts.time_series(dd, freq=s['FREQ'], dates=dr)
                if s['NAN']:
                    t = ts.now(data.freqstr)
                    if t < data.end_date:
                        da = ts.date_array(start_date=t,
                                           end_date=data.end_date)
                        data[da] = np.nan
                rx[kw['NAME']] = Timeseries(data=data, name=name)
            else:
                # for a vector
                print type(dd), dd.shape
                xx = dd.reshape((1, -1))
                print type(xx), xx.shape
                rx[kw['NAME']] = np.array(dd)
        return rx

# random://normal/loc=10,scale=.2~:2012-12-31~2007-01-01~M?name=UNI

if __name__ == "__main__":
    parse_instrument('NAME=PIPPO')
    parse_instrument('NAME=PIPPO~M')
    parse_instrument('NAME=PIPPO~Q')
def _daily_finder(vmin, vmax, freq):
    periodsperday = -1
    if freq >= _c.FR_HR:
        if freq == _c.FR_SEC:
            periodsperday = 24 * 60 * 60
        elif freq == _c.FR_MIN:
            periodsperday = 24 * 60
        elif freq == _c.FR_HR:
            periodsperday = 24
        else:
            raise ValueError("unexpected frequency: %s" % check_freq_str(freq))
        periodsperyear = 365 * periodsperday
        periodspermonth = 28 * periodsperday
    elif freq == _c.FR_BUS:
        periodsperyear = 261
        periodspermonth = 19
    elif freq == _c.FR_DAY:
        periodsperyear = 365
        periodspermonth = 28
    elif get_freq_group(freq) == _c.FR_WK:
        periodsperyear = 52
        periodspermonth = 3
    elif freq == _c.FR_UND:
        periodsperyear = 100
        periodspermonth = 10
    else:
        raise ValueError("unexpected frequency")

    # save this for later usage
    vmin_orig = vmin

    (vmin, vmax) = (int(vmin), int(vmax))
    span = vmax - vmin + 1
    dates_ = date_array(start_date=Date(freq, vmin),
                        end_date=Date(freq, vmax))
    # Initialize the output
    info = np.zeros(span,
                    dtype=[('val', int), ('maj', bool), ('min', bool),
                           ('fmt', '|S20')])
    info['val'][:] = np.arange(vmin, vmax + 1)
    info['fmt'][:] = ''
    info['maj'][[0, -1]] = True
    # .. and set some shortcuts
    info_maj = info['maj']
    info_min = info['min']
    info_fmt = info['fmt']

    def first_label(label_flags):
        if (label_flags[0] == 0) and (label_flags.size > 1) and \
           ((vmin_orig % 1) > 0.0):
            return label_flags[1]
        else:
            return label_flags[0]

    # Case 1. Less than a month
    if span <= periodspermonth:
        day_start = period_break(dates_, 'day')
        month_start = period_break(dates_, 'month')

        def _hour_finder(label_interval, force_year_start):
            _hour = dates_.hour
            _prev_hour = (dates_ - 1).hour
            hour_start = (_hour - _prev_hour) != 0
            info_maj[day_start] = True
            info_min[hour_start & (_hour % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            info_fmt[hour_start & (_hour % label_interval == 0)] = '%H:%M'
            info_fmt[day_start] = '%H:%M\n%d-%b'
            info_fmt[year_start] = '%H:%M\n%d-%b\n%Y'
            if force_year_start and not has_level_label(year_start, vmin_orig):
                info_fmt[first_label(day_start)] = '%H:%M\n%d-%b\n%Y'

        def _minute_finder(label_interval):
            hour_start = period_break(dates_, 'hour')
            _minute = dates_.minute
            _prev_minute = (dates_ - 1).minute
            minute_start = (_minute - _prev_minute) != 0
            info_maj[hour_start] = True
            info_min[minute_start & (_minute % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            info_fmt = info['fmt']
            info_fmt[minute_start & (_minute % label_interval == 0)] = '%H:%M'
            info_fmt[day_start] = '%H:%M\n%d-%b'
            info_fmt[year_start] = '%H:%M\n%d-%b\n%Y'

        def _second_finder(label_interval):
            minute_start = period_break(dates_, 'minute')
            _second = dates_.second
            _prev_second = (dates_ - 1).second
            second_start = (_second - _prev_second) != 0
            info['maj'][minute_start] = True
            info['min'][second_start & (_second % label_interval == 0)] = True
            year_start = period_break(dates_, 'year')
            info_fmt = info['fmt']
            info_fmt[second_start & (_second % label_interval == 0)] = '%H:%M:%S'
            info_fmt[day_start] = '%H:%M:%S\n%d-%b'
            info_fmt[year_start] = '%H:%M:%S\n%d-%b\n%Y'

        if span < periodsperday / 12000.0:
            _second_finder(1)
        elif span < periodsperday / 6000.0:
            _second_finder(2)
        elif span < periodsperday / 2400.0:
            _second_finder(5)
        elif span < periodsperday / 1200.0:
            _second_finder(10)
        elif span < periodsperday / 800.0:
            _second_finder(15)
        elif span < periodsperday / 400.0:
            _second_finder(30)
        elif span < periodsperday / 150.0:
            _minute_finder(1)
        elif span < periodsperday / 70.0:
            _minute_finder(2)
        elif span < periodsperday / 24.0:
            _minute_finder(5)
        elif span < periodsperday / 12.0:
            _minute_finder(15)
        elif span < periodsperday / 6.0:
            _minute_finder(30)
        elif span < periodsperday / 2.5:
            _hour_finder(1, False)
        elif span < periodsperday / 1.5:
            _hour_finder(2, False)
        elif span < periodsperday * 1.25:
            _hour_finder(3, False)
        elif span < periodsperday * 2.5:
            _hour_finder(6, True)
        elif span < periodsperday * 4:
            _hour_finder(12, True)
        else:
            info_maj[month_start] = True
            info_min[day_start] = True
            year_start = period_break(dates_, 'year')
            info_fmt = info['fmt']
            info_fmt[day_start] = '%d'
            info_fmt[month_start] = '%d\n%b'
            info_fmt[year_start] = '%d\n%b\n%Y'
            if not has_level_label(year_start, vmin_orig):
                if not has_level_label(month_start, vmin_orig):
                    info_fmt[first_label(day_start)] = '%d\n%b\n%Y'
                else:
                    info_fmt[first_label(month_start)] = '%d\n%b\n%Y'

    # Case 2. Less than three months
    elif span <= periodsperyear // 4:
        month_start = period_break(dates_, 'month')
        info_maj[month_start] = True
        if freq < _c.FR_HR:
            info['min'] = True
        else:
            day_start = period_break(dates_, 'day')
            info['min'][day_start] = True
        week_start = period_break(dates_, 'week')
        year_start = period_break(dates_, 'year')
        info_fmt[week_start] = '%d'
        info_fmt[month_start] = '\n\n%b'
        info_fmt[year_start] = '\n\n%b\n%Y'
        if not has_level_label(year_start, vmin_orig):
            if not has_level_label(month_start, vmin_orig):
                info_fmt[first_label(week_start)] = '\n\n%b\n%Y'
            else:
                info_fmt[first_label(month_start)] = '\n\n%b\n%Y'
    # Case 3. Less than 14 months ...............
    elif span <= 1.15 * periodsperyear:
        year_start = period_break(dates_, 'year')
        month_start = period_break(dates_, 'month')
        week_start = period_break(dates_, 'week')
        info_maj[month_start] = True
        info_min[week_start] = True
        info_min[year_start] = False
        info_min[month_start] = False
        info_fmt[month_start] = '%b'
        info_fmt[year_start] = '%b\n%Y'
        if not has_level_label(year_start, vmin_orig):
            info_fmt[first_label(month_start)] = '%b\n%Y'
    # Case 4. Less than 2.5 years ...............
    elif span <= 2.5 * periodsperyear:
        year_start = period_break(dates_, 'year')
        quarter_start = period_break(dates_, 'quarter')
        month_start = period_break(dates_, 'month')
        info_maj[quarter_start] = True
        info_min[month_start] = True
        info_fmt[quarter_start] = '%b'
        info_fmt[year_start] = '%b\n%Y'
    # Case 4. Less than 4 years .................
    elif span <= 4 * periodsperyear:
        year_start = period_break(dates_, 'year')
        month_start = period_break(dates_, 'month')
        info_maj[year_start] = True
        info_min[month_start] = True
        info_min[year_start] = False
        month_break = dates_[month_start].month
        jan_or_jul = month_start[(month_break == 1) | (month_break == 7)]
        info_fmt[jan_or_jul] = '%b'
        info_fmt[year_start] = '%b\n%Y'
    # Case 5. Less than 11 years ................
    elif span <= 11 * periodsperyear:
        year_start = period_break(dates_, 'year')
        quarter_start = period_break(dates_, 'quarter')
        info_maj[year_start] = True
        info_min[quarter_start] = True
        info_min[year_start] = False
        info_fmt[year_start] = '%Y'
    # Case 6. More than 12 years ................
    else:
        year_start = period_break(dates_, 'year')
        year_break = dates_[year_start].years
        nyears = span / periodsperyear
        (min_anndef, maj_anndef) = _get_default_annual_spacing(nyears)
        major_idx = year_start[(year_break % maj_anndef == 0)]
        info_maj[major_idx] = True
        minor_idx = year_start[(year_break % min_anndef == 0)]
        info_min[minor_idx] = True
        info_fmt[major_idx] = '%Y'
    #............................................
    return info
import os
import datetime
from matplotlib.finance import quotes_historical_yahoo
import scikits.timeseries as ts
import scikits.timeseries.lib.tstables as tstab

startdate = datetime.date(2002, 1, 5)
enddate = datetime.date(2003, 12, 1)

# retrieve data from yahoo.
# Data format is [(d, open, close, high, low, volume), ...] where d is
# a floating point representation of the number of days since 01-01-01 UTC
quotes = quotes_historical_yahoo('INTC', startdate, enddate)

# Create a DateArray of daily dates and convert it to business day frequency
dates = ts.date_array([q[0] for q in quotes], freq='DAILY').asfreq('BUSINESS')

opens = [q[1] for q in quotes]

# opens: the data portion of the time series
# dates: the date portion of the time series
raw_series = ts.time_series(opens, dates)
test_series = raw_series
#test_series = ts.fill_missing_dates(raw_series, fill_value=-1)

# Write to a PyTables file
output_dir = '../timeseries'
try:
    os.mkdir(output_dir)
except OSError:
    pass
* pandas is missing GroupBy in the docs, but the docstring is helpful
* both la and pandas handle datetime objects as object arrays
* tabular requires conversion to structured dtype, but easy helper
  functions or methods are available in scikits.timeseries and tabular

* not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""
from statsmodels.compat.python import lrange, zip

import numpy as np
import scikits.timeseries as ts

s = ts.time_series([1, 2, 3, 4, 5],
                   dates=ts.date_array(["2001-01", "2001-01", "2001-02",
                                        "2001-03", "2001-03"], freq="M"))

print('\nUsing la')
import la
dta = la.larry(s.data, label=[lrange(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[lrange(len(s.data))])
s2 = ts.time_series(dta.group_mean(dat).x,
                    dates=ts.date_array(dat.x, freq="M"))
s2u = ts.remove_duplicated_dates(s2)
print(repr(s))
print(dat)
print(repr(s2))
print(repr(s2u))

print('\nUsing pandas')
import pandas
""" Look at some macro plots, then do some VARs and IRFs. """ import numpy as np import statsmodels.api as sm import scikits.timeseries as ts import scikits.timeseries.lib.plotlib as tplt data = sm.datasets.macrodata.load(as_pandas=False) data = data.data ### Create Timeseries Representations of a few vars dates = ts.date_array(start_date=ts.Date('Q', year=1959, quarter=1), end_date=ts.Date('Q', year=2009, quarter=3)) ts_data = data[['realgdp','realcons','cpi']].view(float).reshape(-1,3) ts_data = np.column_stack((ts_data, (1 - data['unemp']/100) * data['pop'])) ts_series = ts.time_series(ts_data, dates) fig = tplt.tsfigure() fsp = fig.add_tsplot(221) fsp.tsplot(ts_series[:,0],'-') fsp.set_title("Real GDP") fsp = fig.add_tsplot(222) fsp.tsplot(ts_series[:,1],'r-') fsp.set_title("Real Consumption") fsp = fig.add_tsplot(223) fsp.tsplot(ts_series[:,2],'g-')
def getMsgStats(fromFunc=False):
    companyId = session.get("companyId")
    condFilter = [1 == 1]
    if companyId:
        condFilter.append(Message.companyId == companyId)
    if request.form.get('startDate'):
        condFilter.append(Message.rdate > request.form['startDate'])
    db_result = db_session.query(func.date(Message.rdate).label("rdate"),
                                 func.count(func.IF(Message.msgType == 1, 1, None)),
                                 func.count(func.IF(Message.msgType == 2, 1, None)))\
        .group_by(func.date(Message.rdate))\
        .filter(func.date(Message.rdate) <= request.form['endDate'])\
        .filter(*condFilter).all()

    db_dates = list()
    db_cnt1 = list()
    db_cnt2 = list()
    for row in db_result:
        db_dates.append(row[0])
        db_cnt1.append(row[1])
        db_cnt2.append(row[2])

    stats = list()
    dateAry = ts.date_array(db_dates, freq='D')
    timeSrz1 = ts.time_series(db_cnt1, dateAry)
    timeSrz2 = ts.time_series(db_cnt2, dateAry)
    fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
    fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)
    fillVals1 = fillVals1.filled(0)
    fillVals2 = fillVals2.filled(0)

    if request.form.get('startDate'):
        startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
    else:
        startDate = '2015-10-27'
    endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
    fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

    firstDayIdx = 0
    for day in fillDateAry:
        if len(dateAry) == 0 or dateAry[0] != fillDateAry[firstDayIdx]:
            stats.append({"date": fillDateAry[firstDayIdx].strftime("%m/%d"),
                          "msg1": 0, "msg2": 0})
            firstDayIdx += 1
            continue
        else:
            break

    for idx, msg1cnt in enumerate(fillVals1):
        msg2cnt = fillVals2[idx]
        stats.append({"date": fillDateAry[firstDayIdx + idx].strftime("%m/%d"),
                      "msg1": msg1cnt, "msg2": msg2cnt})

    statsLen = len(stats)
    for i in range(len(fillDateAry) - len(stats)):
        stats.append({"date": fillDateAry[statsLen + i].strftime("%m/%d"),
                      "msg1": 0, "msg2": 0})

    if fromFunc == True:
        return stats
    else:
        return jsonify({"stats": stats})
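# The core idiom shared by these stats endpoints, in isolation (a sketch
# with made-up dates): pad a sparse daily series with zeros so that every
# day in the range has a value.
import scikits.timeseries as ts

counts = ts.time_series([5, 7],
                        ts.date_array(['2015-11-02', '2015-11-05'], freq='D'))
filled = counts.fill_missing_dates(fill_value=0).filled(0)
# `filled` now runs 11/02-11/05, with zeros on 11/03 and 11/04.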
                                   _attrs)
    _methods = {'predict': 'dates'}
    _wrap_methods = wrap.union_dicts(base.LikelihoodResultsWrapper._wrap_methods,
                                     _methods)
wrap.populate_wrapper(TimeSeriesResultsWrapper, TimeSeriesModelResults)

if __name__ == "__main__":
    import scikits.statsmodels.api as sm
    import datetime
    import pandas

    data = sm.datasets.macrodata.load()

    # make a DataFrame
    #TODO: attach a DataFrame to some of the datasets, for quicker use
    dates = [str(int(x[0])) + ':' + str(int(x[1]))
             for x in data.data[['year', 'quarter']]]

    try:
        import scikits.timeseries as ts
        ts_dates = date_array(start_date=Date(year=1959, quarter=1, freq='Q'),
                              length=len(data.data))
    except:
        pass

    df = pandas.DataFrame(data.data[['realgdp', 'realinv', 'realcons']],
                          index=dates)
    ex_mod = TimeSeriesModel(df)
    #ts_series = pandas.TimeSeries()
def getGoodworkStats(fromFunc=False):
    db_dates1 = list()
    db_dates2 = list()
    db_dates3 = list()
    db_cnt1 = list()
    db_cnt2 = list()
    db_cnt3 = list()

    companyId = session.get("companyId")
    condFilter = [1 == 1]
    if companyId:
        condFilter.append(GoodPost.companyId == companyId)
    if request.form.get('startDate'):
        condFilter.append(GoodPost.rdate > request.form['startDate'])
    db_result1 = db_session.query(func.date(GoodPost.rdate).label("rdate"),
                                  func.count())\
        .group_by(func.date(GoodPost.rdate))\
        .filter(func.date(GoodPost.rdate) <= request.form['endDate'])\
        .filter(*condFilter).all()
    for row in db_result1:
        db_dates1.append(row[0])
        db_cnt1.append(row[1])

    condFilter2 = [1 == 1]
    if request.form.get('startDate'):
        condFilter2.append(GoodLike.rdate > request.form['startDate'])
    if companyId:
        db_result2 = db_session.query(func.date(GoodLike.rdate).label("rdate"),
                                      func.count())\
            .join(User, User.userId == GoodLike.userId)\
            .group_by(func.date(GoodLike.rdate))\
            .filter(func.date(GoodLike.rdate) <= request.form['endDate'])\
            .filter(User.companyId == companyId)\
            .filter(*condFilter2).all()
    else:
        db_result2 = db_session.query(func.date(GoodLike.rdate).label("rdate"),
                                      func.count())\
            .group_by(func.date(GoodLike.rdate))\
            .filter(func.date(GoodLike.rdate) <= request.form['endDate'])\
            .filter(*condFilter2).all()
    for row in db_result2:
        db_dates2.append(row[0])
        db_cnt2.append(row[1])

    condFilter3 = [1 == 1]
    if request.form.get('startDate'):
        condFilter3.append(GoodReply.rdate > request.form['startDate'])
    if companyId:
        db_result3 = db_session.query(func.date(GoodReply.rdate).label("rdate"),
                                      func.count())\
            .join(User, User.userId == GoodReply.userId)\
            .group_by(func.date(GoodReply.rdate))\
            .filter(func.date(GoodReply.rdate) <= request.form['endDate'])\
            .filter(User.companyId == companyId)\
            .filter(*condFilter3).all()
    else:
        db_result3 = db_session.query(func.date(GoodReply.rdate).label("rdate"),
                                      func.count())\
            .group_by(func.date(GoodReply.rdate))\
            .filter(func.date(GoodReply.rdate) <= request.form['endDate'])\
            .filter(*condFilter3).all()
    for row in db_result3:
        db_dates3.append(row[0])
        db_cnt3.append(row[1])

    dateAry1 = ts.date_array(db_dates1, freq='D')
    dateAry2 = ts.date_array(db_dates2, freq='D')
    dateAry3 = ts.date_array(db_dates3, freq='D')
    timeSrz1 = ts.time_series(db_cnt1, dateAry1)
    timeSrz2 = ts.time_series(db_cnt2, dateAry2)
    timeSrz3 = ts.time_series(db_cnt3, dateAry3)
    fillVals1 = timeSrz1.fill_missing_dates(fill_value=0)
    fillVals2 = timeSrz2.fill_missing_dates(fill_value=0)
    fillVals3 = timeSrz3.fill_missing_dates(fill_value=0)
    fillVals1 = fillVals1.filled(0)
    fillVals2 = fillVals2.filled(0)
    fillVals3 = fillVals3.filled(0)

    if request.form.get('startDate'):
        startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
    else:
        startDate = '2015-10-27'
    endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
    fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

    if len(dateAry1) > 0 and len(dateAry2) > 0 and dateAry1[0] > dateAry2[0]:
        if len(dateAry3) > 0 and dateAry2[0] > dateAry3[0]:
            minDate = dateAry3[0]
        else:
            minDate = dateAry2[0]
    else:
        if (len(dateAry1) > 0 and len(dateAry3) > 0
                and dateAry1[0] > dateAry3[0]) or len(dateAry1) == 0:
            minDate = len(dateAry3) > 0 and dateAry3[0] or None
        else:
            minDate = len(dateAry1) > 0 and dateAry1[0] or None

    postStats = list()
    likeStats = list()
    replyStats = list()

    firstDayIdx1 = 0
    for day in fillDateAry:
        if minDate != fillDateAry[firstDayIdx1]:
            postStats.append({"date": fillDateAry[firstDayIdx1].strftime("%m/%d"),
                              "post": 0})
            likeStats.append({"date": fillDateAry[firstDayIdx1].strftime("%m/%d"),
                              "like": 0})
            replyStats.append({"date": fillDateAry[firstDayIdx1].strftime("%m/%d"),
                               "reply": 0})
            firstDayIdx1 += 1
            continue
        else:
            break

    firstDayIdx2 = 0
    for day in fillDateAry:
        if len(dateAry2) == 0 or dateAry2[0] != fillDateAry[firstDayIdx2]:
            if len(dateAry1) > 0 and minDate != dateAry1[0]:
                postStats.append({"date": fillDateAry[firstDayIdx2].strftime("%m/%d"),
                                  "post": 0})
            if len(dateAry2) > 0 and minDate != dateAry2[0]:
                likeStats.append({"date": fillDateAry[firstDayIdx2].strftime("%m/%d"),
                                  "like": 0})
            if len(dateAry3) > 0 and minDate != dateAry3[0]:
                replyStats.append({"date": fillDateAry[firstDayIdx2].strftime("%m/%d"),
                                   "reply": 0})
            firstDayIdx2 += 1
            continue
        else:
            break

    firstDayIdx3 = 0
    for day in fillDateAry:
        if len(dateAry3) == 0 or dateAry3[0] != fillDateAry[firstDayIdx3]:
            if len(dateAry1) > 0 and minDate != dateAry1[0]:
                postStats.append({"date": fillDateAry[firstDayIdx3].strftime("%m/%d"),
                                  "post": 0})
            if len(dateAry2) > 0 and minDate != dateAry2[0]:
                likeStats.append({"date": fillDateAry[firstDayIdx3].strftime("%m/%d"),
                                  "like": 0})
            if len(dateAry3) > 0 and minDate != dateAry3[0]:
                replyStats.append({"date": fillDateAry[firstDayIdx3].strftime("%m/%d"),
                                   "reply": 0})
            firstDayIdx3 += 1
            continue
        else:
            break

    for idx, post in enumerate(fillVals1):
        postStats.append({"date": fillDateAry[firstDayIdx1 + idx].strftime("%m/%d"),
                          "post": post})
    for idx, like in enumerate(fillVals2):
        likeStats.append({"date": fillDateAry[firstDayIdx2 + idx].strftime("%m/%d"),
                          "like": like})
    for idx, reply in enumerate(fillVals3):
        replyStats.append({"date": fillDateAry[firstDayIdx3 + idx].strftime("%m/%d"),
                           "reply": reply})

    postLen = len(postStats)
    likeLen = len(likeStats)
    replyLen = len(replyStats)
    for i in range(len(fillDateAry) - postLen):
        postStats.append({"date": fillDateAry[postLen + i].strftime("%m/%d"),
                          "post": 0})
    for i in range(len(fillDateAry) - likeLen):
        likeStats.append({"date": fillDateAry[likeLen + i].strftime("%m/%d"),
                          "like": 0})
    for i in range(len(fillDateAry) - replyLen):
        replyStats.append({"date": fillDateAry[replyLen + i].strftime("%m/%d"),
                           "reply": 0})

    # merge
    stats = list()
    for idx, postStat in enumerate(postStats):
        mergedStats = postStat.copy()
        mergedStats.update(likeStats[idx])
        mergedStats.update(replyStats[idx])
        stats.append(mergedStats)

    if fromFunc == True:
        return stats
    else:
        return jsonify({"stats": stats})
def __call__(self, *tseries, **kwargs):
    """Generate a report. Parameter values are not saved to the Report
    instance.

    Parameters
    ----------
    Accepts the same parameters as the __init__ method of the Report class.
    """
    option_dict = copy.copy(self.options)
    option_dict.update(self.__make_dict(**kwargs))
    if len(tseries) == 0:
        tseries = self.tseries

    def option(kw):
        return option_dict.get(kw, _default_options[kw])

    dates = option('dates')
    header_row = option('header_row')
    header_char = option('header_char')
    header_justify = option('header_justify')
    row_char = option('row_char')
    footer_label = option('footer_label')
    footer_char = option('footer_char')
    footer_func = option('footer_func')
    delim = option('delim')
    justify = option('justify')
    prefix = option('prefix')
    postfix = option('postfix')
    mask_rep = option('mask_rep')
    datefmt = option('datefmt')
    fmt_func = option('fmt_func')
    wrap_func = option('wrap_func')
    col_width = option('col_width')
    nls = option('nls')
    output = option('output')
    fixed_width = option('fixed_width')

    if header_row is not None:
        has_header = True
        if len(header_row) == len(tseries) + 1:
            # label for the date column included
            rows = [header_row]
        elif len(header_row) == len(tseries):
            # label for the date column not included
            rows = [[''] + header_row]
        else:
            raise ValueError("mismatch with number of headers and series")
    else:
        has_header = False
        rows = []

    if fixed_width:

        def _standardize_justify(userspec):
            if isinstance(userspec, str):
                # justify all columns the same way
                return [userspec for x in range(len(tseries) + 1)]
            if isinstance(userspec, list) and len(userspec) == len(tseries):
                # justification for the date column not included, so
                # default that column to 'left'
                return ['left'] + userspec
            raise ValueError("invalid `justify` specification")

        if justify is not None:
            justify = _standardize_justify(justify)
        else:
            # default column justification: left for strings/objects,
            # right for numbers
            justify = ['left']
            for ser in tseries:
                if ser.dtype.char in 'SUO':
                    justify.append('left')
                else:
                    justify.append('right')

        if header_justify is not None:
            header_justify = _standardize_justify(header_justify)
        else:
            # default header justification
            header_justify = ['left' for x in range(len(tseries) + 1)]
    else:
        justify = ['none' for x in range(len(tseries) + 1)]
        header_justify = justify

    if datefmt is None:
        def datefmt_func(date):
            return str(date)
    else:
        def datefmt_func(date):
            return date.strftime(datefmt)

    if dates is None:
        tseries = ts.align_series(*tseries)
        dates = ts.date_array(start_date=tseries[0].start_date,
                              end_date=tseries[0].end_date)
    else:
        tseries = ts.align_series(start_date=dates[0], end_date=dates[-1],
                                  *tseries)

    if isinstance(fmt_func, list):
        fmt_func = [fmt_func_wrapper(f, mask_rep) for f in fmt_func]
    else:
        fmt_func = [fmt_func_wrapper(fmt_func, mask_rep)] * len(tseries)

    def wrap_func_default(func):
        if func is None:
            return lambda x: x
        return func

    if isinstance(wrap_func, list):
        if len(wrap_func) == len(tseries):
            # no wrapper given for the date column; pass it through
            wrap_func = [lambda x: x] + wrap_func
        wrap_func = [wrap_func_default(func) for func in wrap_func]
    else:
        wrap_func = [wrap_func_default(wrap_func)
                     for x in range(len(tseries) + 1)]

    if isinstance(col_width, list):
        if len(col_width) == len(tseries):
            col_width = [None] + col_width
    else:
        col_width = [col_width for x in range(len(tseries) + 1)]

    _sd = dates[0]
    for d in dates:
        rows.append([datefmt_func(d)] +
                    [fmt_func[i](ser.series[d - _sd])
                     for i, ser in enumerate(tseries)])

    if footer_func is not None:
        has_footer = True
        if not isinstance(footer_func, list):
            footer_func = [footer_func] * len(tseries)
        if footer_label is None:
            footer_label = ['']
        else:
            footer_label = [footer_label]

        footer_data = []
        has_missing = dates.has_missing_dates()
        for i, ser in enumerate(tseries):
            if footer_func[i] is None:
                footer_data.append('')
            else:
                if has_missing:
                    _input = ser[dates]
                else:
                    _input = ser.series
                footer_data.append(fmt_func[i](footer_func[i](_input)))
        rows.append(footer_label + footer_data)
    else:
        has_footer = False

    def rowWrapper(row):
        newRows = [wrap_func[i](item).split('\n')
                   for i, item in enumerate(row)]
        # map(None, ...) is the Python 2 zip-longest idiom: short columns
        # are padded with None, which is then replaced by ''
        return [[(substr or '') for substr in item]
                for item in map(None, *newRows)]

    # break each logical row into one or more physical ones
    logicalRows = [rowWrapper(row) for row in rows]
    numLogicalRows = len(logicalRows)
    # columns of physical rows
    columns = map(None, *reduce(operator.add, logicalRows))
    numCols = len(columns)
    colNums = list(range(numCols))

    # each column is as wide as its longest item (or the user-specified
    # col_width, whichever is larger)
    maxWidths = [max(col_width[i], *[len(str(item)) for item in column])
                 for i, column in enumerate(columns)]

    def getSeparator(char, separate):
        if char is not None and separate:
            return char * (len(prefix) + len(postfix) + sum(maxWidths) +
                           len(delim) * (len(maxWidths) - 1))
        return None

    header_separator = getSeparator(header_char, has_header)
    footer_separator = getSeparator(footer_char, has_footer)
    row_separator = getSeparator(row_char, True)

    # select the appropriate justify method
    justify_funcs = {'center': str.center, 'right': str.rjust,
                     'left': str.ljust, 'none': (lambda text, width: text)}

    if has_header and has_footer:
        data_start, data_end = 1, numLogicalRows - 3
    elif has_header:
        data_start, data_end = 1, numLogicalRows - 2
    elif has_footer:
        data_start, data_end = 0, numLogicalRows - 3
    else:
        data_start, data_end = 0, numLogicalRows - 2

    for rowNum, physicalRows in enumerate(logicalRows):
        if rowNum == 0 and header_separator:
            _justify = header_justify
        else:
            _justify = justify

        def apply_justify(colNum, item, width):
            jfunc_key = str(_justify[colNum]).lower()
            jfunc = justify_funcs[jfunc_key]
            return jfunc(str(item), width)

        for row in physicalRows:
            output.write(prefix +
                         delim.join([apply_justify(cn, item, width)
                                     for (cn, item, width)
                                     in zip(colNums, row, maxWidths)]) +
                         postfix + nls)

        if row_separator and (data_start <= rowNum <= data_end):
            output.write(row_separator + nls)
        elif header_separator and rowNum < data_start:
            output.write(header_separator + nls)
        elif footer_separator and rowNum == data_end + 1:
            output.write(footer_separator + nls)
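# Hedged usage sketch for the __call__ method above. It assumes the method
# belongs to scikits.timeseries.lib.reportlib.Report and that __init__
# accepts the same options __call__ reads (header_row, footer_label,
# footer_func, delim); the series values are invented:
import numpy.ma as ma
import scikits.timeseries as ts
from scikits.timeseries.lib.reportlib import Report

series = ts.time_series([1.0, 2.0, 3.0], start_date=ts.Date('M', '2007-01'))
report = Report(series,
                header_row=['period', 'value'],  # date-column label included
                footer_label='total',
                footer_func=ma.sum,
                delim=' | ')
report()  # writes the formatted table to the configured output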
def getAccessStats(fromFunc=False):
    companyId = session.get("companyId")
    condFilter = [1 == 1]  # always-true placeholder so .filter(*condFilter) is valid
    if request.form.get('startDate'):
        condFilter.append(LogAccess.rdate > request.form['startDate'])
    if companyId:
        condFilter.append(User.companyId == companyId)

    # daily page views (row count) and unique visitors (distinct user ids)
    db_result = db_session.query(func.date(LogAccess.rdate).label("rdate"),
                                 func.count(),
                                 func.count(distinct(LogAccess.userId)))\
                          .group_by(func.date(LogAccess.rdate))\
                          .filter(*condFilter)\
                          .filter(func.date(LogAccess.rdate) <= request.form['endDate'])\
                          .all()

    db_dates = []
    db_pv = []
    db_uv = []
    for row in db_result:
        db_dates.append(row[0])
        db_pv.append(row[1])
        db_uv.append(row[2])

    stats = []
    dateAry = ts.date_array(db_dates, freq='D')
    timeSrz1 = ts.time_series(db_pv, dateAry)
    timeSrz2 = ts.time_series(db_uv, dateAry)
    # insert the missing days, then turn the masked fill entries into zeros
    fillVals1 = timeSrz1.fill_missing_dates(fill_value=0).filled(0)
    fillVals2 = timeSrz2.fill_missing_dates(fill_value=0).filled(0)

    if request.form.get('startDate'):
        startDate = datetime.strptime(request.form['startDate'], '%Y-%m-%d')
    else:
        startDate = '2015-10-27'  # default start of logging, passed as a string
    endDate = datetime.strptime(request.form['endDate'], '%Y-%m-%d')
    fillDateAry = ts.date_array(start_date=startDate, end_date=endDate, freq='D')

    # zero-fill the days before the first recorded date
    firstDayIdx = 0
    for day in fillDateAry:
        if len(dateAry) == 0 or dateAry[0] != fillDateAry[firstDayIdx]:
            stats.append({"date": fillDateAry[firstDayIdx].strftime("%m/%d"),
                          "pv": 0, "uv": 0})
            firstDayIdx += 1
            continue
        else:
            break

    for idx, pv in enumerate(fillVals1):
        uv = fillVals2[idx]
        stats.append({"date": fillDateAry[firstDayIdx + idx].strftime("%m/%d"),
                      "pv": pv, "uv": uv})

    # zero-fill the days after the last recorded date
    statsLen = len(stats)
    for i in range(len(fillDateAry) - statsLen):
        stats.append({"date": fillDateAry[statsLen + i].strftime("%m/%d"),
                      "pv": 0, "uv": 0})

    if fromFunc:
        return stats
    return jsonify({"stats": stats})
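# A small standalone sketch of the gap-filling idiom used above, mirroring
# the fill_missing_dates(...).filled(...) calls in getAccessStats; the dates
# and counts are invented:
import scikits.timeseries as ts

dates = ts.date_array(['2015-10-27', '2015-10-29'], freq='D')  # 10-28 missing
pv = ts.time_series([10, 7], dates)
filled = pv.fill_missing_dates(fill_value=0)  # inserts the missing day (masked)
counts = filled.filled(0)                     # replace masked entries with 0
# counts now covers 10-27..10-29 as [10, 0, 7]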
def _get_tseriesA(freq, date_values, kw):
    # kw is accepted for interface compatibility but is not used here
    # drop NaNs, cast the remaining ordinal values to int, and build a
    # DateArray at the requested frequency
    v = [int(d) for d in date_values.flatten() if not np.isnan(d)]
    D = [ts.Date(freq=str(freq), value=_v) for _v in v]
    date_array = ts.date_array(D)
    return date_array
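# Hypothetical call to the helper above: a NaN-padded array of daily ordinal
# values (the ordinals are arbitrary example numbers, not real data):
import numpy as np
import scikits.timeseries as ts

date_values = np.array([[733042.0, np.nan], [733043.0, 733044.0]])
dates = _get_tseriesA('D', date_values, None)  # kw is ignored by the helper
# dates is a DateArray of the three non-NaN days, in flattened order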
    garch11: [ 1.01258264  0.24149155  0.50479994] -2056.3877404

    R, include_constant=False
    Final Estimate:
      LLH:  2056.397    norm LLH:  2.056397
          omega     alpha1      beta1
      1.0123560  0.2409589  0.5049154
    '''

erro, ho, etaxo = generate_gjrgarch(20, ar, ma, mu=0.04, scale=0.01,
                                    varinnovation=np.ones(20))

if 'sp500' in examples:
    import tabular as tb
    import scikits.timeseries as ts

    a = tb.loadSV(r'C:\Josef\work-oth\gspc_table.csv')
    s = ts.time_series(a[0]['Close'][::-1],
                       dates=ts.date_array(a[0]['Date'][::-1], freq="D"))
    sp500 = a[0]['Close'][::-1]
    sp500r = np.diff(np.log(sp500))  # daily log returns

#plt.show()
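# The sp500r line above computes continuously compounded (log) returns; a
# minimal standalone version with made-up prices:
import numpy as np

prices = np.array([100.0, 101.5, 99.8, 102.2])
log_returns = np.diff(np.log(prices))  # r_t = log(p_t) - log(p_{t-1})
# one fewer element than prices; close to simple returns for small moves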
 * not too bad for a first try

Created on Sat Jan 30 08:33:11 2010
Author: josef-pktd
"""

from statsmodels.compat.python import lrange
import numpy as np
import scikits.timeseries as ts
import la
import pandas
import tabular as tb
from finance import msft, ibm  # hack to make it run as standalone

# a monthly series with duplicated dates (2001-01 and 2001-03 appear twice)
s = ts.time_series([1, 2, 3, 4, 5],
                   dates=ts.date_array(["2001-01", "2001-01", "2001-02",
                                        "2001-03", "2001-03"], freq="M"))

print('\nUsing la')
dta = la.larry(s.data, label=[lrange(len(s.data))])
dat = la.larry(s.dates.tolist(), label=[lrange(len(s.data))])
# average the values that share a date, then drop the duplicate dates
s2 = ts.time_series(dta.group_mean(dat).x,
                    dates=ts.date_array(dat.x, freq="M"))
s2u = ts.remove_duplicated_dates(s2)
print(repr(s))
print(dat)
print(repr(s2))
print(repr(s2u))

print('\nUsing pandas')
pdta = pandas.DataFrame(s.data, np.arange(len(s.data)), [1])
pa = pdta.groupby(dict(zip(np.arange(len(s.data)),
                           s.dates.tolist()))).aggregate(np.mean)
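# The same duplicate-date averaging in pandas alone; this sketch assumes a
# modern pandas API rather than the vintage DataFrame/groupby spelling used
# above:
import pandas as pd

s = pd.Series([1.0, 2.0, 3.0, 4.0, 5.0],
              index=pd.PeriodIndex(["2001-01", "2001-01", "2001-02",
                                    "2001-03", "2001-03"], freq="M"))
dedup = s.groupby(level=0).mean()
# 2001-01 -> 1.5, 2001-02 -> 3.0, 2001-03 -> 4.5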