def test_convert_to_annual(self):
    "Check convert_to_annual against hand-built (year, slot) controls."
    # Number of samples per day for each frequency code.
    per_day = {'D': 1, 'H': 24, 'T': 24 * 60, 'S': 24 * 3600}
    # Only 'D' and 'H' are exercised: the minuTe and Second frequencies
    # are too time consuming.
    for freq in ('D', 'H'):
        n = per_day[freq]
        dates = date_array(start_date=Date(freq, '2001-01-01 00:00:00'),
                           end_date=Date(freq, '2004-12-31 23:59:59'))
        # Three 365-day blocks followed by one 366-day block of counters.
        values = list(range(365 * n)) * 3 + list(range(366 * n))
        series = time_series(values, dates=dates)

        expected = ma.masked_all((4, 366 * n), dtype=series.dtype)
        # Row 0 keeps the slots [58 * n, 59 * n) masked.
        expected[0, :58 * n] = list(range(58 * n))
        expected[0, 59 * n:] = list(range(58 * n, 365 * n))
        # Rows 1 and 2 are copies of row 0; row 3 is completely filled.
        expected[[1, 2]] = expected[0]
        expected[3] = list(range(366 * n))

        assert_equal(convert_to_annual(series), expected)
    #
    # Daily series that starts on 2003-03-01 rather than on January 1st.
    values = list(range(59, 365)) + list(range(366)) + list(range(365))
    series = time_series(values, start_date=Date('D', '2003-03-01'))
    annual = convert_to_annual(series)
    expected = ma.masked_values([[-1, 59, 60],
                                 [59, 60, 61],
                                 [-1, 59, 60]], -1)
    assert_equal(annual[:, 59:62], expected)
def op(func, x, y, l='?'):
    """Apply the binary callable `func` to a Timeseries and a second operand.

    Parameters
    ----------
    func : callable
        Called as ``func(x, y)`` for two Timeseries, or on the underlying
        series data when the second operand is a number.
    x, y : Timeseries or number
        At least one operand must be exactly of type ``Timeseries``; the
        other may be a ``Timeseries``, ``int``, ``float`` or ``np.float64``.
    l : str, optional
        Label inserted between the operand names when naming the result.

    Returns
    -------
    The value of ``func(x, y)`` for two Timeseries, otherwise a new
    ``Timeseries`` named ``"<x.name><l><y>"``.

    Raises
    ------
    ValueError
        If neither operand is a Timeseries, or the second operand has an
        unsupported type.
    """
    logger.debug("TYPE XY=%s,%s", type(x), type(y))
    x_is_ts = type(x) == Timeseries
    y_is_ts = type(y) == Timeseries

    if not x_is_ts:
        if not y_is_ts:
            raise ValueError("operands not timeseries")
        # NOTE(review): the operands are swapped so the Timeseries comes
        # first; for a non-commutative `func` this changes the result.
        return op(func, y, x, l)

    if y_is_ts:
        return func(x, y)

    if type(y) in (int, float, np.float64):
        base = x._data
        # Copy of the data with every value overwritten by the scalar.
        constant = ts.time_series(base, copy=True)
        constant.data.fill(y)
        result = ts.time_series(func(base, constant), copy=True)
        # NOTE(review): the return values of adjust_endpoints(),
        # compressed() and fill_missing_dates() are discarded below;
        # confirm whether these calls act in place.
        try:
            result.adjust_endpoints()
        except ts.tseries.TimeSeriesError as exc:
            logger.debug("{TS OP} exception: %s", exc.value)
            result.compressed()
        name = '%s%s%s' % (x.name, l, str(y))
        if not result.is_valid():
            result.fill_missing_dates()
        return Timeseries(data=result, name=name)

    logger.error("1 operand should be a Timeseries other could be number")
    raise ValueError(
        "1 operand should be a Timeseries other could be number (%s,%s)"
        % (type(x), type(y)))
def test_convert(self):
    "Check the indices before and after converting the series to daily."
    series = self.series
    series.thresholds = (-0.5, +0.5)
    series.minimum_size = 5
    # The monthly cache must not hold 'indices_monthly' at this point.
    cached = series._cachedmonthly.get('indices_monthly', None)
    self.failUnless(cached is None)

    # Expected monthly indices, written twelve values per row.
    monthly = [
        0, 0, 0, +1, +1, +1, +1, +1, +1, 0, -1, -1,
        -1, -1, -1, -1, -1, 0, 0, -1, -1, -1, -1, -1,
        -1, +1, +1, +1, +1, +1, +1, 0, 0, 0, 0, 0,
        0, 0, 0, -1, -1, -1, -1, -1, 0, 0, +1, +1,
        +1, +1, +1, +1, +1, +1, +1, 0, 0, 0, 0, 0,
    ]
    expected = ts.time_series(monthly, dates=series._dates)
    assert_equal(series.indices, expected)

    # Convert to daily
    dseries = series.convert('D')
    daily_expected = ts.lib.backward_fill(expected.convert('D'))
    assert_equal(dseries.indices, daily_expected)
    #
    # Expected indices with full_year=True and reference season 'NDJ'.
    monthly = [
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, +1, +1,
        +1, +1, +1, +1, +1, +1, +1, +1, +1, +1, 0, 0,
    ]
    expected = ts.time_series(monthly, dates=series._dates)
    recomputed = dseries.set_indices(full_year=True, reference_season='NDJ')
    assert_equal(recomputed, ts.lib.backward_fill(expected.convert('D')))
def common_ts_setup():
    """Build four random monthly series used as shared test fixtures.

    Returns
    -------
    dict
        Keys ``series1D``, ``series5V``, ``series2D`` and ``series5N``,
        each mapping to a series starting at ``ts.now('M')``.
    """
    def random_5x5():
        # One fresh 5x5 block of uniform random numbers.
        return np.random.rand(25).reshape(5, 5)

    # Random draws happen in the same order as the inline original:
    # all data blocks first, then all mask blocks.
    blocks = [random_5x5() for _ in range(5)]
    block_masks = [random_5x5() > .5 for _ in range(5)]
    series2D = ts.time_series(blocks,
                              start_date=ts.now('M'),
                              mask=block_masks)

    values = np.random.rand(25)
    hidden = np.random.rand(25) > 0.7
    series1D = ts.time_series(values,
                              mask=hidden,
                              start_date=ts.now('M'),
                              fill_value=-999)

    values = random_5x5()
    hidden = random_5x5() > 0.7
    series5V = ts.time_series(values,
                              mask=hidden,
                              start_date=ts.now('M'))

    # Structured series with two float fields and one integer field.
    records = zip(np.random.rand(5), np.random.rand(5), np.arange(5))
    series5N = ts.time_series(records,
                              start_date=ts.now('M'),
                              dtype=[('a', float), ('b', float), ('c', int)])

    return {'series1D': series1D,
            'series5V': series5V,
            'series2D': series2D,
            'series5N': series5N}
def test_get_field_asattribute(self):
    "Check that record fields are reachable as attributes."
    # Only d, m, dates and rts are used; the rest is unpacked so that the
    # fixture length is still checked.
    (d, m, _mrec, _dlist, dates, _mts, rts) = self.data
    reversed_d = d[::-1]
    reversed_m = m[::-1]

    self.failUnless(isinstance(rts.f0, TimeSeries))
    self.failUnless(not isinstance(rts[0], TimeSeriesRecords))

    assert_equal(rts.f0, time_series(d, dates=dates, mask=m))
    assert_equal(rts.f1, time_series(reversed_d, dates=dates, mask=reversed_m))

    field_mask = nr.fromarrays([m, reversed_m])
    self.failUnless((rts._mask == field_mask).all())
    # Was _mask, now is recordmask
    assert_equal(rts.recordmask, np.r_[[m, reversed_m]].all(0))
    # Field-then-item and item-then-field access must agree.
    assert_equal(rts.f0[1], rts[1].f0)
def setup(self):
    "Store three 24-point monthly series and their record array in self.info."
    a = time_series(np.random.rand(24), start_date=ts.now('M'))
    b = time_series(np.random.rand(24) * 100,
                    dtype=int,
                    start_date=ts.now('M'))
    # An earlier variant built `c` from zero-padded strings
    # ("%02i" % _ for _ in np.arange(24)); plain integers are used now.
    c = time_series(np.arange(24), start_date=ts.now('M'))
    trec = fromarrays([a, b, c], dates=a.dates, names='a,b,c')
    self.info = (a, b, c, trec)
def setUp(self):
    """Set common information: ENSO indices scraped from the CPC web page.

    ``self.indices`` is set to ``None`` when BeautifulSoup cannot be
    imported; otherwise it is a monthly series built from the colour of
    each cell of the page's ``border=1`` table.
    """
    try:
        from BeautifulSoup import BeautifulSoup, SoupStrainer
    except ImportError:
        self.indices = None
        return
    # Load the file as a tree, but only take the SST table (border=1)
    from urllib import urlopen
    url = ("http://www.cpc.noaa.gov/products/analysis_monitoring/"
           "ensostuff/ensoyears.shtml")
    page = urlopen(url)
    try:
        content = page.read()
    finally:
        # Always release the connection, even if reading fails.
        page.close()
    table = BeautifulSoup(content,
                          parseOnlyThese=SoupStrainer("table", border=1))
    # Separate it by rows, but skip the first one (the header)
    years = []
    indices = []
    color = dict(red=+1, white=0, blue=-1)
    # Fallback attribute list for cells without a <span>: counts as white.
    deft = [(None, 'color:white')]
    for row in table.findAll("tr")[1:]:
        cols = row.findAll('td')
        # The first cell holds the year; the remaining cells are data.
        years.append(int(cols.pop(0).strong.string))
        # The colour name is the text after the last ':' in the value of
        # the span's first attribute.
        indices.append(
            [color[getattr(cell.span, 'attrs', deft)[0][-1].split(':')[-1]]
             for cell in cols])
    start_date = ts.Date('M', year=years[0], month=1)
    self.indices = time_series(np.array(indices).ravel(),
                               start_date=start_date)
def get_irt_data(date1, date2):
    """Load the IRT `f` values recorded between two datetimes.

    Parameters
    ----------
    date1, date2 : datetime
        Bounds of the query; each is converted to a minute-frequency
        ('T') date value, and the upper bound is extended by one.

    Returns
    -------
    The median-filtered minute series with the 99999 gap markers masked
    and then removed by ``compressed()``, or ``None`` when the database
    cannot be opened.

    Raises
    ------
    ValueError
        If the query returns no rows.
    """
    import sqlite3
    from scipy.signal import medfilt
    DB_FILE = "/home/Work/magn/IRT.sqlite"
    try:
        conn = sqlite3.connect(DB_FILE)
    except sqlite3.OperationalError:
        # The exception must be qualified: only `sqlite3` is imported here.
        print("Cannot find database!")
        return
    try:
        cursor = conn.cursor()
        # Read the requested interval.
        cursor.execute("""
            SELECT intdt, f
            FROM irt_vectordata
            WHERE intdt BETWEEN ? AND ?
            ORDER BY intdt ASC
            """,
            (
                ts.Date('T', datetime=date1).value,
                ts.Date('T', datetime=date2).value + 1,
            )
        )
        rows = cursor.fetchall()
    finally:
        # Close the connection even when the query itself fails.
        conn.close()
    if not rows:
        # Unpacking an empty result would raise an opaque ValueError.
        raise ValueError("no IRT data found in the requested interval")
    _dates, _values = zip(*rows)
    series = ts.time_series(medfilt(_values), dates=_dates, freq='T')
    # Mask the gaps, which are stored as 99999.0.
    series[(series == 99999)] = np.ma.masked
    return series.compressed()
def a0002(self, ts1, *_args):
    """Function used in CHEXTERNAL-A11.

    Up to 2010, in CHACTIVITY, the second one is shift(1) backwards.

    Works on a copy of ``ts1._data`` and visits every date except the
    last, using the position modulo 4 to pick the rule to apply. Returns
    the modified copy wrapped in an ``ets.Timeseries``.
    """
    # Unused, but kept: it makes the call fail when no extra argument
    # is supplied, exactly as before.
    args = _args[0]
    work = ts.time_series(ts1._data, copy=True)
    carry = 0
    for position, date in enumerate(work.dates[:-1]):
        phase = position % 4
        if date.year < 2010:
            if phase == 0:
                carry = work[date + 1] / 2.0
            elif phase == 2:
                carry = (work[date + 1] - 2 * work[date - 1]) / 2.0
            # Phases 1 and 3 reuse the value computed at the previous phase.
            work[date] = carry
        else:
            if phase > 0:
                # Subtract the previous original value, then remember
                # this date's original value for the next step.
                original = work[date]
                work[date] = work[date] - carry
                carry = original
            else:
                carry = work[date]
    # _report(ts1._data,_ts)
    return ets.Timeseries(data=work)
def tseries(self, _freq, *_args):
    """Build a quarterly or monthly Timeseries from a flat argument list.

    ``_args[0]`` holds ``[year, period, value, value, ...]`` where
    `period` is a quarter number for ``_freq == 'Q'`` and a month number
    for ``_freq == 'M'``. Any other frequency raises ``ValueError``.
    """
    args = _args[0]
    logger.debug('{TSERIES} freq=%s %s', _freq, args)

    is_quarterly = _freq == 'Q'
    is_monthly = _freq == 'M'
    if not (is_quarterly or is_monthly):
        raise ValueError("FREQUENZA NON PREVISTA in TSERIES")

    year = int(args[0])
    period = int(args[1])
    vals = [float(v) for v in args[2:]]
    if is_quarterly:
        freq = "Q"
        start = ts.Date(freq="Q", year=year, quarter=period)
    else:
        freq = "M"
        start = ts.Date(freq=freq, year=year, month=period)

    logger.debug('{TSERIES} %s', start)
    series = ts.time_series(vals, freq=freq, start_date=start)
    return ets.Timeseries(data=series)
def setUp(self):
    "Store a two-field structured monthly series and its source arrays."
    a = np.arange(10)
    b = np.random.rand(10)
    ndtype = [('a', float), ('b', float)]
    records = np.array(list(zip(a, b)), dtype=ndtype)
    tarr = ts.time_series(records, start_date=ts.now('M'))
    # Mask only field 'b' of the fourth record.
    tarr.mask[3] = (False, True)
    self.data = (tarr, a, b)
def test_sorted(self):
    "Check that each constructor sorts records given in reverse date order."
    dates = [ts.Date('D', string='2007-01-%02i' % day) for day in (3, 2, 1)]
    a = (3., 2., 1.)
    b = (30, 20, 10)
    ndtype = [('a', float), ('b', int)]
    controldates = date_array(dates, freq='D')
    controldates.sort_chronologically()
    sorted_rows = [(1., 10), (2., 20), (3., 30)]

    def check_records(trec):
        # Data, dates and both access styles for field 'a' must be sorted.
        assert_equal(trec._data.tolist(), sorted_rows)
        assert_equal(trec._dates, controldates)
        assert_equal(trec['a'], [1., 2., 3.])
        assert_equal(trec.a, [1., 2., 3.])

    series = time_series(list(zip(a, b)), dates, freq='D', dtype=ndtype)
    assert_equal(series._data.tolist(), sorted_rows)
    assert_equal(series._dates, controldates)
    #
    check_records(time_records(list(zip(a, b)), dates, freq='D', dtype=ndtype))
    #
    check_records(fromrecords(list(zip(a, b)), dates, names=('a', 'b')))
    #
    check_records(fromarrays([a, b], dates, names=('a', 'b')))
def test_apply_on_fields_series(self):
    "Test apply_on_fields w/ time_series"
    adtype = [('fi', int), ('ff', float)]
    # Field 'ff' of the second record is masked in both input and control.
    field_mask = [(0, 0), (0, 1), (0, 0)]
    source = ts.time_series([(1, 1.), (2, 2.), (3, 3.)],
                            mask=field_mask,
                            dtype=adtype,
                            start_date=ts.now('M'))
    result = apply_on_fields(source, ma.cumsum)
    expected = ts.time_series([(1, 1.), (3, -1), (6, 4.)],
                              mask=[(0, 0), (0, 1), (0, 0)],
                              dtype=adtype,
                              start_date=ts.now('M'))
    assert_equal(result, expected)
    self.failUnless(isinstance(result, ts.TimeSeries))
    assert_equal(result.dates, expected.dates)
def gen_ts_from_tpm(tpm, bin_width, length, freq='T'):
    """
    Create a timeseries using a Transition Probability Matrix.

    INPUT:
        tpm = ndarray of n*n values
        bin_width = width of one bin, used to scale bin numbers to speeds
        length (int) = number of values to generate
        freq = frequency of the output timeseries
    OUTPUT:
        tseries = timeseries of `length` values starting at "01-01-2001"
    """
    ## Cumulative matrix derived from the tpm
    cumu_tpm = gen_cumu_tpm(tpm)
    ## The walk starts near the median bin
    current_bin = int(len(cumu_tpm) / 2)
    speeds = []
    for _ in range(length):
        ## Pick the next bin from the current bin's cumulative row
        next_bin = weighted_choice(cumu_tpm[current_bin])
        ## Random wind speed inside the range covered by that bin
        speeds.append((next_bin + np.random.uniform()) * bin_width)
        ## The destination bin becomes the source of the next step
        current_bin = next_bin
    return ts.time_series(data=speeds, start_date="01-01-2001", freq=freq)
def block_average(timeseries, new_freq=''):
    """
    Reduce the size of a timeseries by averaging over larger time blocks.

    Input:  timeseries, new_freq (str). See scikits.timeseries doc.
    Output: block-averaged timeseries object in the new frequency.
    """
    # Relabel the data with the new frequency, e.g.
    #   [5.5, 4.5] | [13-May-2009 11:40 13-May-2009 11:50]  becomes
    #   [5.5, 4.5] | [13-May-2009 13-May-2009]
    relabelled = timeseries.asfreq(new_freq)
    stamps = relabelled.dates

    averages = []
    timesteps = []
    block_values = []
    block_stamp = stamps[0]

    def close_block():
        # Record the mean and the label of the block collected so far.
        averages.append(npmean(block_values))
        timesteps.append(block_stamp)

    # Consecutive entries that share a label belong to the same block; a
    # change of label closes the current block and opens a new one.
    for position in range(len(relabelled)):
        stamp = stamps[position]
        if stamp != block_stamp:
            close_block()
            block_values = []
            block_stamp = stamp
        block_values.append(relabelled[position])

    # Average of the last (or only) block
    if block_values:
        close_block()

    return ts.time_series(averages, dates=timesteps)
def _get_year(year, d, n=1):
    """
    Return a time-series with the same frequency as the input time-series,
    covering n complete years from the input year, with values taken
    from the input series.

    Dates later than the end of `d` are filled with NaN.

    :param year: base year
    :type year: integer
    :param d: time-series object; note that its mask is reset to False
    :type d: time-series
    :param n: number of years to take
    :type n: integer
    :return: output time-series
    :rtype: time-series
    :raises UnknownFrequencyError: if the frequency of `d` does not start
        with 'M', 'Q' or 'A'
    """
    f = d.freqstr        # input frequency
    nels = _ts_nels(f)   # number of elements in one year (M=12, Q=4, A=1)
    N = n * nels         # total number of elements to consider

    # Position of the last element, expressed as (year, period within the
    # year). This never passes a month above 12 or a quarter above 4 to
    # ts.Date, which the previous month=N / quarter=N form did for n > 1.
    last_year = year + (N - 1) // nels
    last_period = (N - 1) % nels + 1

    if f[0] == 'M':
        starty = ts.Date(f, year=year, month=1)
        endy = ts.Date(f, year=last_year, month=last_period)
    elif f[0] == 'Q':
        starty = ts.Date(f, year=year, quarter=1)
        endy = ts.Date(f, year=last_year, quarter=last_period)
    elif f[0] == 'A':
        starty = ts.Date(f, year=year)
        endy = ts.Date(f, year=year + N - 1)
    else:
        raise UnknownFrequencyError(f)

    # Output series: N NaN values starting at starty with frequency f
    s = ts.time_series([np.nan] * N, start_date=starty, freq=f)

    # Dates to fill
    da = ts.date_array(start_date=starty, end_date=endy, freq=f)

    # NOTE(review): the return values of these two calls are discarded;
    # confirm whether they modify `d` in place.
    d.fill_missing_dates()
    d.adjust_endpoints()

    # Copy values from d to s. Clearing the mask mutates the caller's
    # series, as the original code did.
    d.mask = False
    last_available = d.end_date
    for _d in da:
        if _d <= last_available:
            s[_d] = d[_d]
        else:
            s[_d] = np.nan
    return s
def test_ts_data_op01(cmd=None):
    """Check element-wise multiplication of two Timeseries.

    Both operands share the same start date with frequency code "b";
    one value of the second operand is overridden so that a non-round
    product can be checked. `cmd` is unused.
    """
    import numpy as np
    import scikits.timeseries as ts
    import scikits.timeseries.lib.reportlib as rl

    start = ts.Date("b", "2011-01-01")
    data1 = ts.time_series(np.arange(-100.0, 100.0, 10.0), start_date=start)
    data2 = ts.time_series(np.arange(-1000.0, 1000.0, 100.0),
                           start_date=ts.Date("b", "2011-01-01"))
    data2[3] = 77.77

    _ts1 = Timeseries(data=data1)
    _ts2 = Timeseries(data=data2)
    _ts3 = _ts1 * _ts2
    # Print the operands and the product side by side.
    rl.Report(_ts1._data, _ts2._data, _ts3._data)()

    assert _ts1.data["2011-01-17"] == 0
    assert _ts1.data["2011-01-28"] != 0
    assert _ts3.data["2011-01-28"] == 81000.0
    # abs() is required: without it any value far below -5443.9 passes.
    assert abs(_ts3.data["2011-01-06"] - -5443.9) < 0.01
def test_force_reference(self):
    "Check force_reference between a monthly and an annual series."
    mseries = ts.time_series(np.arange(24),
                             start_date=ts.Date('M', '2001-01'))
    aseries = ts.time_series([1, 2, 3],
                             start_date=ts.Date('A', '2001-01'))
    ends = [0, -1]
    #
    # Annual -> monthly reference: each yearly value is repeated 12 times.
    repeated = [1] * 12 + [2] * 12
    mtest = force_reference(aseries, mseries)
    assert_equal(mtest.freq, ts.check_freq('M'))
    assert_equal(mtest.dates[ends], mseries.dates[ends])
    assert_equal(mtest, repeated)
    mtest = force_reference(aseries, mseries, ma.sum)
    assert_equal(mtest, [1] * 12 + [2] * 12)
    #
    # Monthly -> annual reference: the third year has no data and is masked.
    atest = force_reference(mseries, aseries)
    assert_equal(atest.freq, ts.check_freq('A'))
    assert_equal(atest.dates[ends], aseries.dates[ends])
    assert_equal(atest, ma.array([5.5, 17.5, 0], mask=[0, 0, 1]))
    atest = force_reference(mseries, aseries, ma.sum)
    assert_equal(atest, ma.array([66, 210, 0], mask=[0, 0, 1]))
def load_coaps_period_networkdata(field, freq=None, func=None,
                                  start_date=None, end_date=None,
                                  cfgdict=coaps_config):
    """
    Load the data of all the stations, converted to the given period.

    Parameters
    ----------
    field : str
        Type of data to select.
        Must be one of ('tmin', 'tmax', 'rain')
    freq : var, optional
        Period to convert the dataset to.
    func : function, optional
        Function with which to convert the dataset.
        The function must output a 1D dataset.
    start_date : var, optional
        Starting date of the dataset.
    end_date : var, optional
        Ending date of the dataset.

    Returns
    -------
    period_networkdata
        Time series of the converted data for all stations of the network,
        with one float field per station id.

    Raises
    ------
    ValueError
        If `field` is not one of the valid data types.
    TypeError
        If the converted reference series is not 1D.
    """
    # Reject unknown data types up front
    valid_field = ('tmin', 'tmax', 'rain')
    if field not in valid_field:
        errmsg = "Invalid datatype: should be in %s.\n(got '%s')"
        raise ValueError(errmsg % (valid_field, field))
    # Check the frequency
    freq = ts.check_freq(freq)
    # Dictionary of adjusted data, keyed by station id
    datadict = load_coaps_adjusted_networkdata(start_date=start_date,
                                               end_date=end_date,
                                               cfgdict=cfgdict)
    station_ids = list(datadict.keys())
    # One float field per station, ordered by sorted station id
    ndtype = [("%s" % station_id, float) for station_id in sorted(station_ids)]
    # The first station fixes the length and the dates of the output
    first_station = datadict[station_ids[0]]
    reference = first_station[field].convert(freq, func=func)
    # Exit if we don't have a 1D series
    if reference.ndim != 1:
        errmsg = "Conversion error: the output dataset should be 1D.\n"\
                 "(got a %iD series instead)"
        raise TypeError(errmsg % reference.ndim)
    blank = np.empty(len(reference), dtype=ndtype)
    series = ts.time_series(blank, dates=reference.dates)
    series_values = series.series
    # Fill each station's field with its converted data
    for (station_id, station_data) in datadict.items():
        converted = station_data[field].convert(freq, func=func)
        series_values[station_id] = converted.series
    return series
def test_ts_data01(cmd=None):
    """Check date-string indexing on a Timeseries built with metadata.

    `cmd` is unused.
    """
    import numpy as np
    import scikits.timeseries as ts

    start = ts.Date("b", "2011-01-01")
    data = ts.time_series(np.arange(-100, 100, 10), start_date=start)
    md = {"PROVA": 0, "PUNTO": 1, "ANCORA": 2}
    # Named `wrapped` so the `ts` module alias is not rebound.
    wrapped = Timeseries(data=data, metadata=md)
    # wrapped.report()
    assert wrapped.data["2011-01-17"] == 0
    assert wrapped.data["2011-01-28"] != 0
def test_explicit_names_with_usecols(self):
    "Make sure the proper names are given to entries when usecols is not None"
    text = "AAA,2010,1,1,2,3\nBBB,2010,2,10,20,30"

    def dateconv(y, m):
        # Build a monthly date from the year and month columns.
        return Date('M', year=int(y), month=int(m))

    options = {
        'freq': 'M',
        'delimiter': ',',
        'dateconverter': dateconv,
        'datecols': (1, 2),
        'usecols': (1, 2, 3, 5),
        'names': "A, C",
    }
    result = tsfromtxt(StringIO.StringIO(text), **options)
    # Columns 3 and 5 are the data columns and receive the names A and C.
    expected = time_series([(1, 3), (10, 30)],
                           start_date=Date('M', '2010-01'),
                           dtype=[('A', int), ('C', int)])
    assert_equal(result, expected)
def test_mov_average_expw_mask(self):
    "Make sure that mov_average_expw doesn't modify the initial mask"
    size = 256
    first_masked, last_masked = 96, 128
    series = ts.time_series(np.random.rand(size),
                            start_date=ts.Date("D", "2008-01-01"))
    series[first_masked:last_masked] = ma.masked
    # Mask the input is expected to keep after the call.
    controlmask = np.zeros(size, dtype=bool)
    controlmask[first_masked:last_masked] = True
    #
    smoothed = mf.mov_average_expw(series, 16)
    assert_not_equal(smoothed.mask, series.mask)
    assert_equal(series.mask, controlmask)
def make_missing(self, missing_list, freq, model, fill=np.nan):
    """Create placeholder series for the listed names that are missing.

    For every name of `missing_list` found in ``self._missing``, a series
    with the same start date and length as ``self[model]`` is created,
    filled with `fill` and stored under that name. `freq` is not used by
    this implementation.
    """
    for name in missing_list:
        if name not in self._missing:
            continue
        template = self[model]._data
        size = len(template)
        placeholder = ts.time_series(np.empty(size),
                                     start_date=template.start_date,
                                     length=size)
        placeholder.fill(fill)
        self[name] = Timeseries(data=placeholder, name=name)
def test_append_timeseries(self):
    "Test appending to a MaskedTable"
    table = self.h5file.root.tseries
    tseries = self.tseries
    # Three new records that start right after the last stored date.
    records = zip(np.random.rand(3), np.arange(3) + 10)
    newdata = ts.time_series(records,
                             mask=[(0, 0), (1, 0), (0, 1)],
                             dtype=tseries.dtype,
                             start_date=tseries.dates[-1] + 1)
    table.append(newdata)
    stored = table.read()
    self.failUnless(isinstance(stored, TimeSeries))
    assert_equal_records(stored, ts.concatenate((tseries, newdata)))
def __init__(self, *args, **kwds):
    "Create a random masked 10x10 array, its daily series and an HDF5 file."
    TestCase.__init__(self, *args, **kwds)
    # The data is drawn before the mask, as in the inline original.
    values = np.random.rand(100).reshape(10, 10)
    hidden = np.random.rand(100).reshape(10, 10) > 0.7
    self.marray = ma.array(values,
                           mask=hidden,
                           fill_value=-999,
                           hard_mask=True)
    self.marray._optinfo['memo'] = "Some line of text"
    self.tseries = ts.time_series(self.marray, start_date=ts.now('D'))
    # NOTE(review): tempfile.mktemp only returns a name and is race-prone;
    # kept because the HDF5 file is created by openFile below.
    self.file = tempfile.mktemp(".hdf5")
    self.fileh = tables.openFile(self.file, 'a')
    self.populate()
def test_with_converter(self):
    "Test tsfromtxt w/ an explicit converter"
    source = StringIO.StringIO("2001-01, 00mm\n2002-01, 10mm\n2003-01, 00mm")

    def strip_mm(s):
        # Keep the number that precedes the 'mm' unit.
        return float(s.split('mm')[0])

    # Column 1 holds the values; column 0 holds the dates.
    result = tsfromtxt(source,
                       delimiter=',',
                       converters={1: strip_mm},
                       datecols=0,
                       freq='M',
                       dtype=float)
    expected = time_series([0., 10., 0.],
                           dates=['2001-01', '2002-01', '2003-01'],
                           freq='M')
    assert isinstance(result, TimeSeries)
    assert_equal(result, expected)
    assert_equal(result.dates, expected.dates)
def test_explicit_names(self):
    "Test w/ explicit dtype (and explicit float)"
    text = "200510, 380.00, 386.30\n200511, 386.85, 388.55\n"
    price_dtype = [('open', "f4"), ('close', "f4")]

    def dconverter(x):
        # 'YYYYMM...' -> monthly Date built from 'YYYY-MM'.
        return Date("M", "%s-%s" % (x[:4], x[4:6]))

    options = {'delimiter': ",", 'datecols': 0, 'dateconverter': dconverter}
    expected = time_series([(380., 386.30), (386.85, 388.55)],
                           start_date="2005-10",
                           freq="M",
                           dtype=[('open', "f4"), ('close', "f4")])
    result = tsfromtxt(StringIO.StringIO(text), dtype=price_dtype, **options)
    assert_equal(result, expected)
def test_with_negative_datecols(self):
    "Test negative datecols"
    # The date is in the last column and is addressed with datecols=-1.
    text = "380.00, 386.30, 200510\n386.85, 388.55, 200511\n"
    price_dtype = [('open', "f4"), ('close', "f4")]

    def dconverter(x):
        # 'YYYYMM...' -> monthly Date built from 'YYYY-MM'.
        return Date("M", "%s-%s" % (x[:4], x[4:6]))

    options = {'delimiter': ",", 'datecols': -1, 'dateconverter': dconverter}
    expected = time_series([(380., 386.30), (386.85, 388.55)],
                           start_date="2005-10",
                           freq="M",
                           dtype=[('open', "f4"), ('close', "f4")])
    result = tsfromtxt(StringIO.StringIO(text), dtype=price_dtype, **options)
    assert_equal(result, expected)
def test_ontimeseries(self):
    "Test cmov_average on a 1D TimeSeries."
    data = ts.time_series(self.maskeddata, start_date=ts.now("D"))
    for width in [3, 5, 7]:
        # Half-width of the centred window. Floor division keeps `k` an
        # integer (it is used as a slice bound) even under true division.
        k = (width - 1) // 2
        ravg = mf.cmov_average(data, width)
        self.failUnless(isinstance(ravg, MaskedArray))
        assert_equal(ravg, data)
        # Expected mask: k points at each end plus the window centred on
        # index 10.
        # NOTE(review): presumably index 10 is the masked point of
        # self.maskeddata -- confirm against the fixture.
        m = np.zeros(len(data), bool)
        m[:k] = True
        m[-k:] = True
        m[10 - k:10 + k + 1] = True
        assert_equal(ravg._mask, m)
        assert_equal(ravg._dates, data._dates)
def __init__(self, *args, **kwds):
    "Create a two-field monthly series, its masked array and an HDF5 file."
    TestCase.__init__(self, *args, **kwds)
    records = zip(np.random.rand(10), np.arange(10))
    series = ts.time_series(records,
                            start_date=ts.now('M'),
                            dtype=[('a', float), ('b', int)])
    # Mask field 'b' of the first record and field 'a' of the last one.
    series.mask[0] = (0, 1)
    series.mask[-1] = (1, 0)
    self.tseries = series
    self.marray = series._series
    # NOTE(review): tempfile.mktemp only returns a name and is race-prone;
    # kept because the HDF5 file is created by openFile below.
    self.file = tempfile.mktemp(".hdf5")
    self.h5file = tables.openFile(self.file, 'a')
    self.populate()
def attach_rows(self, result):
    """Wrap `result` in a time series dated with the trailing row labels.

    The last ``len(result)`` entries of ``self.row_labels`` are used as
    dates.
    """
    from scikits.timeseries import time_series
    trailing_labels = self.row_labels[-len(result):]
    return time_series(result, dates=trailing_labels)
def attach_dates(self, result):
    "Wrap `result` in a time series dated with ``self.predict_dates``."
    from scikits.timeseries import time_series
    prediction_dates = self.predict_dates
    return time_series(result, dates=prediction_dates)