def _acf(x, mode):
    """Compute the auto-correlation function of the 1D series `x`.

    The computations are performed on anomalies (deviations from average).
    Gaps in the series are filled first, the anomalies are then computed and
    the missing values filled with 0.

    With ``y`` the zero-filled anomalies and ``a`` the validity indicator
    (1 where the input is not masked, 0 where it is), the estimate at lag
    ``k >= 1`` is::

        sum_t(y[t] * y[t+k]) / sum_t(a[t+k] * y[t]**2)            if mode
        sum_t(y[t] * y[t+k]) / sqrt(sum_t(a[t+k] * y[t]**2)
                                    * sum_t(a[t] * y[t+k]**2))    otherwise

    Parameters
    ----------
    x : sequence
        Input data, at most 1D. If `x` is a TimeSeries with missing dates,
        the gaps are filled first.
    mode : bool
        Selects the normalisation used in the denominator (see above).

    Returns
    -------
    acf : masked array
        ``[1, r(1), ..., r(n-1), r(n-1), ..., r(1)]``, i.e. lag 0 followed
        by the positive lags and then the same values mirrored. Invalid
        entries (e.g. from a zero denominator) are masked.

    Raises
    ------
    ValueError
        If `x` has more than one dimension.
    """
    x = ma.array(x, copy=False, subok=True, dtype=float)
    if x.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # Make sure there's no gap in the data.
    if isinstance(x, TimeSeries) and x.has_missing_dates():
        x = ts.fill_missing_dates(x)
    # Validity indicator: 1 for unmasked entries, 0 for masked ones.
    m = np.logical_not(ma.getmaskarray(x)).astype(int)
    # Anomalies; masked entries become 0 and drop out of every sum below.
    x = x.anom().filled(0).view(ndarray)
    xx = x * x
    n = len(x)
    # Lagged cross-products sum_t x[t] * x[t+k] for k = 1, ..., n-1
    # ('full' output index n-1 is lag 0).
    numerator = np.correlate(x, x, 'full')[n:]
    if mode:
        denominator = np.fromiter(
            (np.sum(m[k:] * xx[:-k]) for k in range(1, n)),
            dtype=float)
    else:
        denominator = np.fromiter(
            (np.sqrt((m[k:] * xx[:-k]).sum() * (m[:-k] * xx[k:]).sum())
             for k in range(1, n)),
            dtype=float)
    # Normalise exactly once: dividing the cross-products by an already
    # normalised ratio would cancel the numerator and leave only the
    # denominator.
    poslags = numerator / denominator
    return ma.fix_invalid(
        np.concatenate([np.array([1.]), poslags, poslags[::-1]]))
def avf(x, periodogram=True):
    r"""Compute the auto-covariance function of the series `x`.

    The computations are performed on anomalies (deviations from average).
    Gaps in the series are filled first, anomalies are then computed and
    missing values filled with 0.

    The autocovariance at lag k, $\hat{R}(k)$, of a series {x_1,...,x_n}
    with mean 0 is defined as:

        \hat{R}(k) = \sum_{t=1}^{n-k}{y_t y_{t+k}} / \sum_{t=1}^{n-k}{a_t a_{t+k}}

    where $y_k = x_k$ if $x_k$ is not masked and $y_k = 0$ if $x_k$ is
    masked, and where $a_k = 1$ if $x_k$ is not masked and $a_k = 0$ if
    $x_k$ is masked.
    If the optional parameter `periodogram` is True, the denominator of the
    previous expression is $\sum_{t=1}^{n-k}{a_t a_{t+k}} + k$.

    Parameters
    ----------
    x : sequence
        Input data, at most 1D. If `x` is a TimeSeries object with missing
        dates, the gaps are filled first.
    periodogram : {True, False}, optional
        Whether to return a periodogram or a standard estimate of the
        autocovariance.

    Returns
    -------
    avf : ma.array
        Autocovariance at lags [0, 1, ..., n-1, -(n-1), ..., -1]. Invalid
        entries (e.g. from a zero denominator) are masked.

    Raises
    ------
    ValueError
        If `x` has more than one dimension.
    """
    x = ma.array(x, copy=False, subok=True, dtype=float)
    if x.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # Make sure there's no gap in the data.
    if isinstance(x, TimeSeries) and x.has_missing_dates():
        x = ts.fill_missing_dates(x)
    # Validity indicator a_t: 1 for unmasked entries, 0 for masked ones.
    m = np.logical_not(ma.getmaskarray(x)).astype(int)
    # Anomalies; masked entries become 0 and drop out of the sums.
    x = x.anom().filled(0).view(ndarray)
    n = len(x)
    # 'full' correlations are ordered by lag -(n-1), ..., 0, ..., n-1.
    _avf = np.correlate(x, x, 'full')
    denom = np.correlate(m, m, 'full')
    if periodogram:
        # Add |k| to the number of valid pairs at each lag k.
        denom += np.concatenate([np.arange(n - 1, 0, -1), np.arange(n)])
    _avf /= denom
    # Put the non-negative lags first, followed by the negative ones.
    _avf = np.concatenate([_avf[n - 1:], _avf[:n - 1]])
    return ma.fix_invalid(_avf)
def avf(x, periodogram=True):
    r"""Compute the auto-covariance function of the series `x`.

    The computations are performed on anomalies (deviations from average).
    Gaps in the series are filled first, anomalies are then computed and
    missing values filled with 0.

    The autocovariance at lag k, $\hat{R}(k)$, of a series {x_1,...,x_n}
    with mean 0 is defined as:

        \hat{R}(k) = \sum_{t=1}^{n-k}{y_t y_{t+k}} / \sum_{t=1}^{n-k}{a_t a_{t+k}}

    where $y_k = x_k$ if $x_k$ is not masked and $y_k = 0$ if $x_k$ is
    masked, and where $a_k = 1$ if $x_k$ is not masked and $a_k = 0$ if
    $x_k$ is masked.
    If the optional parameter `periodogram` is True, the denominator of the
    previous expression is $\sum_{t=1}^{n-k}{a_t a_{t+k}} + k$.

    Parameters
    ----------
    x : sequence
        Input data, at most 1D. If `x` is a TimeSeries object with missing
        dates, the gaps are filled first.
    periodogram : {True, False}, optional
        Whether to return a periodogram or a standard estimate of the
        autocovariance.

    Returns
    -------
    avf : ma.array
        Autocovariance at lags [0, 1, ..., n-1, -(n-1), ..., -1]. Invalid
        entries (e.g. from a zero denominator) are masked.

    Raises
    ------
    ValueError
        If `x` has more than one dimension.
    """
    x = ma.array(x, copy=False, subok=True, dtype=float)
    if x.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # Make sure there's no gap in the data.
    if isinstance(x, TimeSeries) and x.has_missing_dates():
        x = ts.fill_missing_dates(x)
    # Validity indicator a_t: 1 for unmasked entries, 0 for masked ones.
    m = np.logical_not(ma.getmaskarray(x)).astype(int)
    # Anomalies; masked entries become 0 and drop out of the sums.
    x = x.anom().filled(0).view(ndarray)
    n = len(x)
    # 'full' correlations are ordered by lag -(n-1), ..., 0, ..., n-1.
    _avf = np.correlate(x, x, 'full')
    denom = np.correlate(m, m, 'full')
    if periodogram:
        # Add |k| to the number of valid pairs at each lag k.
        denom += np.concatenate([np.arange(n - 1, 0, -1), np.arange(n)])
    _avf /= denom
    # Put the non-negative lags first, followed by the negative ones.
    _avf = np.concatenate([_avf[n - 1:], _avf[:n - 1]])
    return ma.fix_invalid(_avf)
def _set_annual_indices(self, minimum_size=None, reference_season=None):
    """
    Compute, cache and return the ENSO indices per periods of 12 months.

    Each period starts at the first month of the reference season if there
    is one, otherwise at October. The monthly indices are computed first
    through :meth:`set_monthly_indices`; the value found at the starting
    month is then carried forward over the rest of the period.

    Parameters
    ----------
    minimum_size : {None, int}, optional
        Minimum size for the groups of consecutive values.
        If None, defaults to :attr:`minimum_size`.
    reference_season : {None, string or sequence}, optional
        Reference season.
        If None, defaults to :attr:`reference_season`.

    Returns
    -------
    The annual indices, also stored in the cache under 'indices_annual'.

    See Also
    --------
    :meth:`set_monthly_indices`
        Sets the ENSO indices for each month.
    """
    # Start from the monthly indices.
    monthly = self.set_monthly_indices(minimum_size=minimum_size,
                                       reference_season=reference_season)
    # set_monthly_indices loses the full_year flag: switch it back on.
    self.full_year = True
    # Keep only the value at the first month of each 12-month period.
    season = self.refseason
    if not season:
        yearly = monthly[self.months == 10]
    else:
        yearly = monthly[self.months == season[0]]
        season = months2code(season)
    # Propagate that value forward and span the full range of dates.
    propagated = forward_fill(fill_missing_dates(yearly))
    yearly = adjust_endpoints(propagated, self._dates[0], self._dates[-1])
    # Cache the result before handing it back.
    self._cachedmonthly['indices_annual'] = yearly
    return yearly
def _set_annual_indices(self, minimum_size=None, reference_season=None):
    """
    Compute, cache and return the ENSO indices per periods of 12 months.

    Each period starts at the first month of the reference season if there
    is one, otherwise at October. The monthly indices are computed first
    through :meth:`set_monthly_indices`; the value found at the starting
    month is then carried forward over the rest of the period.

    Parameters
    ----------
    minimum_size : {None, int}, optional
        Minimum size for the groups of consecutive values.
        If None, defaults to :attr:`minimum_size`.
    reference_season : {None, string or sequence}, optional
        Reference season.
        If None, defaults to :attr:`reference_season`.

    Returns
    -------
    The annual indices, also stored in the cache under "indices_annual".

    See Also
    --------
    :meth:`set_monthly_indices`
        Sets the ENSO indices for each month.
    """
    # Start from the monthly indices.
    monthly = self.set_monthly_indices(minimum_size=minimum_size,
                                       reference_season=reference_season)
    # set_monthly_indices loses the full_year flag: switch it back on.
    self.full_year = True
    # Keep only the value at the first month of each 12-month period.
    season = self.refseason
    if not season:
        yearly = monthly[self.months == 10]
    else:
        yearly = monthly[self.months == season[0]]
        season = months2code(season)
    # Propagate that value forward and span the full range of dates.
    propagated = forward_fill(fill_missing_dates(yearly))
    yearly = adjust_endpoints(propagated, self._dates[0], self._dates[-1])
    # Cache the result before handing it back.
    self._cachedmonthly["indices_annual"] = yearly
    return yearly
def _acf(x, mode):
    """Compute the auto-correlation function of the 1D series `x`.

    The computations are performed on anomalies (deviations from average).
    Gaps in the series are filled first, the anomalies are then computed and
    the missing values filled with 0.

    With ``y`` the zero-filled anomalies and ``a`` the validity indicator
    (1 where the input is not masked, 0 where it is), the estimate at lag
    ``k >= 1`` is::

        sum_t(y[t] * y[t+k]) / sum_t(a[t+k] * y[t]**2)            if mode
        sum_t(y[t] * y[t+k]) / sqrt(sum_t(a[t+k] * y[t]**2)
                                    * sum_t(a[t] * y[t+k]**2))    otherwise

    Parameters
    ----------
    x : sequence
        Input data, at most 1D. If `x` is a TimeSeries with missing dates,
        the gaps are filled first.
    mode : bool
        Selects the normalisation used in the denominator (see above).

    Returns
    -------
    acf : masked array
        ``[1, r(1), ..., r(n-1), r(n-1), ..., r(1)]``, i.e. lag 0 followed
        by the positive lags and then the same values mirrored. Invalid
        entries (e.g. from a zero denominator) are masked.

    Raises
    ------
    ValueError
        If `x` has more than one dimension.
    """
    x = ma.array(x, copy=False, subok=True, dtype=float)
    if x.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # Make sure there's no gap in the data.
    if isinstance(x, TimeSeries) and x.has_missing_dates():
        x = ts.fill_missing_dates(x)
    # Validity indicator: 1 for unmasked entries, 0 for masked ones.
    m = np.logical_not(ma.getmaskarray(x)).astype(int)
    # Anomalies; masked entries become 0 and drop out of every sum below.
    x = x.anom().filled(0).view(ndarray)
    xx = x * x
    n = len(x)
    # Lagged cross-products sum_t x[t] * x[t+k] for k = 1, ..., n-1
    # ('full' output index n-1 is lag 0).
    numerator = np.correlate(x, x, 'full')[n:]
    if mode:
        denominator = np.fromiter(
            (np.sum(m[k:] * xx[:-k]) for k in range(1, n)),
            dtype=float)
    else:
        denominator = np.fromiter(
            (np.sqrt((m[k:] * xx[:-k]).sum() * (m[:-k] * xx[k:]).sum())
             for k in range(1, n)),
            dtype=float)
    # Normalise exactly once: dividing the cross-products by an already
    # normalised ratio would cancel the numerator and leave only the
    # denominator.
    poslags = numerator / denominator
    return ma.fix_invalid(
        np.concatenate([np.array([1.]), poslags, poslags[::-1]]))
quotes = quotes_historical_yahoo('INTC', date1, date2)

# The dates from the yahoo quotes module get returned as integers, which
# happen to correspond to the integer representation of 'DAILY' frequency
# dates in the scikits.timeseries module. So create a DateArray of daily
# dates, then convert this to business day frequency afterwards.
dates = ts.date_array([q[0] for q in quotes], freq='DAILY')
dates = dates.asfreq('BUSINESS')

opens = [q[1] for q in quotes]
raw_series = ts.time_series(opens, dates)

# `fill_missing_dates` will insert masked values for any missing data
# points. Note that you could plot the series without doing this, but it
# would cause missing values to be linearly interpolated rather than left
# empty in the plot.
series = ts.fill_missing_dates(raw_series)

fig = tpl.tsfigure()
fsp = fig.add_tsplot(111)
fsp.tsplot(series, '-')

# Add grid lines at start of each quarter. Grid lines appear at the major
# tick marks by default (which, due to the dynamic nature of the ticks for
# time series plots, cannot be guaranteed to be at quarter start). So if you
# want grid lines to appear at specific intervals, you must first specify
# xticks explicitly.
dates = series.dates
# A date starts a quarter when the previous date falls in another quarter.
quarter_starts = dates[dates.quarter != (dates - 1).quarter]
fsp.set_xticks(quarter_starts.tovalue())
fsp.grid()

plt.show()
def weighted(self,*args):
    """Build a weighted combination of named series looked up in ``IS``.

    ``args`` must unpack as ``(weights, [data, series])``. When all three
    pass ``check_type(..., str, unicode)`` (presumably a string type check;
    verify against its definition):

    * ``series`` must contain a comma and is split into a list of names,
      otherwise ValueError is raised;
    * ``weights`` and ``data`` are each either a comma-separated list of
      names, or a template containing ``$$`` that is expanded once per
      entry of ``series``; anything else raises ValueError.

    If both name lists have the same length, every (data, weight) pair whose
    two names are present in ``IS`` adds ``IS[d].data * IS[w].data[0]`` to
    the running sum ``W`` and ``IS[w].data[0]`` to ``S``. ``R`` is then an
    ``ets.Timeseries`` built from ``W / S``, or None if that division raises
    ZeroDivisionError. Pairs with a name missing from ``IS`` are only logged.

    NOTE(review): the nesting of some statements below is inferred and should
    be confirmed against the original file. No ``return`` statement is
    visible in this excerpt, so as shown ``R`` is never handed back.
    """
    R = None
    (weights,[data,series]) = args
    logger.debug("Params are (w=%s,d=%s,s=%s)" % (weights,data,series))
    if check_type(weights,str,unicode) and check_type(data,str,unicode) and check_type(series,str,unicode):
        logger.debug("Params are (w=%s,d=%s,s=%s)" % (weights,data,series))
        Ws = weights
        # The list of series names drives the expansion of '$$' templates.
        if ',' in series:
            Ss = series.split(',')
        else:
            raise ValueError,"La stringa di definizione delle serie è errata"
        # Weights: explicit comma list, or a '$$' template expanded per series.
        if ',' in weights:
            Ws = weights.split(',')
        else:
            if '$$' not in weights:
                raise ValueError,"La stringa di definizione dei pesi non permette la generazione di un elenco"
            Ws = [ weights.replace('$$',S) for S in Ss ]
        # Data: same rules as for the weights.
        if ',' in data:
            Ds = data.split(',')
        else:
            if '$$' not in data:
                raise ValueError,"La stringa di definizione dei dati non permette la generazione di un elenco"
            Ds = [ data.replace('$$',S) for S in Ss ]
        if len(Ws)==len(Ds):
            W = 0.0   # running weighted sum
            S = 0.0   # running sum of the weights
            for d,w in zip(Ds,Ws):
                wb=IS.has_key(w)
                db=IS.has_key(d)
                if wb and db:
                    Wn = IS[w].data[0]   # only the first value of the weight series is used
                    Dn = IS[d].data
                    try:
                        # Disabled experiments (NaN replacement / alignment):
                        # if isinstance(Dn,ts.TimeSeries):
                        #     Dn = np.nan_to_num(Dn)
                        # if isinstance(W,ts.TimeSeries):
                        #     W = np.nan_to_num(W)
                        # if isinstance(W,ts.TimeSeries):
                        #     ts.align_series(W,Dn)
                        # Work on a copy so the series stored in IS is not modified.
                        if isinstance(Dn,ts.TimeSeries):
                            Dn = ts.time_series(Dn,copy=True)
                        # Once W is itself a TimeSeries, bring Dn to W's
                        # frequency and date range before accumulating.
                        # NOTE(review): this `if` may have been nested under
                        # the previous one in the original layout - confirm.
                        if isinstance(W,ts.TimeSeries):
                            Dn = ts.convert(Dn,W.freq)
                            # NOTE(review): the return value of this call is discarded.
                            ts.fill_missing_dates(Dn,dates=W.dates,fill_value=np.nan)
                            Dn = ts.adjust_endpoints(Dn, start_date=W.start_date, end_date=W.end_date, copy=True)
                        # Disabled debugging:
                        # _ts = np.ma.masked_invalid(Dn)
                        # Dn = _ts.filled(0.0)
                        # if isinstance(W,ts.TimeSeries):
                        #     print "GOò"
                        #     _report(W,Dn*Wn)
                        W += Dn*Wn
                        # if isinstance(W,ts.TimeSeries):
                        #     print "="
                        #     _report(W)
                    except Exception, exc:
                        # Disabled debugging:
                        # if isinstance(W,ts.TimeSeries):
                        #     _report(W)
                        # print "DN"
                        # _report(Dn)
                        # print w,d,"W",type(W),W,W.shape,"D",type(Dn),Dn,Dn.shape,"Wn",type(Wn),Wn
                        raise
                        # NOTE(review): as laid out here the two statements
                        # below are unreachable after the bare `raise`; the
                        # log format has three placeholders for two arguments
                        # and the last line is a bare tuple expression, not a
                        # `raise` - confirm the intent.
                        logger.error('Non posso comporre %s * %s | %s', d,w)
                        ValueError, "%s * %s" % (d,w)
                    S += Wn
                else:
                    # A missing series is only reported; the pair is skipped.
                    if not wb:
                        logger.error(u'la serie %s non è presente nell\'IS. ATTENZIONE i risultati dell\'aggregazione possono non essere sono attendibili',w)
                    if not db:
                        logger.error(u'la serie %s non è presente nell\'IS. ATTENZIONE i risultati dell\'aggregazione possono non essere sono attendibili',d)
            try:
                R = ets.Timeseries(data=W / S,name="WEIGHTED(\"%s\",\"%s\")" % (','.join(Ws),','.join(Ds)))
            except ZeroDivisionError, exc:
                logger.warn('ZeroDivisionError')
                R = None
""" The dates from the yahoo quotes module get returned as integers, which happen to correspond to the integer representation of 'DAILY' frequency dates in the scikits.timeseries module. So create a DateArray of daily dates, then convert this to business day frequency afterwards. """ dates = ts.date_array([q[0] for q in quotes], freq='DAILY').asfreq('BUSINESS') opens = [q[1] for q in quotes] raw_series = ts.time_series(opens, dates) """ `fill_missing_dates` will insert masked values for any missing data points. Note that you could plot the series without doing this, but it would cause missing values to be linearly interpolated rather than left empty in the plot. """ series = ts.fill_missing_dates(raw_series) fig = tpl.tsfigure() fsp = fig.add_tsplot(111) fsp.tsplot(series, '-') """ Add grid lines at start of each quarter. Grid lines appear at the major tick marks by default (which, due to the dynamic nature of the ticks for time series plots, cannot be guaranteed to be at quarter start). So if you want grid lines to appear at specific intervals, you must first specify xticks explicitly. """ dates = series.dates quarter_starts = dates[dates.quarter != (dates-1).quarter] fsp.set_xticks(quarter_starts.tovalue()) fsp.grid()