Python fill_missing_datesの例、scikits.timeseries.fill_missing_dates Pythonの例

コード例 #1

0

ファイルを表示

def _acf(x, mode):
    """Return the lag-wise auto-correlation estimate of the 1D series `x`.

    The input is converted to a masked float array.  When it is a
    ``TimeSeries`` with missing dates, the gaps are filled first.  Anomalies
    (deviations from the average) are then computed and masked entries are
    replaced by 0 before any product is taken.

    :Parameters:
        `x` : TimeSeries or sequence
            One-dimensional input series.
        `mode` : bool
            Selects the scaling term used at each lag ``k``.  If true, the
            term is the sum of the squared anomalies whose partner ``k``
            steps later is not masked.  Otherwise it is the square root of
            the product of that sum and of its mirror image (squared
            anomalies ``k`` steps later whose earlier partner is not masked).

    :Returns:
        Masked array of length ``2*n - 1``: the value 1 for lag 0, the
        values for lags ``1 .. n-1``, then those same values in reverse
        order.  Invalid (NaN/inf) entries are masked.

    :Raises:
        ValueError if `x` has more than one dimension.
    """
    series = ma.array(x, copy=False, subok=True, dtype=float)
    if series.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # A TimeSeries with date gaps is made regular before anything else.
    if isinstance(series, TimeSeries) and series.has_missing_dates():
        series = ts.fill_missing_dates(series)

    # 1 where a value is valid, 0 where it is masked.
    valid = np.logical_not(ma.getmaskarray(series)).astype(int)
    anomalies = series.anom().filled(0).view(ndarray)
    squares = anomalies * anomalies
    npts = len(anomalies)

    # Keep only the non-negative lags of the full correlation.
    autocov = np.correlate(anomalies, anomalies, 'full')[npts - 1:]

    if mode:
        def _scale(lag):
            return np.sum(valid[lag:] * squares[:-lag])
    else:
        def _scale(lag):
            return np.sqrt((valid[lag:] * squares[:-lag]).sum()
                           * (valid[:-lag] * squares[lag:]).sum())

    ratios = np.array(
        [np.sum(anomalies[lag:] * anomalies[:-lag]) / _scale(lag)
         for lag in range(1, npts)],
        dtype=float)

    # NOTE(review): every ratio already has the lag-k cross-product as its
    # numerator, and autocov[k] looks like that same cross-product, so this
    # quotient appears to reduce to the scaling term alone -- confirm against
    # the intended formula before relying on the values.
    positive_lags = autocov[1:] / ratios
    pieces = [np.array([1.]), positive_lags, positive_lags[::-1]]
    return ma.fix_invalid(np.concatenate(pieces))

コード例 #2

0

ファイルを表示

ファイル: avcf.py プロジェクト: B-Rich/scikits.timeseries-sandbox

def avf(x, periodogram=True):
    r"""Compute the auto-covariance function of the series `x`.

    The computations are performed on anomalies (deviations from average).
    Gaps in the series are filled first, anomalies are then computed and
    missing values are filled with 0.

    The autocovariance at lag k, $\hat{R}(k)$, of a series {x_1,...,x_n} with
    mean 0 is defined as:

        \hat{R}(k) = \sum_{t=1}^{n-k}{y_t y_{t+k}} / \sum_{t=1}^{n-k}{a_t a_{t+k}}

    where $y_k = x_k$ if $x_k$ is not masked and $y_k = 0$ if $x_k$ is masked,
    and where $a_k = 1$ if $x_k$ is not masked and $a_k = 0$ if $x_k$ is
    masked.  If the optional parameter `periodogram` is True, the denominator
    of the previous expression is $\sum_{t=1}^{n-k}{a_t a_{t+k}} + k$.

    Parameters
    ----------
    x : sequence
        Input data. If x is a TimeSeries object with missing dates, it is
        filled first.
    periodogram : {True, False}, optional
        Whether to return a periodogram or a standard estimate of the
        autocovariance.

    Returns
    -------
    avf : ma.array
        Autocovariance at lags [0, 1, ..., n-1, -(n-1), ..., -1].
        Invalid (NaN/inf) entries are masked.

    Raises
    ------
    ValueError
        If `x` has more than one dimension.

    """
    x = ma.array(x, copy=False, subok=True, dtype=float)
    if x.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # make sure there's no gap in the data
    if isinstance(x, TimeSeries) and x.has_missing_dates():
        x = ts.fill_missing_dates(x)
    # 1 where the value is valid, 0 where it is masked
    m = np.logical_not(ma.getmaskarray(x)).astype(int)
    x = x.anom().filled(0).view(ndarray)
    n = len(x)
    # 'full' correlations cover the lags -(n-1) .. (n-1), in that order
    _avf = np.correlate(x, x, 'full')
    denom = np.correlate(m, m, 'full')
    if periodogram:
        # add |lag| to the count of valid pairs at every lag
        denom += np.concatenate([np.arange(n - 1, 0, -1), np.arange(n)])
    _avf /= denom
    # reorder so that lag 0 comes first, followed by the negative lags
    _avf = np.concatenate([_avf[n - 1:], _avf[:n - 1]])
    return ma.fix_invalid(_avf)

コード例 #3

0

ファイルを表示

def avf(x, periodogram=True):
    r"""Compute the auto-covariance function of the series `x`.

    The computations are performed on anomalies (deviations from average).
    Gaps in the series are filled first, anomalies are then computed and
    missing values are filled with 0.

    The autocovariance at lag k, $\hat{R}(k)$, of a series {x_1,...,x_n} with
    mean 0 is defined as:

        \hat{R}(k) = \sum_{t=1}^{n-k}{y_t y_{t+k}} / \sum_{t=1}^{n-k}{a_t a_{t+k}}

    where $y_k = x_k$ if $x_k$ is not masked and $y_k = 0$ if $x_k$ is masked,
    and where $a_k = 1$ if $x_k$ is not masked and $a_k = 0$ if $x_k$ is
    masked.  If the optional parameter `periodogram` is True, the denominator
    of the previous expression is $\sum_{t=1}^{n-k}{a_t a_{t+k}} + k$.

    Parameters
    ----------
    x : sequence
        Input data. If x is a TimeSeries object with missing dates, it is
        filled first.
    periodogram : {True, False}, optional
        Whether to return a periodogram or a standard estimate of the
        autocovariance.

    Returns
    -------
    avf : ma.array
        Autocovariance at lags [0, 1, ..., n-1, -(n-1), ..., -1].
        Invalid (NaN/inf) entries are masked.

    Raises
    ------
    ValueError
        If `x` has more than one dimension.

    """
    x = ma.array(x, copy=False, subok=True, dtype=float)
    if x.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # make sure there's no gap in the data
    if isinstance(x, TimeSeries) and x.has_missing_dates():
        x = ts.fill_missing_dates(x)
    # 1 where the value is valid, 0 where it is masked
    m = np.logical_not(ma.getmaskarray(x)).astype(int)
    x = x.anom().filled(0).view(ndarray)
    n = len(x)
    # 'full' correlations cover the lags -(n-1) .. (n-1), in that order
    _avf = np.correlate(x, x, 'full')
    denom = np.correlate(m, m, 'full')
    if periodogram:
        # add |lag| to the count of valid pairs at every lag
        denom += np.concatenate([np.arange(n - 1, 0, -1), np.arange(n)])
    _avf /= denom
    # reorder so that lag 0 comes first, followed by the negative lags
    _avf = np.concatenate([_avf[n - 1:], _avf[:n - 1]])
    return ma.fix_invalid(_avf)

コード例 #4

0

ファイルを表示

ファイル: ensobase.py プロジェクト: xuexianwu/scikits.hydroclimpy

    def _set_annual_indices(self, minimum_size=None, reference_season=None):
        """
        Compute, cache and return the annual ENSO indices.

        Indices are set per periods of 12 months, starting at the first
        month of the reference season if any, otherwise at October.  The
        monthly indices are obtained from :meth:`set_monthly_indices` first.

        Parameters
        ----------
        minimum_size : {None, int}, optional
            Minimum size for the groups of consecutive values.
            If None, defaults to :attr:`minimum_size`.
        reference_season : {None, string or sequence}, optional
            Reference season.
            If None, defaults to :attr:`reference_season`.

        Returns
        -------
        The annual indices, which are also stored under the key
        ``'indices_annual'`` of ``self._cachedmonthly``.

        See Also
        --------
        :meth:`set_monthly_indices`
            Sets the ENSO indices for each month.

        """
        monthly = self.set_monthly_indices(minimum_size=minimum_size,
                                           reference_season=reference_season)
        # Per the original note, set_monthly_indices loses the full_year
        # flag, so it is switched back on here.
        self.full_year = True
        season = self.refseason
        if not season:
            # No reference season: anchor the annual series on October.
            anchored = monthly[self.months == 10]
        else:
            anchored = monthly[self.months == season[0]]
            # The converted code is not used further in this method; the
            # call is retained so that behaviour is unchanged.
            months2code(season)
        # Fill date gaps, carry the last value forward, then stretch the
        # series to the first and last stored dates.
        gap_free = forward_fill(fill_missing_dates(anchored))
        annual = adjust_endpoints(gap_free, self._dates[0], self._dates[-1])
        self._cachedmonthly['indices_annual'] = annual
        return annual

コード例 #5

0

ファイルを表示

ファイル: ensobase.py プロジェクト: dacoex/scikits.hydroclimpy

    def _set_annual_indices(self, minimum_size=None, reference_season=None):
        """
        Compute, cache and return the annual ENSO indices.

        Indices are set per periods of 12 months, starting at the first
        month of the reference season if any, otherwise at October.  The
        monthly indices are obtained from :meth:`set_monthly_indices` first.

        Parameters
        ----------
        minimum_size : {None, int}, optional
            Minimum size for the groups of consecutive values.
            If None, defaults to :attr:`minimum_size`.
        reference_season : {None, string or sequence}, optional
            Reference season.
            If None, defaults to :attr:`reference_season`.

        Returns
        -------
        The annual indices, which are also stored under the key
        ``"indices_annual"`` of ``self._cachedmonthly``.

        See Also
        --------
        :meth:`set_monthly_indices`
            Sets the ENSO indices for each month.

        """
        monthly = self.set_monthly_indices(
            minimum_size=minimum_size,
            reference_season=reference_season,
        )
        # Per the original note, set_monthly_indices loses the full_year
        # flag, so it is switched back on here.
        self.full_year = True
        season = self.refseason
        if not season:
            # No reference season: anchor the annual series on October.
            anchored = monthly[self.months == 10]
        else:
            anchored = monthly[self.months == season[0]]
            # The converted code is not used further in this method; the
            # call is retained so that behaviour is unchanged.
            months2code(season)
        # Fill date gaps, carry the last value forward, then stretch the
        # series to the first and last stored dates.
        gap_free = forward_fill(fill_missing_dates(anchored))
        annual = adjust_endpoints(gap_free, self._dates[0], self._dates[-1])
        self._cachedmonthly["indices_annual"] = annual
        return annual

コード例 #6

0

ファイルを表示

ファイル: avcf.py プロジェクト: B-Rich/scikits.timeseries-sandbox

def _acf(x, mode):
    """Return the lag-wise auto-correlation estimate of the 1D series `x`.

    The input is converted to a masked float array.  When it is a
    ``TimeSeries`` with missing dates, the gaps are filled first.  Anomalies
    (deviations from the average) are then computed and masked entries are
    replaced by 0 before any product is taken.

    :Parameters:
        `x` : TimeSeries or sequence
            One-dimensional input series.
        `mode` : bool
            Selects the scaling term used at each lag ``k``.  If true, the
            term is the sum of the squared anomalies whose partner ``k``
            steps later is not masked.  Otherwise it is the square root of
            the product of that sum and of its mirror image (squared
            anomalies ``k`` steps later whose earlier partner is not masked).

    :Returns:
        Masked array of length ``2*n - 1``: the value 1 for lag 0, the
        values for lags ``1 .. n-1``, then those same values in reverse
        order.  Invalid (NaN/inf) entries are masked.

    :Raises:
        ValueError if `x` has more than one dimension.
    """
    series = ma.array(x, copy=False, subok=True, dtype=float)
    if series.ndim > 1:
        raise ValueError("The input array should be 1D only.")
    # A TimeSeries with date gaps is made regular before anything else.
    if isinstance(series, TimeSeries) and series.has_missing_dates():
        series = ts.fill_missing_dates(series)

    # 1 where a value is valid, 0 where it is masked.
    valid = np.logical_not(ma.getmaskarray(series)).astype(int)
    anomalies = series.anom().filled(0).view(ndarray)
    squares = anomalies * anomalies
    npts = len(anomalies)

    # Keep only the non-negative lags of the full correlation.
    autocov = np.correlate(anomalies, anomalies, 'full')[npts - 1:]

    if mode:
        def _scale(lag):
            return np.sum(valid[lag:] * squares[:-lag])
    else:
        def _scale(lag):
            return np.sqrt((valid[lag:] * squares[:-lag]).sum()
                           * (valid[:-lag] * squares[lag:]).sum())

    ratios = np.array(
        [np.sum(anomalies[lag:] * anomalies[:-lag]) / _scale(lag)
         for lag in range(1, npts)],
        dtype=float)

    # NOTE(review): every ratio already has the lag-k cross-product as its
    # numerator, and autocov[k] looks like that same cross-product, so this
    # quotient appears to reduce to the scaling term alone -- confirm against
    # the intended formula before relying on the values.
    positive_lags = autocov[1:] / ratios
    pieces = [np.array([1.]), positive_lags, positive_lags[::-1]]
    return ma.fix_invalid(np.concatenate(pieces))

コード例 #7

0

ファイルを表示

# Quotes for 'INTC' between date1 and date2, as returned by
# quotes_historical_yahoo.
quotes = quotes_historical_yahoo('INTC', date1, date2)

# The dates from the yahoo quotes module get returned as integers, which
# happen to correspond to the integer representation of 'DAILY' frequency
# dates in the scikits.timeseries module. So create a DateArray of daily
# dates, then convert this to business day frequency afterwards.
quote_days = [row[0] for row in quotes]
dates = ts.date_array(quote_days, freq='DAILY').asfreq('BUSINESS')
opens = [row[1] for row in quotes]
raw_series = ts.time_series(opens, dates)

# `fill_missing_dates` will insert masked values for any missing data points.
# Note that you could plot the series without doing this, but it would cause
# missing values to be linearly interpolated rather than left empty in the
# plot.
series = ts.fill_missing_dates(raw_series)
fig = tpl.tsfigure()
fsp = fig.add_tsplot(111)
fsp.tsplot(series, '-')

# Add grid lines at start of each quarter. Grid lines appear at the major
# tick marks by default (which, due to the dynamic nature of the ticks for
# time series plots, cannot be guaranteed to be at quarter start). So if you
# want grid lines to appear at specific intervals, you must first specify
# xticks explicitly.
dates = series.dates
current_quarter = dates.quarter
previous_quarter = (dates - 1).quarter
# A date starts a quarter when the preceding date lies in a different one.
quarter_starts = dates[current_quarter != previous_quarter]
fsp.set_xticks(quarter_starts.tovalue())
fsp.grid()
plt.show()

コード例 #8

0

ファイルを表示

ファイル: FunctionProcessor.py プロジェクト: exedre/e4t

    def weighted(self,*args):
        """Build a weighted combination of series looked up in ``IS``.

        ``args`` is unpacked as ``(weights, [data, series])``.  The visible
        code only acts when ``check_type`` accepts all three for ``str`` /
        ``unicode`` (presumably a type test -- confirm against ``check_type``):

        * ``series`` must contain a comma and is split into names;
        * ``weights`` and ``data`` are each either a comma-separated list of
          keys or a template containing ``$$``, which is replaced by every
          name from ``series`` in turn.

        When both lists have the same length, each (data key, weight key)
        pair present in ``IS`` contributes ``IS[d].data * IS[w].data[0]`` to
        the running sum ``W`` and ``IS[w].data[0]`` to ``S``; pairs with a
        missing key are logged as errors and skipped.  ``R`` is then set to
        an ``ets.Timeseries`` built from ``W / S``, or to ``None`` when a
        ``ZeroDivisionError`` is raised.

        Raises ``ValueError`` when ``series`` has no comma, or when
        ``weights`` / ``data`` has neither a comma nor ``$$``.

        NOTE(review): this block uses Python 2 only syntax
        (``raise X, "msg"``, ``except E, name``, ``unicode``, ``has_key``).
        NOTE(review): the excerpt ends without returning ``R``; the rest of
        the method may lie outside this view.
        """
        R = None
        # Expected call shape: weighted(weights, [data, series]).
        (weights,[data,series]) = args
        logger.debug("Params are (w=%s,d=%s,s=%s)" % (weights,data,series))
        if check_type(weights,str,unicode) and check_type(data,str,unicode) and check_type(series,str,unicode):
            # NOTE(review): same debug message as the one just above.
            logger.debug("Params are (w=%s,d=%s,s=%s)" % (weights,data,series))
            Ws = weights
            # The series names must come as a comma-separated list.
            if ',' in series:
                Ss = series.split(',')
            else:
                raise ValueError,"La stringa di definizione delle serie è errata"
            # Weight keys: explicit list, or a '$$' template expanded per series name.
            if ',' in weights:
                Ws = weights.split(',')
            else:
                if '$$' not in weights:
                    raise ValueError,"La stringa di definizione dei pesi non permette la generazione di un elenco"
                Ws = [ weights.replace('$$',S) for S in Ss ]
            # Data keys: same two accepted forms as the weight keys.
            if ',' in data:
                Ds = data.split(',')
            else:
                if '$$' not in data:
                    raise ValueError,"La stringa di definizione dei dati non permette la generazione di un elenco"
                Ds = [ data.replace('$$',S) for S in Ss ]
            if len(Ws)==len(Ds):
                # W accumulates the weighted data, S the weights themselves.
                W = 0.0
                S = 0.0
                for d,w in zip(Ds,Ws):
                    wb=IS.has_key(w)
                    db=IS.has_key(d)
                    if wb and db:
                        # Only the first element of the weight entry's data is used.
                        Wn = IS[w].data[0]
                        Dn = IS[d].data
                        try:
#                            if isinstance(Dn,ts.TimeSeries):
#                                Dn = np.nan_to_num(Dn)
#                            if isinstance(W,ts.TimeSeries):
#                                W = np.nan_to_num(W)
#                            if isinstance(W,ts.TimeSeries):
#                                ts.align_series(W,Dn)
                            if isinstance(Dn,ts.TimeSeries):
                                # Work on a copy of the looked-up series.
                                Dn = ts.time_series(Dn,copy=True)
                                # Once W is itself a TimeSeries, bring Dn to W's
                                # frequency and to W's start/end dates before adding.
                                if isinstance(W,ts.TimeSeries):
                                    Dn = ts.convert(Dn,W.freq)
                                    # NOTE(review): the return value is discarded; unless
                                    # fill_missing_dates works in place this call has no
                                    # effect on Dn -- confirm.
                                    ts.fill_missing_dates(Dn,dates=W.dates,fill_value=np.nan)
                                    Dn = ts.adjust_endpoints(Dn,
                                                             start_date=W.start_date,
                                                             end_date=W.end_date,
                                                             copy=True)
                                    # _ts = np.ma.masked_invalid(Dn)
                                    # Dn = _ts.filled(0.0)
#                            if isinstance(W,ts.TimeSeries):
#                                print "GOò"
#                                _report(W,Dn*Wn)
                            W += Dn*Wn
#                            if isinstance(W,ts.TimeSeries):
#                                print "="
#                                _report(W)
                        except Exception, exc:
#                           if isinstance(W,ts.TimeSeries):              
#                                _report(W)
#                                print "DN"
#                                _report(Dn)                                
#                           print w,d,"W",type(W),W,W.shape,"D",type(Dn),Dn,Dn.shape,"Wn",type(Wn),Wn
                           # NOTE(review): the bare raise re-raises at once, so the two
                           # statements after it never run.  They would not work as
                           # written either: the log format has three %s for two
                           # arguments, and the last line only builds a tuple instead
                           # of raising.
                           raise
                           logger.error('Non posso comporre %s * %s | %s', d,w)
                           ValueError, "%s * %s" % (d,w)
                        S += Wn
                    else:
                        # A missing key is only logged; the pair is left out of the sums.
                        if not wb: logger.error(u'la serie %s non è presente nell\'IS. ATTENZIONE i risultati dell\'aggregazione possono non essere sono attendibili',w)
                        if not db: logger.error(u'la serie %s non è presente nell\'IS. ATTENZIONE i risultati  dell\'aggregazione possono non essere sono attendibili',d)
                try:
                    # Weighted average: accumulated weighted data over accumulated weights.
                    R = ets.Timeseries(data=W / S,name="WEIGHTED(\"%s\",\"%s\")" % (','.join(Ws),','.join(Ds)))
                except ZeroDivisionError, exc:
                    logger.warn('ZeroDivisionError')
                    R = None

コード例 #9

0

ファイルを表示

ファイル: yahoo.py プロジェクト: B-Rich/scikits.timeseries-sandbox

"""
The dates from the yahoo quotes module get returned as integers, which happen
to correspond to the integer representation of 'DAILY' frequency dates in the
scikits.timeseries module. So create a DateArray of daily dates,  then convert
this to business day frequency afterwards.
"""
dates = ts.date_array([q[0] for q in quotes], freq='DAILY').asfreq('BUSINESS')
opens = [q[1] for q in quotes]
raw_series = ts.time_series(opens, dates)

"""
`fill_missing_dates` will insert masked values for any missing data points.
Note that you could plot the series without doing this, but it would cause
missing values to be linearly interpolated rather than left empty in the plot.
"""
series = ts.fill_missing_dates(raw_series)
fig = tpl.tsfigure()
fsp = fig.add_tsplot(111)
fsp.tsplot(series, '-')

"""
Add grid lines at start of each quarter. Grid lines appear at the major tick
marks by default (which, due to the dynamic nature of the ticks for time
series plots, cannot be guaranteed to be at quarter start). So if you want
grid lines to appear at specific intervals, you must first specify xticks
explicitly.
"""
dates = series.dates
quarter_starts = dates[dates.quarter != (dates-1).quarter]
fsp.set_xticks(quarter_starts.tovalue())
fsp.grid()