Esempio n. 1
0
def seasonal_mean(x, freq):
    """
    Compute the NaN-ignoring mean of every position within a cycle.

    ``freq`` is the integer number of periods per cycle (for example 12 for
    monthly data). Position ``k`` of the result is the mean of
    ``x[k]``, ``x[k + freq]``, ``x[k + 2 * freq]``, ... as computed by
    ``pd_nanmean``.
    """
    phase_means = []
    for offset in range(freq):
        # Every freq-th sample starting at `offset` shares the same phase.
        same_phase = x[offset::freq]
        phase_means.append(pd_nanmean(same_phase))
    return np.array(phase_means)
Esempio n. 2
0
def seasonal_mean(x, freq):
    """
    Compute per-period means of ``x`` along its first axis, ignoring NaNs.

    ``freq`` is the integer number of periods per cycle (for example 12 for
    monthly data). Entry ``k`` of the result is ``pd_nanmean`` taken with
    ``axis=0`` over the samples ``x[k::freq]``.
    """
    def _phase_mean(offset):
        # Samples offset, offset + freq, ... all belong to the same period.
        return pd_nanmean(x[offset::freq], axis=0)

    return np.array(list(map(_phase_mean, range(freq))))
Esempio n. 3
0
def decompose(df, period=365, lo_frac=0.6, lo_delta=0.01):
    """Decompose an observed time series into trend, seasonal and residual parts.

    The model is additive, ``Y[t] = T[t] + S[t] + e[t]``. The structure follows
    ``statsmodels.tsa.seasonal_decompose``, except that the trend is estimated
    with a Lowess regression rather than a convolution.

    See ``statsmodels.nonparametric.smoothers_lowess.lowess()`` for the exact
    meaning of the two Lowess tuning knobs.

    Args:
        df (pandas.DataFrame): Observed time series. It is expected to be
            continuous (no gaps or missing data) and to carry a
            ``pandas.DatetimeIndex``.
        period (int, optional): Dominant periodicity, measured in number of
            observations. For example, to accommodate a strong annual cycle
            in daily observations use ``period=365``. Values larger than the
            series length are clamped to the series length.
        lo_frac (float, optional): Fraction of the data used when fitting each
            local Lowess regression.
        lo_delta (float, optional): Distance, expressed as a fraction of the
            series length, within which linear interpolation is used instead
            of a weighted regression. A non-zero value significantly reduces
            computation time.

    Returns:
        statsmodels.tsa.seasonal.DecomposeResult: Result object holding the
            seasonal, trend, residual and observed series, plus the
            zero-centered average seasonal cycle as ``period_averages``.

    """
    # Borrow the smoother and the pandas re-wrapping helper from statsmodels.
    smoother = sm.nonparametric.lowess
    to_pandas, _ = _maybe_get_pandas_wrapper_freq(df)

    # Work on a plain array internally.
    observed = np.asanyarray(df).squeeze()
    n_obs = len(observed)

    # Trend: Lowess fit of the observations against their integer position.
    positions = list(range(n_obs))
    trend = smoother(
        observed,
        positions,
        frac=lo_frac,
        delta=lo_delta * n_obs,
        return_sorted=False,
    )
    detrended = observed - trend

    # A period longer than the series would introduce NaNs, so clamp it.
    period = min(period, n_obs)

    # Average the detrended values phase by phase to get one seasonal cycle.
    phase_means = []
    for offset in range(period):
        phase_means.append(pd_nanmean(detrended[offset::period]))
    period_averages = np.array(phase_means)
    # Center the cycle on zero.
    period_averages -= np.mean(period_averages)

    # Repeat the cycle often enough to cover the series, then trim the excess.
    n_cycles = n_obs // period + 1
    seasonal = np.tile(period_averages, n_cycles)[:n_obs]
    resid = detrended - seasonal

    # Re-wrap every component so it matches the input's pandas type/index.
    seasonal_out, trend_out, resid_out, observed_out = [
        to_pandas(component)
        for component in (seasonal, trend, resid, observed)
    ]
    return DecomposeResult(
        seasonal=seasonal_out,
        trend=trend_out,
        resid=resid_out,
        observed=observed_out,
        period_averages=period_averages,
    )
def _seaonal_cyle_ufunc(data, period=None):
    """
    Calculate a repeating, zero-centered seasonal cycle for each column.

    For every column, the values sharing the same phase within the cycle
    (``column[i::period]``) are averaged with ``pd_nanmean``, the resulting
    ``period`` averages are centered on zero, and the cycle is tiled to the
    length of the input.

    Parameters
    ----------
    data : array-like, 1-D or 2-D
        The y-values of the observed points, with samples along the first
        axis. A 1-D input is treated as a single column. ``xr.DataArray``
        inputs (and other array-likes) are converted to a NumPy array.
    period : int
        Number of samples in one seasonal cycle. This depends on the
        sampling frequency of the data: 12 for monthly data, 365 for daily
        data. Although the default is ``None``, an integer must be supplied
        whenever ``data`` contains at least one non-NaN value, because the
        value is passed to ``range()``.

    Returns
    -------
    out : ndarray of float, shape (n_samples, n_columns)
        The repeating seasonal cycle. Columns that contain only NaNs are
        left as NaN. A 1-D input yields an array of shape (n_samples, 1).
    """
    # np.asarray unwraps xr.DataArray (same data as ``.values``) without
    # having to build a dummy DataArray just to compare types.
    data = np.asarray(data)

    # Promote a 1-D series to a single-column 2-D array.
    if data.ndim == 1:
        data = data[:, np.newaxis]

    n_samples = data.shape[0]
    n_columns = data.shape[1]

    # Start from all-NaN; np.nan is used because np.NaN no longer exists in
    # NumPy 2.x.
    seasonal = np.full((n_samples, n_columns), np.nan)

    for col in range(n_columns):
        column = data[:, col]

        # Nothing to average in a column made only of NaNs: leave it as NaN.
        if np.isnan(column).all():
            continue

        period_averages = np.array(
            [pd_nanmean(column[i::period]) for i in range(period)])
        # Zero-center the cycle.
        period_averages -= period_averages.mean()
        # Repeat the cycle past the end of the series, then trim to length.
        seasonal[:, col] = np.tile(
            period_averages, n_samples // period + 1)[:n_samples]

    return seasonal
Esempio n. 5
0
def series_seasonal(df, window):
    """Return the per-phase means of ``df`` for a cycle of ``window`` samples.

    Entry ``k`` of the result is ``pd_nanmean`` taken with ``axis=0`` over
    every ``window``-th sample of ``df`` starting at position ``k``.
    """
    per_phase = []
    for start in range(window):
        per_phase.append(pd_nanmean(df[start::window], axis=0))
    return np.array(per_phase)
Esempio n. 6
0
def seasonalMean(s, freq):
    """Return one ``pd_nanmean`` value per position in a cycle of ``freq`` samples.

    Position ``k`` averages the samples ``s[k]``, ``s[k + freq]``, ...
    """
    # One strided slice per phase of the cycle; equivalent to s[k::freq].
    phase_slices = [slice(k, None, freq) for k in range(freq)]
    return np.array([pd_nanmean(s[phase]) for phase in phase_slices])
Esempio n. 7
0
def seasonal_mean(x, freq):
    """Return the mean of each period of ``x``, taken along the first axis.

    ``freq`` is the integer number of periods per cycle; entry ``k`` of the
    result is ``pd_nanmean(x[k::freq], axis=0)``.
    """
    means_by_period = []
    offset = 0
    while offset < freq:
        means_by_period.append(pd_nanmean(x[offset::freq], axis=0))
        offset += 1
    return np.array(means_by_period)