예제 #1
0
def make_trend(series_len, rw_loc=0.001, rw_scale=0.1, type='rw', seed=1):
    """Generate a simulated time-series trend.

    Parameters
    ----------
    series_len: int
        Total length of the series.
    rw_loc: float
        Mean of the normal increments of the random walk (used when `type='rw'`).
    rw_scale: float
        Standard deviation of the normal increments of the random walk (used when `type='rw'`).
    type: str ['arma', 'rw']
        `'rw'` returns the cumulative sum of normal increments; `'arma'` simulates an
        ARMA(1, 1) process with AR coefficient 0.25 and MA coefficient 0.6.
    seed: int
        Seed passed into `np.random.default_rng()`. It drives both trend types, so equal
        arguments always produce the same trend.

    Returns
    -------
    np.array-like with length equal to `series_len`

    Raises
    ------
    IllegalArgument
        If `type` is neither `'rw'` nor `'arma'`.
    """
    # NOTE: `type` shadows the builtin, but it is part of the public signature and is kept for callers.
    if type == "rw":
        steps = np.random.default_rng(seed).normal(rw_loc, rw_scale, series_len)
        trend = np.cumsum(steps)
    elif type == "arma":
        # TODO: consider parameterize this
        arparams = np.array([.25])
        maparams = np.array([.6])
        # lag-polynomial form: prepend the zero-lag term and negate the AR coefficients
        ar = np.r_[1, -arparams]
        ma = np.r_[1, maparams]
        arma_process = sm.tsa.ArmaProcess(ar, ma)
        # Draw the innovations from a seeded generator; without `distrvs` the sample comes from
        # NumPy's global random state and `seed` would be silently ignored.
        rng = np.random.default_rng(seed)
        trend = arma_process.generate_sample(series_len, distrvs=rng.standard_normal)
    else:
        raise IllegalArgument("Invalid trend_type.")

    return trend
예제 #2
0
def make_trend(series_len,
               method='rw',
               arma=[.25, .6],
               rw_loc=0.0,
               rw_scale=0.1,
               seed=1):
    """Generate a simulated time-series trend with one of several methods.

    Parameters
    ----------
    series_len: int
        Total length of series
    method: str ['arma', 'rw']
        In case of `'rw'`, a simple random walk process will be used. For `'arma'`, we will use `statsmodels` to
        simulate a simple ARMA(1, 1) process
    arma: list
        List [arparams, maparams] of size 2 holding the AR and MA coefficient of the ARMA(1, 1) generating
        process. The default list is only read, never mutated.
    rw_loc: float
        Location parameter of the normal increments of the random walk
    rw_scale: float
        Scale parameter of the normal increments of the random walk
    seed: int
        Seed passed into `np.random.default_rng()`. It drives both methods, so equal arguments always
        produce the same trend.

    Returns
    -------
    np.array-like
        Simulated trend with length equal to `series_len`

    Raises
    ------
    IllegalArgument
        If `method` is neither `'rw'` nor `'arma'`.

    Notes
    -----
        1. ARMA process: https://www.statsmodels.org/stable/generated/statsmodels.tsa.arima_process.ArmaProcess.html
    """
    if method == "rw":
        steps = np.random.default_rng(seed).normal(rw_loc, rw_scale, series_len)
        trend = np.cumsum(steps)
    elif method == "arma":
        arparams = np.array([arma[0]])
        maparams = np.array([arma[1]])
        # add zero-lag and negate
        ar = np.r_[1, -arparams]
        # add zero-lag
        ma = np.r_[1, maparams]
        arma_process = ArmaProcess(ar, ma)
        # Draw the innovations from a seeded generator; without `distrvs` the sample comes from
        # NumPy's global random state and `seed` would be silently ignored.
        rng = np.random.default_rng(seed)
        trend = arma_process.generate_sample(series_len, distrvs=rng.standard_normal)
    else:
        raise IllegalArgument("Invalid trend method.")

    return trend
예제 #3
0
파일: features.py 프로젝트: szmark001/orbit
def make_seasonal_dummies(df, date_col, freq, sparse=True, drop_first=True):
    """ Based on the frequency input (in pandas.DataFrame style), provide dummy indicators for regression type of
    purpose.

    Parameters
    ----------
    df : pd.DataFrame
        Input dataframe to supply datetime array for generating series of indicators
    date_col : str
        Label of the date column used for generating the indicators; it must support the `.dt` accessor
    freq : str ['weekday', 'month', 'week']
        Options to pick the right frequency for generating dummies
    sparse : bool
        argument passed into `pd.get_dummies`
    drop_first : bool
        argument passed into `pd.get_dummies`

    Returns
    -------
    out : pd.DataFrame
        input data with the dummy indicator columns attached
    cols : list
        list of labels of the generated dummy columns (prefixed `wd_`, `m_` or `w_`)

    Raises
    ------
    IllegalArgument
        If `freq` is not one of the supported options.

    Notes
    -----
        This is calling :func:`pd.get_dummies`
    """
    if freq not in ('weekday', 'month', 'week'):
        raise IllegalArgument("Invalid argument of freq.")

    dates = df[date_col].dt
    if freq == 'weekday':
        values, prefix = dates.weekday, 'wd'
    elif freq == 'month':
        values, prefix = dates.month, 'm'
    else:
        # `Series.dt.week` was deprecated in pandas 1.1 and removed in pandas 2.0;
        # `isocalendar().week` is its replacement. Fall back to the old accessor only
        # on pandas versions that predate `isocalendar`.
        if hasattr(dates, 'isocalendar'):
            values = dates.isocalendar().week
        else:
            values = dates.week
        prefix = 'w'

    dummies = pd.get_dummies(values,
                             prefix=prefix,
                             sparse=sparse,
                             drop_first=drop_first)

    cols = dummies.columns.tolist()
    out = pd.concat([df, dummies], axis=1)
    return out, cols