Example No. 1
    def test_dataframe(self):
        df = pd.DataFrame(self.arr_2d)
        appended = tools.add_trend(df)
        expected = df.copy()
        expected["const"] = self.c
        assert_frame_equal(expected, appended)

        prepended = tools.add_trend(df, prepend=True)
        expected = df.copy()
        expected.insert(0, "const", self.c)
        assert_frame_equal(expected, prepended)

        df = pd.DataFrame(self.arr_2d)
        appended = tools.add_trend(df, trend="t")
        expected = df.copy()
        expected["trend"] = self.t
        assert_frame_equal(expected, appended)

        df = pd.DataFrame(self.arr_2d)
        appended = tools.add_trend(df, trend="ctt")
        expected = df.copy()
        expected["const"] = self.c
        expected["trend"] = self.t
        expected["trend_squared"] = self.t ** 2
        assert_frame_equal(expected, appended)
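
For reference, a minimal sketch of the behavior this test exercises, assuming tools above is statsmodels.tsa.tsatools:

import numpy as np
import pandas as pd
from statsmodels.tsa.tsatools import add_trend

df = pd.DataFrame(np.arange(10.0).reshape(5, 2))
# trend='ctt' appends 'const', 'trend' and 'trend_squared' columns;
# prepend=True would insert them before the original columns instead.
out = add_trend(df, trend='ctt')
print(out.columns.tolist())  # [0, 1, 'const', 'trend', 'trend_squared']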
Example No. 3
 def test_mixed_recarray(self):
     dt = np.dtype([('c0', np.float64), ('c1', np.int8), ('c2', 'S4')])
     ra = np.array([(1.0, 1, 'aaaa'), (1.1, 2, 'bbbb')], dtype=dt).view(np.recarray)
     added = tools.add_trend(ra, trend='ct')
     dt = np.dtype([('c0', np.float64), ('c1', np.int8), ('c2', 'S4'), ('const', np.float64), ('trend', np.float64)])
     expected = np.array([(1.0, 1, 'aaaa', 1.0, 1.0), (1.1, 2, 'bbbb', 1.0, 2.0)], dtype=dt).view(np.recarray)
     assert_equal(added, expected)
Example No. 5
    def test_series(self):
        s = pd.Series(self.arr_1d)
        appended = tools.add_trend(s)
        expected = pd.DataFrame(s)
        expected["const"] = self.c
        assert_frame_equal(expected, appended)

        prepended = tools.add_trend(s, prepend=True)
        expected = pd.DataFrame(s)
        expected.insert(0, "const", self.c)
        assert_frame_equal(expected, prepended)

        s = pd.Series(self.arr_1d)
        appended = tools.add_trend(s, trend="ct")
        expected = pd.DataFrame(s)
        expected["const"] = self.c
        expected["trend"] = self.t
        assert_frame_equal(expected, appended)
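
Note the implied promotion: passing a Series yields a DataFrame, since the trend terms need their own columns. A short sketch:

import pandas as pd
from statsmodels.tsa.tsatools import add_trend

out = add_trend(pd.Series([1.0, 2.0, 3.0]), trend='ct')
print(type(out).__name__, out.columns.tolist())  # DataFrame [0, 'const', 'trend']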
Example No. 7
    def test_array(self):
        base = np.vstack((self.arr_1d, self.c, self.t, self.t ** 2)).T
        assert_equal(tools.add_trend(self.arr_1d), base[:, :2])
        assert_equal(tools.add_trend(self.arr_1d, trend='t'), base[:, [0, 2]])
        assert_equal(tools.add_trend(self.arr_1d, trend='ct'), base[:, :3])
        assert_equal(tools.add_trend(self.arr_1d, trend='ctt'), base)

        base = np.hstack((self.c[:, None], self.t[:, None], self.t[:, None] ** 2, self.arr_2d))
        assert_equal(tools.add_trend(self.arr_2d, prepend=True), base[:, [0, 3, 4]])
        assert_equal(tools.add_trend(self.arr_2d, trend='t', prepend=True), base[:, [1, 3, 4]])
        assert_equal(tools.add_trend(self.arr_2d, trend='ct', prepend=True), base[:, [0, 1, 3, 4]])
        assert_equal(tools.add_trend(self.arr_2d, trend='ctt', prepend=True), base)
Example No. 8
    def test_duplicate_const(self):
        assert_raises(ValueError, tools.add_trend, x=self.c, trend="c", has_constant="raise")
        assert_raises(ValueError, tools.add_trend, x=self.c, trend="ct", has_constant="raise")
        df = pd.DataFrame(self.c)
        assert_raises(ValueError, tools.add_trend, x=df, trend="c", has_constant="raise")
        assert_raises(ValueError, tools.add_trend, x=df, trend="ct", has_constant="raise")

        skipped = tools.add_trend(self.c, trend="c")
        assert_equal(skipped, self.c[:, None])

        skipped_const = tools.add_trend(self.c, trend="ct", has_constant="skip")
        expected = np.vstack((self.c, self.t)).T
        assert_equal(skipped_const, expected)

        added = tools.add_trend(self.c, trend="c", has_constant="add")
        expected = np.vstack((self.c, self.c)).T
        assert_equal(added, expected)

        added = tools.add_trend(self.c, trend="ct", has_constant="add")
        expected = np.vstack((self.c, self.c, self.t)).T
        assert_equal(added, expected)
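
A short sketch of the three has_constant modes this test covers, on a column that is already constant:

import numpy as np
from statsmodels.tsa.tsatools import add_trend

c = np.ones(5)
add_trend(c, trend='c')                      # 'skip' (default): c returned as-is, as a column
add_trend(c, trend='c', has_constant='add')  # duplicates the constant column
try:
    add_trend(c, trend='c', has_constant='raise')
except ValueError as err:
    print(err)  # a constant is already present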
Example No. 9
    def test_recarray(self):
        recarray = pd.DataFrame(self.arr_2d).to_records(index=False, convert_datetime64=False)
        appended = tools.add_trend(recarray)
        expected = pd.DataFrame(self.arr_2d)
        expected['const'] = self.c
        expected = expected.to_records(index=False, convert_datetime64=False)
        assert_equal(expected, appended)

        prepended = tools.add_trend(recarray, prepend=True)
        expected = pd.DataFrame(self.arr_2d)
        expected.insert(0, 'const', self.c)
        expected = expected.to_records(index=False, convert_datetime64=False)
        assert_equal(expected, prepended)

        appended = tools.add_trend(recarray, trend='ctt')
        expected = pd.DataFrame(self.arr_2d)
        expected['const'] = self.c
        expected['trend'] = self.t
        expected['trend_squared'] = self.t ** 2
        expected = expected.to_records(index=False, convert_datetime64=False)
        assert_equal(expected, appended)
Example No. 11
    def _stackX(self, k_ar, trend):
        """
        Private method to build the RHS matrix for estimation.

        Columns are trend terms then lags.
        """
        endog = self.endog
        X = lagmat(endog, maxlag=k_ar, trim='both')
        k_trend = util.get_trendorder(trend)
        if k_trend:
            X = add_trend(X, prepend=True, trend=trend)
        self.k_trend = k_trend
        return X
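
The pattern in _stackX (lag the endogenous series, then prepend deterministic terms) can be reproduced standalone. A sketch, assuming trend='c' maps to a single trend column via get_trendorder:

import numpy as np
from statsmodels.tsa.tsatools import add_trend, lagmat

y = np.random.default_rng(0).standard_normal(20)
k_ar = 3
X = lagmat(y, maxlag=k_ar, trim='both')    # (nobs - k_ar) x k_ar matrix of lags
X = add_trend(X, prepend=True, trend='c')  # constant in the first column
print(X.shape)                             # (17, 4): const plus 3 lags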
Example No. 15
    def test_recarray(self):
        recarray = pd.DataFrame(self.arr_2d).to_records(index=False)
        with pytest.warns(FutureWarning, match="recarray support"):
            appended = tools.add_trend(recarray)
        expected = pd.DataFrame(self.arr_2d)
        expected['const'] = self.c
        expected = expected.to_records(index=False)
        assert_equal(expected, appended)
        with pytest.warns(FutureWarning, match="recarray support"):
            prepended = tools.add_trend(recarray, prepend=True)
        expected = pd.DataFrame(self.arr_2d)
        expected.insert(0, 'const', self.c)
        expected = expected.to_records(index=False)
        assert_equal(expected, prepended)

        with pytest.warns(FutureWarning, match="recarray support"):
            appended = tools.add_trend(recarray, trend='ctt')
        expected = pd.DataFrame(self.arr_2d)
        expected['const'] = self.c
        expected['trend'] = self.t
        expected['trend_squared'] = self.t ** 2
        expected = expected.to_records(index=False)
        assert_equal(expected, appended)
Example No. 16
def _make_arma_exog(endog, exog, trend):
    k_trend = 1  # overwritten if no constant
    if exog is None and trend == 'c':  # constant only
        exog = np.ones((len(endog), 1))
    elif exog is not None and trend == 'c': # constant plus exogenous
        exog = add_trend(exog, trend='c', prepend=True)
    elif exog is not None and trend == 'nc':
        # make sure it's not holding constant from last run
        if exog.var() == 0:
            exog = None
        k_trend = 0
    if trend == 'nc':
        k_trend = 0
    return k_trend, exog
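
The branches reduce to: trend='c' guarantees a constant column (created if exog is None, otherwise prepended), and trend='nc' guarantees none. A sketch, assuming the function above is in scope:

import numpy as np

endog = np.zeros(10)
exog = np.arange(10.0)[:, None]
k_trend, ex = _make_arma_exog(endog, exog, trend='c')
print(k_trend, ex.shape)  # 1 (10, 2): constant prepended to exog
k_trend, ex = _make_arma_exog(endog, None, trend='c')
print(k_trend, ex.shape)  # 1 (10, 1): constant only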
Example No. 17
def add_constant(data, prepend=True, has_constant='skip'):
    """
    Adds a column of ones to an array

    Parameters
    ----------
    data : array-like
        `data` is the column-ordered design matrix
    prepend : bool
        If true, the constant is in the first column.  Else the constant is
        appended (last column).
    has_constant : str {'raise', 'add', 'skip'}
        Behavior if ``data`` already has a constant. The default will return
        data without adding another constant. If 'raise', will raise an
        error if a constant is present. Using 'add' will duplicate the
        constant, if one is present.

    Returns
    -------
    data : array, recarray or DataFrame
        The original values with a constant (column of ones) as the first or
        last column. Returned value depends on input type.

    Notes
    -----
    When the input is recarray or a pandas Series or DataFrame, the added
    column's name is 'const'.
    """
    if _is_using_pandas(data, None) or _is_recarray(data):
        from statsmodels.tsa.tsatools import add_trend
        return add_trend(data, trend='c', prepend=prepend, has_constant=has_constant)

    # Special case for NumPy
    x = np.asanyarray(data)
    if x.ndim == 1:
        x = x[:, None]
    elif x.ndim > 2:
        raise ValueError('Only implemented for 2-dimensional arrays')

    is_nonzero_const = np.ptp(x, axis=0) == 0
    is_nonzero_const &= np.all(x != 0.0, axis=0)
    if is_nonzero_const.any():
        if has_constant == 'skip':
            return x
        elif has_constant == 'raise':
            raise ValueError("data already contains a constant")

    x = [np.ones(x.shape[0]), x]
    x = x if prepend else x[::-1]
    return np.column_stack(x)
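
A quick usage sketch, showing the NumPy path and the pandas path that delegates to add_trend:

import numpy as np
import pandas as pd
import statsmodels.api as sm

x = np.arange(6.0).reshape(3, 2)
sm.add_constant(x)                 # prepends a column of ones
sm.add_constant(x, prepend=False)  # appends it instead

df = pd.DataFrame(x, columns=['a', 'b'])
print(sm.add_constant(df).columns.tolist())  # ['const', 'a', 'b'], named by add_trend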
Example No. 21
 def test_mixed_recarray(self):
     dt = np.dtype([("c0", np.float64), ("c1", np.int8), ("c2", "S4")])
     ra = np.array([(1.0, 1, "aaaa"), (1.1, 2, "bbbb")],
                   dtype=dt).view(np.recarray)
     with pytest.warns(FutureWarning, match="recarray support"):
         added = tools.add_trend(ra, trend="ct")
     dt = np.dtype([
         ("c0", np.float64),
         ("c1", np.int8),
         ("c2", "S4"),
         ("const", np.float64),
         ("trend", np.float64),
     ])
     expected = np.array([(1.0, 1, "aaaa", 1.0, 1.0),
                          (1.1, 2, "bbbb", 1.0, 2.0)],
                         dtype=dt).view(np.recarray)
     assert_equal(added, expected)
Example No. 22
def get_var_endog(y, lags, trend='c'):
    """
    Make predictor matrix for VAR(p) process

    Z := (Z_0, ..., Z_T).T (T x Kp)
    Z_t = [1 y_t y_{t-1} ... y_{t - p + 1}] (Kp x 1)

    Ref: Lutkepohl p.70 (transposed)
    """
    nobs = len(y)
    # Ravel C order, need to put in descending order
    Z = np.array([y[t-lags : t][::-1].ravel() for t in range(lags, nobs)])

    # Add constant, trend, etc.
    if trend != 'nc':
        Z = tsa.add_trend(Z, prepend=True, trend=trend)

    return Z
Example No. 23
def get_var_endog(y, lags, trend="c", has_constant="skip"):
    """
    Make predictor matrix for VAR(p) process

    Z := (Z_0, ..., Z_T).T (T x Kp)
    Z_t = [1 y_t y_{t-1} ... y_{t - p + 1}] (Kp x 1)

    Ref: Lutkepohl p.70 (transposed)

    has_constant can be 'raise', 'add', or 'skip'. See add_constant.
    """
    nobs = len(y)
    # Ravel C order, need to put in descending order
    Z = np.array([y[t - lags : t][::-1].ravel() for t in range(lags, nobs)])

    # Add constant, trend, etc.
    if trend != "nc":
        Z = tsa.add_trend(Z, prepend=True, trend=trend, has_constant=has_constant)

    return Z
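
A sketch checking the shape this construction produces: for K variables, p lags and T observations, Z is (T - p) x (K*p + 1) once the constant is prepended (tsa here is statsmodels.tsa.tsatools):

import numpy as np
from statsmodels.tsa.tsatools import add_trend

T, K, p = 50, 2, 3
y = np.random.default_rng(1).standard_normal((T, K))
Z = np.array([y[t - p:t][::-1].ravel() for t in range(p, T)])
Z = add_trend(Z, prepend=True, trend='c')
print(Z.shape)  # (47, 7) == (T - p, K*p + 1)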
Example No. 24
def simulate_kpss(nobs, B, trend='c', rng=None):
    """
    Simulate the KPSS test statistic for nobs observations,
    performing B replications.
    """
    if rng is None:
        rng = RandomState()
        rng.seed(0)

    standard_normal = rng.standard_normal

    e = standard_normal((nobs, B))
    z = np.ones((nobs, 1))
    if trend == 'ct':
        z = add_trend(z, trend='t')
    zinv = np.linalg.pinv(z)
    trend_coef = zinv.dot(e)
    resid = e - z.dot(trend_coef)
    s = np.cumsum(resid, axis=0)
    lam = np.mean(resid ** 2.0, axis=0)
    kpss = 1 / (nobs ** 2.0) * np.sum(s ** 2.0, axis=0) / lam
    return kpss
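
Usage sketch: the simulated statistics can be turned into approximate critical values with upper-tail percentiles (indicative only, not the published KPSS table):

import numpy as np
from numpy.random import RandomState

stats = simulate_kpss(nobs=500, B=2000, trend='c', rng=RandomState(42))
print(np.percentile(stats, [90, 95, 99]))  # approx. 10%, 5% and 1% critical values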
Example No. 25
def get_var_endog(y, lags, trend='c', has_constant='skip'):
    """
    Make predictor matrix for VAR(p) process

    Z := (Z_0, ..., Z_T).T (T x Kp)
    Z_t = [1 y_t y_{t-1} ... y_{t - p + 1}] (Kp x 1)

    Ref: Lütkepohl p.70 (transposed)

    has_constant can be 'raise', 'add', or 'skip'. See add_constant.
    """
    nobs = len(y)
    # Ravel C order, need to put in descending order
    Z = np.array([y[t-lags : t][::-1].ravel() for t in range(lags, nobs)])

    # Add constant, trend, etc.
    trend = tsa.rename_trend(trend)
    if trend != 'n':
        Z = tsa.add_trend(Z, prepend=True, trend=trend,
                          has_constant=has_constant)

    return Z
Example No. 27
def my_adfuller(y, maxlag=None, regression='c'):
    """
    Augmented Dickey-Fuller test (it reduces to the non-augmented version if
    maxlag=0: dY_t = phi*Y_{t-1} + eps_t)
    e.g. maxlag=1 model: dY_t = phi*Y_{t-1} + phi_1*dY_{t-1} + eps_t
    NOTE: this implementation does not allow adding a time-dependence term
    :param y: time series to check for stationarity
    :param maxlag: maximum lag to include
    :param regression: str {'c','nc'} Constant to include in regression
        * 'c' : constant only (default)
        * 'nc' : no constant, no trend
    :return: dictionary with OLS results
    """
    y = np.asarray(y)  # ensure it is in array form
    ydiff = np.diff(y)  # get the differences (dY_t term)
    # lagged differences (dY_{t-k} terms)
    ydall = lagmat(ydiff[:, None], maxlag, trim='both', original='in')
    nobs = ydall.shape[0]  # number of observations
    ydall[:, 0] = y[-nobs - 1:-1]  # replace 0 ydiff with level of y (Y_{t-1} term)
    ydshort = ydiff[-nobs:]  # trim ydiff to match nobs

    Y = ydshort  # endogenous var
    if regression != 'nc':
        X = add_trend(ydall[:, :maxlag + 1], regression)  # exogenous var
    else:
        X = ydall[:, :maxlag + 1]  # exogenous var

    # do the usual regression using OLS to estimate parameters
    result = my_OLS(Y, X)

    # Add a few other entries to the results dictionary
    result['adfstat'] = result['tvalue'][0]  # adfstat is the t-value of the phi coefficient
    result['maxlag'] = maxlag
    # Akaike information criterion using the statsmodels adfuller definition
    # (note this differs from the definition used in the AR(p) model)
    result['aic'] = -2 * result['llf'] + 2 * result['df_model']
    # result['aic'] = np.log(result['sigma']) + 2.0 * (1.0 + result['df_model']) / result['nobs']  # AR(p) def
    return result
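
A smoke check of the design matrix this builds, using statsmodels' lagmat and add_trend (my_OLS itself is not shown in this example):

import numpy as np
from statsmodels.tsa.tsatools import add_trend, lagmat

y = np.cumsum(np.random.default_rng(0).standard_normal(100))  # a random walk
maxlag = 2
ydiff = np.diff(y)
ydall = lagmat(ydiff[:, None], maxlag, trim='both', original='in')
nobs = ydall.shape[0]
ydall[:, 0] = y[-nobs - 1:-1]              # first column: lagged level Y_{t-1}
X = add_trend(ydall[:, :maxlag + 1], 'c')  # level, maxlag lagged diffs, constant
print(X.shape)                             # (97, 4)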
Example No. 28
def add_constant(data, prepend=True, has_constant='skip'):
    """
    Add a column of ones to an array.

    Parameters
    ----------
    data : array_like
        A column-ordered design matrix.
    prepend : bool
        If true, the constant is in the first column.  Else the constant is
        appended (last column).
    has_constant : str {'raise', 'add', 'skip'}
        Behavior if ``data`` already has a constant. The default will return
        data without adding another constant. If 'raise', will raise an
        error if any column has a constant value. Using 'add' will add a
        column of 1s if a constant column is present.

    Returns
    -------
    array_like
        The original values with a constant (column of ones) as the first or
        last column. Returned value type depends on input type.

    Notes
    -----
    When the input is recarray or a pandas Series or DataFrame, the added
    column's name is 'const'.
    """
    if _is_using_pandas(data, None) or _is_recarray(data):
        if _is_recarray(data):
            # deprecated: remove recarray support after 0.12
            import warnings
            from statsmodels.tools.sm_exceptions import recarray_warning
            warnings.warn(recarray_warning, FutureWarning)
        from statsmodels.tsa.tsatools import add_trend
        return add_trend(data,
                         trend='c',
                         prepend=prepend,
                         has_constant=has_constant)

    # Special case for NumPy
    x = np.asanyarray(data)
    ndim = x.ndim
    if ndim == 1:
        x = x[:, None]
    elif x.ndim > 2:
        raise ValueError('Only implemented for 2-dimensional arrays')

    is_nonzero_const = np.ptp(x, axis=0) == 0
    is_nonzero_const &= np.all(x != 0.0, axis=0)
    if is_nonzero_const.any():
        if has_constant == 'skip':
            return x
        elif has_constant == 'raise':
            if ndim == 1:
                raise ValueError("data is constant.")
            else:
                columns = np.arange(x.shape[1])
                cols = ",".join([str(c) for c in columns[is_nonzero_const]])
                raise ValueError(f"Column(s) {cols} are constant.")

    x = [np.ones(x.shape[0]), x]
    x = x if prepend else x[::-1]
    return np.column_stack(x)
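
The detection logic treats a column as a constant only if it is constant and nonzero: an all-zero column has ptp == 0 but fails the != 0 check. A sketch:

import numpy as np

x = np.column_stack([np.ones(4), np.zeros(4), np.arange(4.0)])
is_nonzero_const = np.ptp(x, axis=0) == 0
is_nonzero_const &= np.all(x != 0.0, axis=0)
print(is_nonzero_const)  # [ True False False]: only the column of ones counts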
Example No. 29
def adfuller(x, maxlag=None, regression="c", autolag='AIC',
             store=False, regresults=False):
    """
    Augmented Dickey-Fuller unit root test

    The Augmented Dickey-Fuller test can be used to test for a unit root in a
    univariate process in the presence of serial correlation.

    Parameters
    ----------
    x : array_like, 1d
        data series
    maxlag : int
        Maximum lag which is included in test, default 12*(nobs/100)^{1/4}
    regression : {'c','ct','ctt','nc'}
        Constant and trend order to include in regression

        * 'c' : constant only (default)
        * 'ct' : constant and trend
        * 'ctt' : constant, and linear and quadratic trend
        * 'nc' : no constant, no trend
    autolag : {'AIC', 'BIC', 't-stat', None}
        * if None, then maxlag lags are used
        * if 'AIC' (default) or 'BIC', then the number of lags is chosen
          to minimize the corresponding information criterion
        * 't-stat' based choice of maxlag.  Starts with maxlag and drops a
          lag until the t-statistic on the last lag length is significant
          using a 5%-sized test
    store : bool
        If True, then a result instance is returned additionally to
        the adf statistic. Default is False
    regresults : bool, optional
        If True, the full regression results are returned. Default is False

    Returns
    -------
    adf : float
        Test statistic
    pvalue : float
        MacKinnon's approximate p-value based on MacKinnon (1994, 2010)
    usedlag : int
        Number of lags used
    nobs : int
        Number of observations used for the ADF regression and calculation of
        the critical values
    critical values : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels. Based on MacKinnon (2010)
    icbest : float
        The maximized information criterion if autolag is not None.
    resstore : ResultStore, optional
        A dummy class with results attached as attributes

    Notes
    -----
    The null hypothesis of the Augmented Dickey-Fuller is that there is a unit
    root, with the alternative that there is no unit root. If the pvalue is
    above a critical size, then we cannot reject that there is a unit root.

    The p-values are obtained through regression surface approximation from
    MacKinnon 1994, but using the updated 2010 tables. If the p-value is close
    to significant, then the critical values should be used to judge whether
    to reject the null.

    The autolag option and maxlag for it are described in Greene.

    Examples
    --------
    See example notebook

    References
    ----------
    .. [*] W. Greene.  "Econometric Analysis," 5th ed., Pearson, 2003.

    .. [*] Hamilton, J.D.  "Time Series Analysis".  Princeton, 1994.

    .. [*] MacKinnon, J.G. 1994.  "Approximate asymptotic distribution functions for
        unit-root and cointegration tests."  `Journal of Business and Economic
        Statistics` 12, 167-76.

    .. [*] MacKinnon, J.G. 2010. "Critical Values for Cointegration Tests."  Queen's
        University, Dept of Economics, Working Papers.  Available at
        http://ideas.repec.org/p/qed/wpaper/1227.html
    """

    if regresults:
        store = True

    trenddict = {None: 'nc', 0: 'c', 1: 'ct', 2: 'ctt'}
    if regression is None or isinstance(regression, int):
        regression = trenddict[regression]
    regression = regression.lower()
    if regression not in ['c', 'nc', 'ct', 'ctt']:
        raise ValueError("regression option %s not understood" % regression)
    x = np.asarray(x)
    nobs = x.shape[0]

    if maxlag is None:
        #from Greene referencing Schwert 1989
        maxlag = int(np.ceil(12. * np.power(nobs / 100., 1 / 4.)))

    xdiff = np.diff(x)
    xdall = lagmat(xdiff[:, None], maxlag, trim='both', original='in')
    nobs = xdall.shape[0]  # pylint: disable=E1103

    xdall[:, 0] = x[-nobs - 1:-1]  # replace 0 xdiff with level of x
    xdshort = xdiff[-nobs:]

    if store:
        resstore = ResultsStore()
    if autolag:
        if regression != 'nc':
            fullRHS = add_trend(xdall, regression, prepend=True)
        else:
            fullRHS = xdall
        startlag = fullRHS.shape[1] - xdall.shape[1] + 1  # 1 for level  # pylint: disable=E1103
        #search for lag length with smallest information criteria
        #Note: use the same number of observations to have comparable IC
        #aic and bic: smaller is better

        if not regresults:
            icbest, bestlag = _autolag(OLS, xdshort, fullRHS, startlag,
                                       maxlag, autolag)
        else:
            icbest, bestlag, alres = _autolag(OLS, xdshort, fullRHS, startlag,
                                              maxlag, autolag,
                                              regresults=regresults)
            resstore.autolag_results = alres

        bestlag -= startlag  # convert to lag not column index

        #rerun ols with best autolag
        xdall = lagmat(xdiff[:, None], bestlag, trim='both', original='in')
        nobs = xdall.shape[0]   # pylint: disable=E1103
        xdall[:, 0] = x[-nobs - 1:-1]  # replace 0 xdiff with level of x
        xdshort = xdiff[-nobs:]
        usedlag = bestlag
    else:
        usedlag = maxlag
        icbest = None
    if regression != 'nc':
        resols = OLS(xdshort, add_trend(xdall[:, :usedlag + 1],
                     regression)).fit()
    else:
        resols = OLS(xdshort, xdall[:, :usedlag + 1]).fit()

    adfstat = resols.tvalues[0]
#    adfstat = (resols.params[0]-1.0)/resols.bse[0]
    # the "asymptotically correct" z statistic is obtained as
    # nobs/(1-np.sum(resols.params[1:-(trendorder+1)])) (resols.params[0] - 1)
    # I think this is the statistic that is used for series that are integrated
    # for orders higher than I(1), ie., not ADF but cointegration tests.

    # Get approx p-value and critical values
    pvalue = mackinnonp(adfstat, regression=regression, N=1)
    critvalues = mackinnoncrit(N=1, regression=regression, nobs=nobs)
    critvalues = {"1%" : critvalues[0], "5%" : critvalues[1],
                  "10%" : critvalues[2]}
    if store:
        resstore.resols = resols
        resstore.maxlag = maxlag
        resstore.usedlag = usedlag
        resstore.adfstat = adfstat
        resstore.critvalues = critvalues
        resstore.nobs = nobs
        resstore.H0 = ("The coefficient on the lagged level equals 1 - "
                       "unit root")
        resstore.HA = "The coefficient on the lagged level < 1 - stationary"
        resstore.icbest = icbest
        resstore._str = 'Augmented Dickey-Fuller Test Results'
        return adfstat, pvalue, critvalues, resstore
    else:
        if not autolag:
            return adfstat, pvalue, usedlag, nobs, critvalues
        else:
            return adfstat, pvalue, usedlag, nobs, critvalues, icbest
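
Usage sketch on simulated data: a random walk should fail to reject the unit-root null, while its differences should reject it.

import numpy as np
from statsmodels.tsa.stattools import adfuller

rw = np.cumsum(np.random.default_rng(0).standard_normal(500))
stat, pvalue, usedlag, nobs, crit, icbest = adfuller(rw, regression='c')
print(pvalue)                    # typically large: cannot reject a unit root
print(adfuller(np.diff(rw))[1])  # typically near 0: differences are stationary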
Example No. 30
def coint(y0, y1, trend='c', method='aeg', maxlag=None, autolag='aic',
          return_results=None):
    """Test for no-cointegration of a univariate equation

    The null hypothesis is no cointegration. Variables in y0 and y1 are
    assumed to be integrated of order 1, I(1).

    This uses the augmented Engle-Granger two-step cointegration test.
    Constant or trend is included in 1st stage regression, i.e. in
    cointegrating equation.

    Parameters
    ----------
    y0 : array_like, 1d
        first element in cointegrating vector
    y1 : array_like
        remaining elements in cointegrating vector
    trend : str {'c', 'ct'}
        trend term included in regression for cointegrating equation
        * 'c' : constant
        * 'ct' : constant and linear trend
        * also available quadratic trend 'ctt', and no constant 'nc'

    method : string
        currently only 'aeg' for augmented Engle-Granger test is available.
        default might change.
    maxlag : None or int
        keyword for `adfuller`, largest or given number of lags
    autolag : string
        keyword for `adfuller`, lag selection criterion.
    return_results : bool
        for future compatibility, currently only tuple available.
        If True, then a results instance is returned. Otherwise, a tuple
        with the test outcome is returned.
        Set `return_results=False` to avoid future changes in return.


    Returns
    -------
    coint_t : float
        t-statistic of unit-root test on residuals
    pvalue : float
        MacKinnon's approximate, asymptotic p-value based on MacKinnon (1994)
    crit_value : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels based on regression curve. This depends on the number of
        observations.

    Notes
    -----
    The null hypothesis is that there is no cointegration; the alternative
    hypothesis is that there is a cointegrating relationship. If the pvalue is
    small, below a critical size, then we can reject the hypothesis that there
    is no cointegrating relationship.

    P-values and critical values are obtained through regression surface
    approximation from MacKinnon 1994 and 2010.

    TODO: We could handle gaps in data by dropping rows with nans in the
    auxiliary regressions. Not implemented yet, currently assumes no nans
    and no gaps in time series.

    References
    ----------
    MacKinnon, J.G. 1994  "Approximate Asymptotic Distribution Functions for
        Unit-Root and Cointegration Tests." Journal of Business & Economics
        Statistics, 12.2, 167-76.
    MacKinnon, J.G. 2010.  "Critical Values for Cointegration Tests."
        Queen's University, Dept of Economics Working Papers 1227.
        http://ideas.repec.org/p/qed/wpaper/1227.html
    """

    trend = trend.lower()
    if trend not in ['c', 'nc', 'ct', 'ctt']:
        raise ValueError("trend option %s not understood" % trend)
    y0 = np.asarray(y0)
    y1 = np.asarray(y1)
    if y1.ndim < 2:
        y1 = y1[:, None]
    nobs, k_vars = y1.shape
    k_vars += 1   # add 1 for y0

    if trend == 'nc':
        xx = y1
    else:
        xx = add_trend(y1, trend=trend, prepend=False)

    res_co = OLS(y0, xx).fit()

    if res_co.rsquared < 1 - np.sqrt(np.finfo(np.double).eps):
        res_adf = adfuller(res_co.resid, maxlag=maxlag, autolag=None,
                           regression='nc')
    else:
        import warnings
        warnings.warn("y0 and y1 are perfectly colinear.  Cointegration test "
                      "is not reliable in this case.")
        # Edge case where series are too similar
        res_adf = (0,)

    # no constant or trend, see egranger in Stata and MacKinnon
    if trend == 'nc':
        crit = [np.nan] * 3  # 2010 critical values not available
    else:
        crit = mackinnoncrit(N=k_vars, regression=trend, nobs=nobs - 1)
        #  nobs - 1, the -1 is to match egranger in Stata, I don't know why.
        #  TODO: check nobs or df = nobs - k

    pval_asy = mackinnonp(res_adf[0], regression=trend, N=k_vars)
    return res_adf[0], pval_asy, crit
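
Usage sketch with two artificially cointegrated series (y0 tracks y1 plus stationary noise):

import numpy as np
from statsmodels.tsa.stattools import coint

rng = np.random.default_rng(0)
y1 = np.cumsum(rng.standard_normal(500))  # an I(1) driver
y0 = 2.0 * y1 + rng.standard_normal(500)  # cointegrated with y1
coint_t, pvalue, crit = coint(y0, y1, trend='c')
print(pvalue)  # typically small: reject 'no cointegration'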
Example No. 33
---------------------------------------------------------------------------
ImportError                               Traceback (most recent call last)
<ipython-input-396-0c2468c93995> in <module>
      4 X3 = daily_returns[['ETH-USD','XRP-USD','LTC-USD']]
      5 
----> 6 X1 = sm.add_constant(X1)
      7 X2 = sm.add_constant(X2)
      8 X3 = sm.add_constant(X3)

~/opt/anaconda3/lib/python3.7/site-packages/statsmodels/tools/tools.py in add_constant(data, prepend, has_constant)
    294     if _is_using_pandas(data, None) or _is_recarray(data):
    295         from statsmodels.tsa.tsatools import add_trend
--> 296         return add_trend(data, trend='c', prepend=prepend, has_constant=has_constant)
    297 
    298     # Special case for NumPy

~/opt/anaconda3/lib/python3.7/site-packages/statsmodels/tsa/tsatools.py in add_trend(x, trend, prepend, has_constant)
     95                 except:
     96                     return False
---> 97             col_const = x.apply(safe_is_const, 0)
     98         else:
     99             ptp0 = np.ptp(np.asanyarray(x), axis=0)

~/opt/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py in apply(self, func, axis, broadcast, raw, reduce, result_type, args, **kwds)
   6898             If an array is passed, it must be the same length as the data. The
   6899             list can contain any of the other types (except list).
-> 6900             Keys to group by on the pivot table column.  If an array is passed,
   6901             it is being used as the same manner as column values.
Example No. 34
def trade_allocation(context, data, resOLS):
    
    olsPara = resOLS.params  # OLS parameters for [x, dx, 1]
    resid = resOLS.resid
    ols_var = np.dot(resid, np.transpose(resid))/(resOLS.df_resid)
    
    # Replicate the OLS in adf test to predict the movement of stationary portfolio
    # RecentData = TrainPort[-context.lookback/50:]  #  only the recent data matters
    RecentData = context.TrainPort
    xdiff = np.diff(RecentData)
    nobs = xdiff.shape[0]
    xdall = np.column_stack((RecentData[-nobs:], xdiff[:, None]))
    x = add_trend(xdall[:, :context.lag + 1], 'c')
    y_pred = np.dot(x, olsPara)
    y_actual = xdiff[-nobs:]
        
    profit_potential = y_pred[-1]/context.cost
    # y_mean = np.mean(TrainPort)
    # y_std = np.std(TrainPort)

    # Calculate the score of deviation from current value
    # dev_score = y_pred[-1]#/np.sqrt(ols_var)
    # dev_score = -(y_pred[-1] - y_mean)/y_std
    # dev_score = -(TrainPort[-1] - np.mean(TrainPort))/np.std(TrainPort)
    
    curr_price = context.base_prices[-1,:]
    dollar_allocation = np.multiply(curr_price, context.weights)*profit_potential
    pct_allocation = context.leverage_limit*dollar_allocation/np.sum(np.abs(dollar_allocation))
    pct_allocation = np.asarray(pct_allocation)
    pct_allocation = pct_allocation[0,:]
    
    # print '-'*20
    # print 'ADF test statistics: %f' %adfstat
    # print 'ADF test critvalues: %s' %str(critvalues)
    # print 'ADF pvalue: %f' %pvalue
    # print ' '
    # print dir(resOLS)
    # print 'Predic value: %s' %str(y_pred[-5:])
    # print 'Fitted value: %s' %str(resOLS.fittedvalues[-5:])
    # print 'Actual value: %s' %str(y_actual[-5:])
    # print ' '
    # print 'Latest value of TrainPort: %f' %TrainPort[-1]
    # print 'Mean value of TrainPort: %f' %np.mean(TrainPort)
    # print 'Std Value of TrainPort: %f' %np.std(TrainPort)
    # print ' '
    # print 'avg of actual change: %f' %np.mean(abs(y_actual))
    # print 'std of actual change: %f' %np.std(y_actual)
    # print ' '
    # print 'avg of pred change: %f' %np.mean(abs(y_pred))
    # print 'std of pred change: %f' %np.std(y_pred)
    # print ' '
    # print 'The predicted bar: %f' %y_pred[-1]
    # print 'The current bar: %f' %y_actual[-1]
    # print ' '
    # print 'The ols std: %f' %np.sqrt(ols_var)
    # print 'mse_total %f' %resOLS.mse_total
    # print 'OLS rsquared: %f' %resOLS.rsquared
    # print 'profit_potential: %f' %profit_potential
    # print 'dev_score: %f' %dev_score
    
    # print 'Dollar Allocation: %s' %str(dollar_allocation)
    # if abs(y_pred[-1]) > 2*context.cost/abs(TrainPort[-1]):
    #     return pct_allocation
    # elif abs(y_pred[-1]) < context.cost/abs(TrainPort[-1]):
    #     return pct_allocation*0
    # else:
    #     return None
    
    # Trading window is determined by the abs(dev_score)
    if abs(profit_potential) < context.dev_lower_threshold:
        # Liquidate all positions if fall below lower threshold
        return pct_allocation*0.
    elif abs(profit_potential) < context.dev_upper_threshold:
        # Do nothing
        return None
    else:
        # Rebalance if the dev is above the upper threshold.
        return pct_allocation
Example No. 35
 def test_dataframe_duplicate(self):
     df = pd.DataFrame(self.arr_2d, columns=["const", "trend"])
     tools.add_trend(df, trend="ct")
     tools.add_trend(df, trend="ct", prepend=True)
Example No. 36
def adfuller(
    x,
    maxlag=None,
    regression="c",
    autolag="AIC",
    store=False,
    regresults=False,
):
    """
    Augmented Dickey-Fuller unit root test.

    The Augmented Dickey-Fuller test can be used to test for a unit root in a
    univariate process in the presence of serial correlation.

    Parameters
    ----------
    x : array_like, 1d
        The data series to test.
    maxlag : int
        Maximum lag which is included in test, default 12*(nobs/100)^{1/4}.
    regression : {"c","ct","ctt","nc"}
        Constant and trend order to include in regression.

        * "c" : constant only (default).
        * "ct" : constant and trend.
        * "ctt" : constant, and linear and quadratic trend.
        * "nc" : no constant, no trend.

    autolag : {"AIC", "BIC", "t-stat", None}
        Method to use when automatically determining the lag length among the
        values 0, 1, ..., maxlag.

        * If "AIC" (default) or "BIC", then the number of lags is chosen
          to minimize the corresponding information criterion.
        * "t-stat" based choice of maxlag.  Starts with maxlag and drops a
          lag until the t-statistic on the last lag length is significant
          using a 5%-sized test.
        * If None, then the number of included lags is set to maxlag.
    store : bool
        If True, then a result instance is returned additionally to
        the adf statistic. Default is False.
    regresults : bool, optional
        If True, the full regression results are returned. Default is False.

    Returns
    -------
    adf : float
        The test statistic.
    pvalue : float
        MacKinnon's approximate p-value based on MacKinnon (1994, 2010).
    usedlag : int
        The number of lags used.
    nobs : int
        The number of observations used for the ADF regression and calculation
        of the critical values.
    critical values : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels. Based on MacKinnon (2010).
    icbest : float
        The maximized information criterion if autolag is not None.
    resstore : ResultStore, optional
        A dummy class with results attached as attributes.

    Notes
    -----
    The null hypothesis of the Augmented Dickey-Fuller is that there is a unit
    root, with the alternative that there is no unit root. If the pvalue is
    above a critical size, then we cannot reject that there is a unit root.

    The p-values are obtained through regression surface approximation from
    MacKinnon 1994, but using the updated 2010 tables. If the p-value is close
    to significant, then the critical values should be used to judge whether
    to reject the null.

    The autolag option and maxlag for it are described in Greene.

    References
    ----------
    .. [1] W. Greene.  "Econometric Analysis," 5th ed., Pearson, 2003.

    .. [2] Hamilton, J.D.  "Time Series Analysis".  Princeton, 1994.

    .. [3] MacKinnon, J.G. 1994.  "Approximate asymptotic distribution functions for
        unit-root and cointegration tests."  `Journal of Business and Economic
        Statistics` 12, 167-76.

    .. [4] MacKinnon, J.G. 2010. "Critical Values for Cointegration Tests."  Queen's
        University, Dept of Economics, Working Papers.  Available at
        http://ideas.repec.org/p/qed/wpaper/1227.html

    Examples
    --------
    See example notebook
    """
    x = array_like(x, "x")
    maxlag = int_like(maxlag, "maxlag", optional=True)
    regression = string_like(regression,
                             "regression",
                             options=("c", "ct", "ctt", "nc"))
    autolag = string_like(autolag,
                          "autolag",
                          optional=True,
                          options=("aic", "bic", "t-stat"))
    store = bool_like(store, "store")
    regresults = bool_like(regresults, "regresults")

    if regresults:
        store = True

    trenddict = {None: "nc", 0: "c", 1: "ct", 2: "ctt"}
    if regression is None or isinstance(regression, int):
        regression = trenddict[regression]
    regression = regression.lower()
    nobs = x.shape[0]

    ntrend = len(regression) if regression != "nc" else 0
    if maxlag is None:
        # from Greene referencing Schwert 1989
        maxlag = int(np.ceil(12.0 * np.power(nobs / 100.0, 1 / 4.0)))
        # -1 for the diff
        maxlag = min(nobs // 2 - ntrend - 1, maxlag)
        if maxlag < 0:
            raise ValueError("sample size is too short to use selected "
                             "regression component")
    elif maxlag > nobs // 2 - ntrend - 1:
        raise ValueError("maxlag must be less than (nobs/2 - 1 - ntrend) "
                         "where n trend is the number of included "
                         "deterministic regressors")
    xdiff = np.diff(x)
    xdall = lagmat(xdiff[:, None], maxlag, trim="both", original="in")
    nobs = xdall.shape[0]

    xdall[:, 0] = x[-nobs - 1:-1]  # replace 0 xdiff with level of x
    xdshort = xdiff[-nobs:]

    if store:
        from statsmodels.stats.diagnostic import ResultsStore

        resstore = ResultsStore()
    if autolag:
        if regression != "nc":
            fullRHS = add_trend(xdall, regression, prepend=True)
        else:
            fullRHS = xdall
        startlag = fullRHS.shape[1] - xdall.shape[1] + 1
        # 1 for level
        # search for lag length with smallest information criteria
        # Note: use the same number of observations to have comparable IC
        # aic and bic: smaller is better

        if not regresults:
            icbest, bestlag = _autolag(OLS, xdshort, fullRHS, startlag, maxlag,
                                       autolag)
        else:
            icbest, bestlag, alres = _autolag(
                OLS,
                xdshort,
                fullRHS,
                startlag,
                maxlag,
                autolag,
                regresults=regresults,
            )
            resstore.autolag_results = alres

        bestlag -= startlag  # convert to lag not column index

        # rerun ols with best autolag
        xdall = lagmat(xdiff[:, None], bestlag, trim="both", original="in")
        nobs = xdall.shape[0]
        xdall[:, 0] = x[-nobs - 1:-1]  # replace 0 xdiff with level of x
        xdshort = xdiff[-nobs:]
        usedlag = bestlag
    else:
        usedlag = maxlag
        icbest = None
    if regression != "nc":
        resols = OLS(xdshort, add_trend(xdall[:, :usedlag + 1],
                                        regression)).fit()
    else:
        resols = OLS(xdshort, xdall[:, :usedlag + 1]).fit()

    adfstat = resols.tvalues[0]
    #    adfstat = (resols.params[0]-1.0)/resols.bse[0]
    # the "asymptotically correct" z statistic is obtained as
    # nobs/(1-np.sum(resols.params[1:-(trendorder+1)])) (resols.params[0] - 1)
    # I think this is the statistic that is used for series that are integrated
    # for orders higher than I(1), ie., not ADF but cointegration tests.

    # Get approx p-value and critical values
    pvalue = mackinnonp(adfstat, regression=regression, N=1)
    critvalues = mackinnoncrit(N=1, regression=regression, nobs=nobs)
    critvalues = {
        "1%": critvalues[0],
        "5%": critvalues[1],
        "10%": critvalues[2],
    }
    if store:
        resstore.resols = resols
        resstore.maxlag = maxlag
        resstore.usedlag = usedlag
        resstore.adfstat = adfstat
        resstore.critvalues = critvalues
        resstore.nobs = nobs
        resstore.H0 = ("The coefficient on the lagged level equals 1 - "
                       "unit root")
        resstore.HA = "The coefficient on the lagged level < 1 - stationary"
        resstore.icbest = icbest
        resstore._str = "Augmented Dickey-Fuller Test Results"
        return adfstat, pvalue, critvalues, resstore
    else:
        if not autolag:
            return adfstat, pvalue, usedlag, nobs, critvalues
        else:
            return adfstat, pvalue, usedlag, nobs, critvalues, icbest
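A minimal usage sketch (illustrative, not part of the source above): with the default autolag="AIC", adfuller returns the six-element tuple documented in the docstring, so a unit-root series and a stationary series can be compared directly.
import numpy as np
from statsmodels.tsa.stattools import adfuller

rng = np.random.default_rng(0)
walk = np.cumsum(rng.standard_normal(500))  # unit root: should not reject
noise = rng.standard_normal(500)            # stationary: should reject

for name, series in (("random walk", walk), ("white noise", noise)):
    stat, pvalue, usedlag, nobs, crit, icbest = adfuller(series)
    print(f"{name}: stat={stat:.2f} p={pvalue:.3f} crit5%={crit['5%']:.2f}")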
Ejemplo n.º 37
0
 def test_dataframe_duplicate(self):
     df = pd.DataFrame(self.arr_2d, columns=['const', 'trend'])
     tools.add_trend(df, trend='ct')
     tools.add_trend(df, trend='ct', prepend=True)
Ejemplo n.º 39
0
    def engle_granger_coef(self,
                           y0,
                           y1,
                           trend='c',
                           method='aeg',
                           maxlag=None,
                           autolag='aic',
                           normalize=True,
                           debug=True):
        """
        Engle-Granger Cointegration Coefficient Calculations.

        This equation takes a linear combination of two I(1) time series to
        create an I(0), or stationary, time series.

        This is useful if the two series have a similar stochastic long-term
        trend, as the combination eliminates that trend and allows the result
        to be modeled as a stationary process.

        Parameters
        ----------
        y0 : array_like
            The first element in cointegrated system. Must be 1-d.
        y1 : array_like
            The remaining elements in cointegrated system.
        trend : str {'c', 'ct'}
            The trend term included in regression for cointegrating equation.

            * 'c' : constant.
            * 'ct' : constant and linear trend.
            * also available quadratic trend 'ctt', and no constant 'nc'.

        method : {'aeg'}
            Only 'aeg' (augmented Engle-Granger) is available.
        maxlag : None or int
            Argument for `adfuller`, largest or given number of lags.
        autolag : str
            Argument for `adfuller`, lag selection criterion.

            * If None, then maxlag lags are used without lag search.
            * If 'AIC' (default) or 'BIC', then the number of lags is chosen
              to minimize the corresponding information criterion.
            * 't-stat' based choice of maxlag.  Starts with maxlag and drops
              a lag until the t-statistic on the last lag length is
              significant using a 5%-sized test.
        normalize : bool, optional
            As there are infinitely many scalar multiples that will produce
            the factor, this normalizes the first entry to be 1.
        debug : bool, optional
            Checks whether the series have a possible cointegrating
            relationship using the Engle-Granger cointegration test.

        Returns
        -------
        coefs : array
            A vector that will create an I(0) time series if such a
            combination exists.

        Notes
        -----
        The series should be checked independently for their integration
        order. The series must be I(1) to get consistent results. You can
        check this by using the int_order function.

        References
        ----------
        .. [1] MacKinnon, J.G. 1994. "Approximate Asymptotic Distribution
           Functions for Unit-Root and Cointegration Tests." Journal of
           Business & Economic Statistics, 12.2, 167-76.
        .. [2] MacKinnon, J.G. 2010. "Critical Values for Cointegration
           Tests." Queen's University, Dept of Economics Working Papers 1227.
           http://ideas.repec.org/p/qed/wpaper/1227.html
        .. [3] Hamilton, J. D. (1994). Time Series Analysis
           (Vol. 2, pp. 690-696). Princeton, NJ: Princeton University Press.
        """
        if debug:
            coint_t, pvalue, crit_value = coint(y0, y1, trend, method, maxlag,
                                                autolag)
            if pvalue >= .10:
                print('The null hypothesis cannot be rejected')

        trend = string_like(trend, 'trend', options=('c', 'nc', 'ct', 'ctt'))
        nobs, k_vars = y1.shape

        y1 = add_trend(y1, trend=trend, prepend=False)

        eg_model = OLS(y0, y1).fit()
        coefs = eg_model.params[0:k_vars]

        if normalize:
            coefs = coefs / coefs[0]

        return coefs
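For orientation, a minimal standalone sketch of the first-stage coefficient calculation the method above performs (the simulated data are hypothetical, and since the method's class is not shown here, its steps are inlined):
import numpy as np
from statsmodels.regression.linear_model import OLS
from statsmodels.tsa.tsatools import add_trend

rng = np.random.default_rng(1)
n = 300
common = np.cumsum(rng.standard_normal(n))          # shared I(1) component
y1 = np.column_stack([common + rng.standard_normal(n),
                      0.5 * common + rng.standard_normal(n)])
y0 = 2.0 * common + rng.standard_normal(n)          # cointegrated with y1

k_vars = y1.shape[1]
xx = add_trend(y1, trend='c', prepend=False)        # first-stage regressors
coefs = OLS(y0, xx).fit().params[:k_vars]
coefs = coefs / coefs[0]                            # normalize first entry to 1
print(coefs)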
Ejemplo n.º 40
0
    def fit(self,
            use_mle: bool = False,
            disp: bool = False) -> "ThetaModelResults":
        r"""
        Estimate model parameters.

        Parameters
        ----------
        use_mle : bool, default False
            Estimate the parameters using MLE by fitting an ARIMA(0,1,1) with
            a drift.  If False (the default), estimates parameters using OLS
            of a constant and a time-trend and by fitting a SES to the model
            data.
        disp : bool, default False
            Display iterative output from fitting the model.

        Notes
        -----
        When using MLE, the parameters are estimated from the ARIMA(0,1,1)

        .. math::

           X_t = X_{t-1} + b_0 + (\alpha-1)\epsilon_{t-1} + \epsilon_t

        When estimating the model using 2-step estimation, the model
        parameters are estimated using the OLS regression

        .. math::

           X_t = a_0 + b_0 (t-1) + \eta_t

        and the SES

        .. math::

           \tilde{X}_{t+1} = \alpha X_{t} + (1-\alpha)\tilde{X}_{t}

        Returns
        -------
        ThetaModelResults
            Model results and forecasting.
        """
        if self._deseasonalize and self._use_test:
            self._test_seasonality()
        y, seasonal = self._deseasonalize_data()
        if use_mle:
            mod = SARIMAX(y, order=(0, 1, 1), trend="c")
            res = mod.fit(disp=disp)
            params = np.asarray(res.params)
            alpha = params[1] + 1
            if alpha > 1:
                alpha = 0.9998
                res = mod.fit_constrained({"ma.L1": alpha - 1})
                params = np.asarray(res.params)
            b0 = params[0]
            sigma2 = params[-1]
            one_step = res.forecast(1) - b0
        else:
            ct = add_trend(y, "ct", prepend=True)[:, :2]
            ct[:, 1] -= 1
            _, b0 = np.linalg.lstsq(ct, y, rcond=None)[0]
            res = ExponentialSmoothing(
                y, initial_level=y[0],
                initialization_method="known").fit(disp=disp)
            alpha = res.params[0]
            sigma2 = None
            one_step = res.forecast(1)
        return ThetaModelResults(b0, alpha, sigma2, one_step, seasonal,
                                 use_mle, self)
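If the method above is statsmodels' ThetaModel.fit (as the SARIMAX and exponential-smoothing internals suggest), a minimal usage sketch might look like this; the monthly index, seed, and period=12 are illustrative assumptions:
import numpy as np
import pandas as pd
from statsmodels.tsa.forecasting.theta import ThetaModel

rng = np.random.default_rng(2)
idx = pd.date_range("2000-01-31", periods=120, freq="M")
y = pd.Series(10 + 0.1 * np.arange(120) + rng.standard_normal(120), index=idx)

res = ThetaModel(y, period=12).fit(use_mle=False)  # 2-step OLS + SES estimation
print(res.forecast(6))                             # six-step-ahead forecasts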
Ejemplo n.º 41
0
def coint(y0,
          y1,
          trend='c',
          method='aeg',
          maxlag=None,
          autolag='aic',
          return_results=None):
    """Test for no-cointegration of a univariate equation

    The null hypothesis is no cointegration. Variables in y0 and y1 are
    assumed to be integrated of order 1, I(1).

    This uses the augmented Engle-Granger two-step cointegration test.
    Constant or trend is included in 1st stage regression, i.e. in
    cointegrating equation.

    **Warning:** The autolag default has changed compared to statsmodels 0.8.
    In 0.8, autolag was always None; now the keyword is used and defaults to
    'aic'. Use `autolag=None` to avoid the lag search.

    Parameters
    ----------
    y0 : array_like, 1d
        first element in cointegrating vector
    y1 : array_like
        remaining elements in cointegrating vector
    trend : str {'c', 'ct'}
        trend term included in regression for cointegrating equation

        * 'c' : constant
        * 'ct' : constant and linear trend
        * also available quadratic trend 'ctt', and no constant 'nc'

    method : string
        currently only 'aeg' for augmented Engle-Granger test is available.
        default might change.
    maxlag : None or int
        keyword for `adfuller`, largest or given number of lags
    autolag : string
        keyword for `adfuller`, lag selection criterion.

        * if None, then maxlag lags are used without lag search
        * if 'AIC' (default) or 'BIC', then the number of lags is chosen
          to minimize the corresponding information criterion
        * 't-stat' based choice of maxlag.  Starts with maxlag and drops a
          lag until the t-statistic on the last lag length is significant
          using a 5%-sized test

    return_results : bool
        for future compatibility, currently only tuple available.
        If True, then a results instance is returned. Otherwise, a tuple
        with the test outcome is returned.
        Set `return_results=False` to avoid future changes in return.

    Returns
    -------
    coint_t : float
        t-statistic of unit-root test on residuals
    pvalue : float
        MacKinnon's approximate, asymptotic p-value based on MacKinnon (1994)
    crit_value : dict
        Critical values for the test statistic at the 1 %, 5 %, and 10 %
        levels based on regression curve. This depends on the number of
        observations.

    Notes
    -----
    The null hypothesis is that there is no cointegration, the alternative
    hypothesis is that there is a cointegrating relationship. If the pvalue
    is small, below a critical size, then we can reject the hypothesis that
    there is no cointegrating relationship.

    P-values and critical values are obtained through regression surface
    approximation from MacKinnon 1994 and 2010.

    If the two series are almost perfectly collinear, then computing the
    test is numerically unstable. However, the two series will be cointegrated
    under the maintained assumption that they are integrated. In this case
    the t-statistic will be set to -inf and the pvalue to zero.

    TODO: We could handle gaps in data by dropping rows with nans in the
    auxiliary regressions. Not implemented yet, currently assumes no nans
    and no gaps in time series.

    References
    ----------
    MacKinnon, J.G. 1994  "Approximate Asymptotic Distribution Functions for
        Unit-Root and Cointegration Tests." Journal of Business & Economics
        Statistics, 12.2, 167-76.
    MacKinnon, J.G. 2010.  "Critical Values for Cointegration Tests."
        Queen's University, Dept of Economics Working Papers 1227.
        http://ideas.repec.org/p/qed/wpaper/1227.html
    """
    trend = trend.lower()
    if trend not in ['c', 'nc', 'ct', 'ctt']:
        raise ValueError("trend option %s not understood" % trend)
    y0 = np.asarray(y0)
    y1 = np.asarray(y1)
    if y1.ndim < 2:
        y1 = y1[:, None]
    nobs, k_vars = y1.shape
    k_vars += 1  # add 1 for y0

    if trend == 'nc':
        xx = y1
    else:
        xx = add_trend(y1, trend=trend, prepend=False)

    res_co = OLS(y0, xx).fit()

    if res_co.rsquared < 1 - 100 * SQRTEPS:
        res_adf = adfuller(res_co.resid,
                           maxlag=maxlag,
                           autolag=autolag,
                           regression='nc')
    else:
        # Edge case where the series are (nearly) perfectly collinear
        res_adf = (-np.inf, )

    pval_asy = mackinnonp(res_adf[0], regression=trend, N=k_vars)
    if trend == 'nc':
        # MacKinnon (2010) provides no critical values without a constant
        crit = [np.nan] * 3
    else:
        crit = mackinnoncrit(N=k_vars, regression=trend, nobs=nobs - 1)
    return res_adf[0], pval_asy, crit
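For reference, a quick usage sketch on simulated data (the cointegrated pair is hypothetical); as the docstring above describes, the third element of the return is the set of MacKinnon critical values:
import numpy as np
from statsmodels.tsa.stattools import coint

rng = np.random.default_rng(3)
common = np.cumsum(rng.standard_normal(500))
y0 = common + rng.standard_normal(500)
y1 = 0.5 * common + rng.standard_normal(500)

stat, pvalue, crit = coint(y0, y1, trend='c')
print(f"stat={stat:.2f} p={pvalue:.4f}")  # small p: reject "no cointegration"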
Ejemplo n.º 42
0
import numpy as np
import pandas as pd
from pandas import Series, read_csv
from matplotlib import pyplot
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.tsatools import add_trend
# trendData = read_csv('data/trend.csv')
# trend = trendData.values.T[0]

# seasonalData = read_csv('data/seasonal.csv')
# seasonal = seasonalData.values.T[0]

# residData = read_csv('data/resid.csv')
# resid = residData.values.T[0]
# Series.from_csv was removed from pandas; read_csv + squeeze is equivalent
dat = read_csv(
    'data/Fuzzy_data_resource_JobId_6336594489_5minutes_todeseasonal.csv',
    header=0, index_col=0, parse_dates=True).squeeze("columns")
date_range = pd.date_range(dat.index.min(), dat.index.max(), freq="10min")
result = add_trend(dat.values)
# lol = np.add(trend,seasonal,resid)
# lolDf = pd.DataFrame(np.array(lol))
# lolDf.to_csv('data/lol.csv', index=False, header=None)
pyplot.plot(result)
Ejemplo n.º 43
0
 def test_trend_n(self):
     assert_equal(tools.add_trend(self.arr_1d, 'n'), self.arr_1d)
     assert tools.add_trend(self.arr_1d, 'n') is not self.arr_1d
     assert_equal(tools.add_trend(self.arr_2d, 'n'), self.arr_2d)
     assert tools.add_trend(self.arr_2d, 'n') is not self.arr_2d
Ejemplo n.º 44
0
    def dynamic_coefs(self,
                      y0,
                      y1,
                      n_lags=None,
                      trend='c',
                      normalize=True,
                      reverse=False):
        """
        Dynamic Cointegration Coefficient Calculation.

        This equation takes a linear combination of multiple I(1) time
        series to create an I(0), or stationary, time series.

        This is useful if the series share a similar stochastic long-term
        trend, as the combination eliminates that trend and allows the result
        to be modeled as a stationary process.

        Unlike Engle-Granger, this method uses dynamic regression - taking
        an equal number of lags and leads of the differences of the
        series - to create a more accurate parameter vector. This method
        calculates the lag-lead matrices for the given lag values or searches
        for the best number of lags using BIC calculations. Once the optimal
        value is found, the calculation is done and returned. The optimal
        lag is available afterwards as the `max_val` attribute, and the
        fitted regression as the `model` attribute.

        Parameters
        ----------
        y0 : array_like
            The first element in cointegrated system. Must be 1-d.
        y1 : array_like
            The remaining elements in cointegrated system.
        n_lags : int, array, None
            Determines the lags over which the function searches for the
            best vector.

            * int : the calculation is done for only that lag.
            * array : an array of two integers; the first value is where the
              search begins and the second is where it ends.
            * None : the function searches from 2 up to the ceiling of half
              the cube root of the number of observations, plus two, so that
              at least one value is searched;
              i.e., last_lag = ceil(n_obs**(1/3) / 2) + 2.

        trend : str {'c', 'ct'}
            The trend term included in regression for cointegrating equation.

            * 'c' : constant.
            * 'ct' : constant and linear trend.
            * also available quadratic trend 'ctt', and no constant 'nc'.

        normalize : bool
            If True, the first entry in the parameter vector is normalized to
            one and everything else is divided by the first entry. This is
            because any cointegrating vector could be multiplied by a scalar
            and still be a cointegrating vector.
        reverse : bool
            The series must be ordered from the most recent observations to
            the earliest in order to calculate the differences. Use this flag
            to reverse the ordering of your data points.

        Returns
        -------
        coefs : array
            A vector that will create an I(0) time series if such a
            combination exists.

        Notes
        -----
        The data must go from the latest observations to the earliest. If not,
        the coef vector will be the opposite sign.

        The series should be checked independently for their integration
        order. The series must be I(1) to get consistent results. You can
        check this by using the int_order function.

        References
        ----------
        .. [1] Stock, J. H., & Watson, M. W. (1993). A simple estimator of
           cointegrating vectors in higher order integrated systems.
           Econometrica: Journal of the Econometric Society, 783-820.
        .. [2] Hamilton, J. D. (1994). Time Series Analysis
           (Vol. 2, pp. 690-696). Princeton, NJ: Princeton University Press.
        """
        self.bics = []
        self.max_val = []
        self.model = ''
        self.coefs = []

        trend = string_like(trend, 'trend', options=('c', 'nc', 'ct', 'ctt'))
        y1 = add_trend(y1, trend=trend, prepend=True)

        if _is_using_pandas(y0, y1):
            y1 = y1.reset_index(drop=True)
            columns = list(y1.columns)
        else:
            # NumPy input: build labels and convert to pandas so the
            # label-based bookkeeping below works uniformly.
            columns = [f'Var_{x}' for x in range(y1.shape[1])]
            y0, y1 = pd.DataFrame(y0), pd.DataFrame(y1)
        # k is needed below in every branch, not just the NumPy one
        n_obs, k = y1.shape

        if reverse:
            y0, y1 = y0[::-1], y1[::-1]

        if n_lags is None:
            n_obs, k = y1.shape
            dta = pd.DataFrame(np.diff(a=y1, n=1, axis=0))
            for lag in range(2, int(np.ceil(n_obs**(1 / 3) / 2) + 2)):

                df1 = pd.DataFrame(lagmat(dta, lag + 1, trim='backward'))
                cols = dict(zip(list(df1.columns)[::-1][0:k][::-1], columns))
                df1 = df1.rename(columns=cols)

                df2 = pd.DataFrame(lagmat(dta, lag, trim='forward'))

                lags_leads = pd.concat([df1, df2], axis=1, join='outer')
                lags_leads = lags_leads.drop(list(range(0, lag)))
                lags_leads = lags_leads.reset_index(drop=True)

                lags_leads = lags_leads.drop(
                    list(range(len(lags_leads) - lag, len(lags_leads))))

                lags_leads = lags_leads.reset_index(drop=True)
                data_y = y0.drop(list(range(0, lag))).reset_index(drop=True)
                data_y = data_y.drop(
                    list(range(len(data_y) - lag - 1, len(data_y))))
                data_y = data_y.reset_index(drop=True)

                self.bics.append([OLS(data_y, lags_leads).fit().bic, lag])

            # smaller BIC is preferred
            self.max_val = min(self.bics, key=lambda item: item[0])
            self.max_val = self.max_val[1]

        elif not np.isscalar(n_lags) and len(n_lags) == 2:
            start, end = int(n_lags[0]), int(n_lags[1])
            n_obs, k = y1.shape
            dta = pd.DataFrame(np.diff(a=y1, n=1, axis=0))

            for lag in range(start, end + 1):
                df1 = pd.DataFrame(lagmat(dta, lag + 1, trim='backward'))
                cols = dict(zip(list(df1.columns)[::-1][0:k][::-1], columns))
                df1 = df1.rename(columns=cols)

                df2 = pd.DataFrame(lagmat(dta, lag, trim='forward'))

                lags_leads = pd.concat([df1, df2], axis=1, join='outer')
                lags_leads = lags_leads.drop(list(range(0, lag)))
                lags_leads = lags_leads.reset_index(drop=True)
                lags_leads = lags_leads.drop(
                    list(range(len(lags_leads) - lag, len(lags_leads))))
                lags_leads = lags_leads.reset_index(drop=True)

                data_y = y0.drop(list(range(0, lag))).reset_index(drop=True)
                data_y = data_y.drop(
                    list(range(len(data_y) - lag - 1, len(data_y))))
                data_y = data_y.reset_index(drop=True)

                self.bics.append([OLS(data_y, lags_leads).fit().bic, lag])

            # smaller BIC is preferred
            self.max_val = min(self.bics, key=lambda item: item[0])
            self.max_val = self.max_val[1]

        elif np.isscalar(n_lags):
            self.max_val = int(n_lags)

        else:
            raise ValueError(
                'Make sure your lags are in one of the required forms.')

        dta = pd.DataFrame(np.diff(a=y1, n=1, axis=0))
        # Create a matrix of the lags, this also retains the original matrix,
        # which is why max_val + 1
        df1 = pd.DataFrame(lagmat(dta, self.max_val + 1, trim='backward'))

        # Rename the columns, as we need to keep track of them. We know the
        # original will be the final values
        cols = dict(zip(list(df1.columns)[::-1][0:k][::-1], columns))
        df1 = df1.rename(columns=cols)

        # Do the same, but these are leads, this does not keep the
        # original matrix, thus max_val
        df2 = pd.DataFrame(lagmat(dta, self.max_val, trim='forward'))

        # There are missing data due to the lags and leads, we concat
        # the frames and drop the values of which are missing.
        lags_leads = pd.concat([df1, df2], axis=1, join='outer')
        lags_leads = lags_leads.drop(list(range(0, self.max_val)))
        lags_leads = lags_leads.reset_index(drop=True)
        lags_leads = lags_leads.drop(
            list(range(len(lags_leads) - self.max_val, len(lags_leads))))
        lags_leads = lags_leads.reset_index(drop=True)

        # We also need to do this for the endog values, we need to
        # drop 1 extra due to a loss from first differencing.
        # This will be at the end of the matrix.
        data_y = y0.drop(list(range(0, self.max_val))).reset_index(drop=True)
        data_y = data_y.drop(
            list(range(len(data_y) - self.max_val - 1, len(data_y))))
        data_y = data_y.reset_index(drop=True)

        self.model = OLS(data_y, lags_leads).fit()

        self.coefs = self.model.params[list(y1.columns)]

        if normalize:
            self.coefs = self.coefs / self.coefs[0]

        return self.coefs
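For orientation, a compact standalone sketch of the lag/lead ("dynamic OLS", Stock-Watson style) regression the method above constructs, using hypothetical data and a fixed lag of 2 in place of the BIC search:
import numpy as np
import pandas as pd
from statsmodels.regression.linear_model import OLS
from statsmodels.tsa.tsatools import add_trend

rng = np.random.default_rng(4)
n, lag = 300, 2
common = np.cumsum(rng.standard_normal(n))
x = common + rng.standard_normal(n)
y = 2.0 * common + rng.standard_normal(n)

dx = np.diff(x)  # first differences, length n - 1
# Regress y_t on x_t plus leads and lags of dx, trimming both ends so that
# all columns align (t runs from lag to n - lag - 2).
X = pd.DataFrame({'x': x[lag:n - lag - 1]})
for j in range(-lag, lag + 1):
    X[f'dx_{j}'] = dx[lag + j:n - lag - 1 + j]
X = add_trend(X, trend='c', prepend=True)

res = OLS(y[lag:n - lag - 1], X).fit()
print(res.params['x'])  # close to 2.0, the cointegrating coefficient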
Ejemplo n.º 45
0
 def test_dataframe_duplicate(self):
     df = pd.DataFrame(self.arr_2d, columns=["const", "trend"])
     tools.add_trend(df, trend="ct")
     tools.add_trend(df, trend="ct", prepend=True)