def decorator(model, params, nobs, burn=500, initial_value=None, x=None,
              initial_value_vol=None, data_only=False):
    k_x = 0
    if x is not None:
        x = np.asarray(x)
        k_x = x.shape[1]
        if x.shape[0] != nobs + burn:
            raise ValueError('x must have nobs + burn rows')
    # added model._lags is not None for LS models
    lags = model._lags if model._lags is not None else np.zeros((0, 0), int)
    # Parameter counts for the mean, volatility and distribution blocks
    mc = int(model.constant) + lags.shape[1] + k_x
    vc = model.volatility.num_params
    dc = model.distribution.num_params
    num_params = mc + vc + dc
    params = ensure1d(params, 'params', series=False)
    if params.shape[0] != num_params:
        raise ValueError(
            f'params has the wrong number of elements. Expected {num_params}, '
            f'got {params.shape[0]}. '
            f'Perhaps you forgot to add the exogenous variables?')
    dist_params = [] if dc == 0 else params[-dc:]
    vol_params = params[mc:mc + vc]
    simulator = model.distribution.simulate(dist_params)
    sim_data = model.volatility.simulate(vol_params, nobs + burn, simulator,
                                         burn, initial_value_vol)
    errors = sim_data[0]
    vol = np.sqrt(sim_data[1])
    max_lag = np.max(lags) if lags.size else 0
    if initial_value is None:
        initial_value = 0.0
    elif not np.isscalar(initial_value):
        initial_value = ensure1d(initial_value, 'initial_value')
        if initial_value.shape[0] != max_lag:
            raise ValueError('initial_value has the wrong shape')
    y = np.zeros(nobs + burn)
    y[:max_lag] = initial_value
    # Fill y in place using the HARX recursion helper
    simulate_harx(y, nobs + burn, k_x, max_lag, model.constant, x, errors,
                  params, lags.T.astype(np.int64))
    if data_only:
        return y[burn:]
    df = dict(data=y[burn:], volatility=vol[burn:], errors=errors[burn:])
    df = pd.DataFrame(df)
    return df
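# Hedged usage sketch, not part of the helper above: it illustrates the
# [mean | volatility | distribution] parameter layout that the helper expects,
# using the public HARX/GARCH/StudentsT classes from arch.univariate.  The
# numeric values are purely illustrative.
import numpy as np
from arch.univariate import GARCH, HARX, StudentsT

model = HARX(lags=[1, 5, 22])
model.volatility = GARCH(p=1, q=1)
model.distribution = StudentsT()
mean_params = np.array([0.1, 0.4, 0.3, 0.2])  # constant plus three HAR lag terms
vol_params = np.array([0.05, 0.10, 0.85])     # omega, alpha, beta
dist_params = np.array([8.0])                 # Student's t degrees of freedom
params = np.concatenate([mean_params, vol_params, dist_params])
sim_data = model.simulate(params, nobs=1000)  # DataFrame: data, volatility, errors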
def simulate(
    self, parameters: Union[int, float, Sequence[Union[float, int]], ArrayLike1D]
) -> Callable[[Union[int, Tuple[int, ...]]], NDArray]:
    parameters = ensure1d(parameters, "parameters", False)
    if parameters[0] <= 2.0:
        raise ValueError("The shape parameter must be larger than 2")
    self._parameters = parameters
    return self._simulator
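# Hedged usage sketch (assumption: the method above is the simulate method of
# the standardized Student's t distribution in arch.univariate).  simulate
# validates the shape (degrees of freedom) parameter and returns the bound
# simulator callable, which can then produce standardized innovations.
from arch.univariate import StudentsT

dist = StudentsT()
simulator = dist.simulate([8.0])  # the shape parameter must exceed 2
draws = simulator(1000)           # 1000 standardized Student's t draws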
def decorator(model, params, nobs, reps, burn=500, initial_value=None, x=None,
              initial_value_vol=None):
    k_x = 0
    if x is not None:
        x = np.asarray(x)
        k_x = x.shape[1]
        if x.shape[0] != nobs + burn:
            raise ValueError('x must have nobs + burn rows')
    # added model._lags is not None for LS models
    lags = model._lags if model._lags is not None else np.zeros((0, 0), int)
    # Parameter counts for the mean, volatility and distribution blocks
    mc = int(model.constant) + lags.shape[1] + k_x
    vc = model.volatility.num_params
    dc = model.distribution.num_params
    num_params = mc + vc + dc
    params = ensure1d(params, 'params', series=False)
    if params.shape[0] != num_params:
        raise ValueError(
            f'params has the wrong number of elements. Expected {num_params}, '
            f'got {params.shape[0]}. '
            f'Perhaps you forgot to add the exogenous variables?')
    dist_params = [] if dc == 0 else params[-dc:]
    vol_params = params[mc:mc + vc]
    simulator = model.distribution.simulate(dist_params)
    errors = model.volatility.simulate_mc(vol_params, nobs + burn, reps,
                                          simulator, burn, initial_value_vol)
    max_lag = np.max(lags) if lags.size else 0
    if initial_value is None:
        initial_value = 0
    elif np.isscalar(initial_value):
        initial_value = np.repeat(initial_value, max_lag)
    elif not np.isscalar(initial_value):
        initial_value = ensure1d(initial_value, 'initial_value')
        if initial_value.shape[0] != max_lag:
            raise ValueError('initial_value has the wrong shape')
    y = np.zeros((nobs + burn, reps), np.float64)
    # y is modified in place
    y[:max_lag, :reps] = initial_value
    return simulate_harx_mc(y, nobs + burn, reps, k_x, max_lag, model.constant,
                            x, errors, params,
                            lags.T.astype(np.int64))[burn:]
def _cross_section(y: ArrayLike1D, x: ArrayLike2D, trend: str) -> RegressionResults:
    if trend not in ("n", "c", "ct", "ctt"):
        raise ValueError('trend must be one of "n", "c", "ct" or "ctt"')
    y = ensure1d(y, "y", True)
    x = ensure2d(x, "x")

    if not isinstance(x, pd.DataFrame):
        cols = [f"x{i}" for i in range(1, x.shape[1] + 1)]
        x = pd.DataFrame(x, columns=cols, index=y.index)
    x = add_trend(x, trend)

    res = OLS(y, x).fit()
    return res
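# Hedged usage sketch (assumption: ensure1d, ensure2d, add_trend and OLS are
# already imported by the surrounding module, as the snippet above implies).
# It shows the expected shapes: a 1-d dependent variable and a 2-d regressor
# block, with the deterministic trend terms appended before the OLS fit.
import numpy as np
import pandas as pd

rs = np.random.RandomState(0)
x = pd.DataFrame(rs.standard_normal((250, 2)), columns=["x1", "x2"])
y = pd.Series(x.to_numpy() @ np.array([1.0, -0.5]) + rs.standard_normal(250), name="y")
res = _cross_section(y, x, trend="c")  # OLS of y on x plus a constant
print(res.params)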
def __init__(
    self,
    x: ArrayLike,
    bandwidth: Optional[float] = None,
    df_adjust: int = 0,
    center: bool = True,
    weights: Optional[ArrayLike] = None,
    force_int: bool = False,
):
    self._x_orig = ensure2d(x, "x")
    self._x = np.asarray(self._x_orig)
    self._center = center
    if self._center:
        self._x = self._x - self._x.mean(0)
    if bandwidth is not None and (not np.isscalar(bandwidth) or bandwidth < 0):
        raise ValueError("bandwidth must be a non-negative scalar.")
    self._bandwidth = bandwidth
    self._auto_bandwidth = bandwidth is None
    if not np.isscalar(df_adjust) or df_adjust < 0:
        raise ValueError("df_adjust must be a non-negative integer.")
    self._df_adjust = int(df_adjust)
    self._df = self._x.shape[0] - self._df_adjust
    if self._df <= 0:
        raise ValueError(
            "Degrees of freedom is <= 0 after adjusting the sample "
            "size of x using df_adjust. df_adjust must be less than"
            f" {self._x.shape[0]}"
        )
    if weights is None:
        xw = self._x_weights = np.ones((self._x.shape[1], 1))
    else:
        xw = ensure1d(np.asarray(weights), "weights")
        xw = self._x_weights = xw[:, None]
    if (
        xw.shape[0] != self._x.shape[1]
        or xw.shape[1] != 1
        or np.any(xw < 0)
        or np.all(xw == 0)
    ):
        raise ValueError(
            f"weights must be a 1 by {self._x.shape[1]} (x.shape[1]) "
            "array with non-negative values where at least one value is "
            "strictly greater than 0."
        )
    self._force_int = force_int
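# Hedged usage sketch (assumption: the __init__ above belongs to the long-run
# covariance estimators in arch.covariance.kernel; Bartlett is used purely for
# illustration).  The bandwidth is selected automatically when not supplied.
import numpy as np
from arch.covariance.kernel import Bartlett

rs = np.random.RandomState(0)
x = rs.standard_normal((500, 3))
est = Bartlett(x, df_adjust=1, center=True)
print(est.cov.long_run)  # 3 x 3 long-run covariance estimate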
def simulate(
    self,
    parameters: Union[Sequence[Union[int, float]], ArrayLike1D],
    nobs: int,
    rng: RNGType,
    burn: int = 500,
    initial_value: Optional[float] = None,
) -> Tuple[NDArray, NDArray]:
    parameters = ensure1d(parameters, "parameters", False)
    errors = rng(nobs + burn)

    if initial_value is None:
        scale = np.ones_like(parameters)
        persistence = np.sum(parameters[1:] * scale[1:])
        if (1.0 - persistence) > 0:
            initial_value = parameters[0] / (1.0 - persistence)
        else:
            from warnings import warn

            warn(initial_value_warning, InitialValueWarning)
            initial_value = parameters[0]

    sigma2 = np.zeros(nobs + burn)
    data = np.zeros(nobs + burn)
    max_lag = 1
    sigma2[:max_lag] = initial_value
    data[:max_lag] = np.sqrt(sigma2[:max_lag]) * errors[:max_lag]
    omega = parameters[0]
    alpha = parameters[1]
    beta = parameters[2]
    theta = parameters[3]

    sigma2[max_lag] = omega
    data[max_lag] = errors[max_lag] * np.sqrt(sigma2[max_lag])
    for t in range(max_lag + 1, nobs + burn):
        loc = t - 1
        sigma2[t] = (
            omega
            + alpha * (data[loc] - theta * np.sqrt(sigma2[loc])) ** 2.0
            + beta * sigma2[loc]
        )
        data[t] = errors[t] * np.sqrt(sigma2[t])

    return data[burn:], sigma2[burn:]
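# Hedged stand-alone sketch (assumption: not part of the library).  It
# re-implements the recursion from the simulate method above as a plain NumPy
# function so the asymmetric news-impact term (data[t-1] - theta * sigma[t-1])**2
# can be inspected directly.  All names and values are illustrative.
import numpy as np


def simulate_asym_garch(omega, alpha, beta, theta, nobs, burn=500, seed=0):
    """Simulate a one-lag asymmetric GARCH recursion with Gaussian errors."""
    rs = np.random.RandomState(seed)
    errors = rs.standard_normal(nobs + burn)
    sigma2 = np.zeros(nobs + burn)
    data = np.zeros(nobs + burn)
    # Start the variance at omega; the burn-in removes dependence on this choice.
    sigma2[0] = omega
    data[0] = np.sqrt(sigma2[0]) * errors[0]
    for t in range(1, nobs + burn):
        shock = data[t - 1] - theta * np.sqrt(sigma2[t - 1])
        sigma2[t] = omega + alpha * shock ** 2 + beta * sigma2[t - 1]
        data[t] = np.sqrt(sigma2[t]) * errors[t]
    return data[burn:], sigma2[burn:]


data, sigma2 = simulate_asym_garch(0.05, 0.05, 0.90, 0.5, nobs=1000)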
def __init__(self, y, lags, trend, valid_trends):
    self._y = ensure1d(y, 'y')
    self._delta_y = diff(y)
    self._nobs = self._y.shape[0]
    self._lags = None
    self.lags = lags
    self._valid_trends = valid_trends
    self._trend = ''
    self.trend = trend
    self._stat = None
    self._critical_values = None
    self._pvalue = None
    self._null_hypothesis = 'The process contains a unit root.'
    self._alternative_hypothesis = 'The process is weakly stationary.'
    self._test_name = None
    self._title = None
    self._summary_text = None
def _check_constraints(
    self, parameters: Optional[Union[Sequence[float], ArrayLike1D]]
) -> NDArray:
    bounds = self.bounds(empty(0))
    if parameters is not None:
        params = ensure1d(parameters, "parameters", False)
        nparams = len(params)
    else:
        nparams = 0
    if nparams != len(bounds):
        raise ValueError("parameters must have {0} elements".format(len(bounds)))
    if len(bounds) == 0:
        return empty(0)

    for p, n, b in zip(params, self.name, bounds):
        if not (b[0] <= p <= b[1]):
            raise ValueError(
                "{0} does not satisfy the bounds requirement "
                "of ({1}, {2})".format(n, *b)
            )
    return params
def _check_cointegrating_regression(
    y: ArrayLike1D,
    x: ArrayLike2D,
    trend: str,
    supported_trends: Tuple[str, ...] = ("n", "c", "ct", "ctt"),
) -> CointegrationSetup:
    y = ensure1d(y, "y", True)
    x = ensure2d(x, "x")
    if y.shape[0] != x.shape[0]:
        raise ValueError(
            f"The number of observations in y and x differ. y has "
            f"{y.shape[0]} observations, and x has {x.shape[0]}."
        )
    if not isinstance(x, pd.DataFrame):
        cols = [f"x{i}" for i in range(1, x.shape[1] + 1)]
        x_df = pd.DataFrame(x, columns=cols, index=y.index)
    else:
        x_df = x
    trend = trend.lower()
    if trend not in supported_trends:
        trends = ",".join([f'"{st}"' for st in supported_trends])
        raise ValueError(f"Unknown trend. Must be one of {{{trends}}}")
    return CointegrationSetup(y, x_df, trend)
def test_ensure1d(self):
    out = ensure1d(1.0, 'y')
    assert_equal(out, np.array([1.0]))
    out = ensure1d(np.arange(5.0), 'y')
    assert_equal(out, np.arange(5.0))
    out = ensure1d(np.arange(5.0)[:, None], 'y')
    assert_equal(out, np.arange(5.0))
    in_array = np.reshape(np.arange(16.0), (4, 4))
    with pytest.raises(ValueError):
        ensure1d(in_array, 'y')

    y = Series(np.arange(5.0))
    ys = ensure1d(y, 'y')
    assert isinstance(ys, np.ndarray)
    ys = ensure1d(y, 'y', True)
    assert isinstance(ys, Series)

    y = DataFrame(y)
    ys = ensure1d(y, 'y')
    assert isinstance(ys, np.ndarray)
    ys = ensure1d(y, 'y', True)
    assert isinstance(ys, Series)
    y.columns = [1]
    ys = ensure1d(y, 'y', True)
    assert isinstance(ys, Series)
    assert ys.name == '1'

    y = Series(np.arange(5.0), name='series')
    ys = ensure1d(y, 'y')
    assert isinstance(ys, np.ndarray)
    ys = ensure1d(y, 'y', True)
    assert isinstance(ys, Series)

    y = DataFrame(y)
    ys = ensure1d(y, 'y')
    assert isinstance(ys, np.ndarray)
    ys = ensure1d(y, 'y', True)
    assert isinstance(ys, Series)
    ys.name = 1
    ys = ensure1d(ys, None, True)
    assert isinstance(ys, Series)
    assert ys.name == '1'

    y = DataFrame(np.reshape(np.arange(10), (5, 2)))
    with pytest.raises(ValueError):
        ensure1d(y, 'y')
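# Hedged usage sketch of the ensure1d contract exercised by the test above.
# Assumption: ensure1d lives in arch.utility.array; the inputs are illustrative.
import numpy as np
import pandas as pd
from arch.utility.array import ensure1d

as_array = ensure1d(3.5, "y")                  # scalar -> np.array([3.5])
flat = ensure1d(np.arange(4.0)[:, None], "y")  # (4, 1) column -> (4,) array
as_series = ensure1d(pd.Series(np.arange(3.0), name="y"), "y", series=True)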
def simulate(
    self,
    params: Union[Sequence[float], ArrayLike1D],
    nobs: int,
    burn: int = 500,
    initial_value: Optional[Union[float, NDArray]] = None,
    x: Optional[ArrayLike] = None,
    initial_value_vol: Optional[Union[float, NDArray]] = None,
) -> DataFrame:
    """
    Simulated data from a zero mean model

    Parameters
    ----------
    params : {ndarray, DataFrame}
        Parameters to use when simulating the model.  Parameter order is
        [volatility distribution]. There are no mean parameters.
    nobs : int
        Length of series to simulate
    burn : int, optional
        Number of values to simulate to initialize the model and remove
        dependence on initial values.
    initial_value : None
        This value is not used.
    x : None
        This value is not used.
    initial_value_vol : {ndarray, float}, optional
        An array or scalar to use when initializing the volatility process.

    Returns
    -------
    simulated_data : DataFrame
        DataFrame with columns data containing the simulated values,
        volatility, containing the conditional volatility and errors
        containing the errors used in the simulation

    Examples
    --------
    Basic data simulation with no mean and constant volatility

    >>> from arch.univariate import ZeroMean
    >>> import numpy as np
    >>> zm = ZeroMean()
    >>> params = np.array([1.0])
    >>> sim_data = zm.simulate(params, 1000)

    Simulating data with a non-trivial volatility process

    >>> from arch.univariate import GARCH
    >>> zm.volatility = GARCH(p=1, o=1, q=1)
    >>> sim_data = zm.simulate([0.05, 0.1, 0.1, 0.8], 300)
    """
    params = ensure1d(params, "params", False)
    if initial_value is not None or x is not None:
        raise ValueError("Both initial value and x must be none when "
                         "simulating a constant mean process.")

    _, vp, dp = self._parse_parameters(params)

    sim_values = self.volatility.simulate(vp, nobs + burn,
                                          self.distribution.simulate(dp),
                                          burn, initial_value_vol)
    errors = sim_values[0]
    y = errors
    vol = np.sqrt(sim_values[1])
    df = dict(data=y[burn:], volatility=vol[burn:], errors=errors[burn:])
    df = DataFrame(df)

    return df
def simulate(
    self,
    params: Sequence[float],
    nobs: int,
    burn: int = 500,
    initial_value: Optional[Union[float, NDArray]] = None,
    x: Optional[ArrayLike] = None,
    initial_value_vol: Optional[Union[float, NDArray]] = None,
) -> DataFrame:
    """
    Simulates data from a linear regression, AR or HAR models

    Parameters
    ----------
    params : ndarray
        Parameters to use when simulating the model.  Parameter order is
        [mean volatility distribution] where the parameters of the mean
        model are ordered [constant lag[0] lag[1] ... lag[p] ex[0] ...
        ex[k-1]] where lag[j] indicates the coefficient on the jth lag in
        the model and ex[j] is the coefficient on the jth exogenous
        variable.
    nobs : int
        Length of series to simulate
    burn : int, optional
        Number of values to simulate to initialize the model and remove
        dependence on initial values.
    initial_value : {ndarray, float}, optional
        Either a scalar value or `max(lags)` array set of initial values to
        use when initializing the model.  If omitted, 0.0 is used.
    x : {ndarray, DataFrame}, optional
        nobs + burn by k array of exogenous variables to include in the
        simulation.
    initial_value_vol : {ndarray, float}, optional
        An array or scalar to use when initializing the volatility process.

    Returns
    -------
    simulated_data : DataFrame
        DataFrame with columns data containing the simulated values,
        volatility, containing the conditional volatility and errors
        containing the errors used in the simulation

    Examples
    --------
    >>> import numpy as np
    >>> from arch.univariate import HARX, GARCH
    >>> harx = HARX(lags=[1, 5, 22])
    >>> harx.volatility = GARCH()
    >>> harx_params = np.array([1, 0.2, 0.3, 0.4])
    >>> garch_params = np.array([0.01, 0.07, 0.92])
    >>> params = np.concatenate((harx_params, garch_params))
    >>> sim_data = harx.simulate(params, 1000)

    Simulating models with exogenous regressors requires the regressors to
    have nobs plus burn data points

    >>> nobs = 100
    >>> burn = 200
    >>> x = np.random.randn(nobs + burn, 2)
    >>> x_params = np.array([1.0, 2.0])
    >>> params = np.concatenate((harx_params, x_params, garch_params))
    >>> sim_data = harx.simulate(params, nobs=nobs, burn=burn, x=x)
    """
    k_x = 0
    if x is not None:
        k_x = x.shape[1]
        if x.shape[0] != nobs + burn:
            raise ValueError("x must have nobs + burn rows")

    assert self._lags is not None
    mc = int(self.constant) + self._lags.shape[1] + k_x
    vc = self.volatility.num_params
    dc = self.distribution.num_params
    num_params = mc + vc + dc
    params = ensure1d(params, "params", series=False)
    if params.shape[0] != num_params:
        raise ValueError("params has the wrong number of elements. "
                         "Expected " + str(num_params) +
                         ", got " + str(params.shape[0]))

    dist_params = [] if dc == 0 else params[-dc:]
    vol_params = params[mc:mc + vc]
    simulator = self.distribution.simulate(dist_params)
    sim_data = self.volatility.simulate(vol_params, nobs + burn, simulator,
                                        burn, initial_value_vol)
    errors = sim_data[0]
    vol = np.sqrt(sim_data[1])

    max_lag = np.max(self._lags)
    y = np.zeros(nobs + burn)
    if initial_value is None:
        initial_value = 0.0
    elif not np.isscalar(initial_value):
        initial_value = ensure1d(initial_value, "initial_value")
        if initial_value.shape[0] != max_lag:
            raise ValueError("initial_value has the wrong shape")
    y[:max_lag] = initial_value

    for t in range(max_lag, nobs + burn):
        ind = 0
        if self.constant:
            y[t] = params[ind]
            ind += 1
        for lag in self._lags.T:
            y[t] += params[ind] * y[t - lag[1]:t - lag[0]].mean()
            ind += 1
        for i in range(k_x):
            y[t] += params[ind] * x[t, i]
        y[t] += errors[t]

    df = dict(data=y[burn:], volatility=vol[burn:], errors=errors[burn:])
    df = DataFrame(df)

    return df
def test_ensure1d(self):
    out = ensure1d(1.0, 'y')
    assert_equal(out, np.array([1.0]))
    out = ensure1d(np.arange(5.0), 'y')
    assert_equal(out, np.arange(5.0))
    out = ensure1d(np.arange(5.0)[:, None], 'y')
    assert_equal(out, np.arange(5.0))
    in_array = np.reshape(np.arange(16.0), (4, 4))
    assert_raises(ValueError, ensure1d, in_array, 'y')

    y = Series(np.arange(5.0))
    ys = ensure1d(y, 'y')
    assert_true(isinstance(ys, np.ndarray))
    ys = ensure1d(y, 'y', True)
    assert_true(isinstance(ys, Series))

    y = DataFrame(y)
    ys = ensure1d(y, 'y')
    assert_true(isinstance(ys, np.ndarray))
    ys = ensure1d(y, 'y', True)
    assert_true(isinstance(ys, Series))

    y = Series(np.arange(5.0), name='series')
    ys = ensure1d(y, 'y')
    assert_true(isinstance(ys, np.ndarray))
    ys = ensure1d(y, 'y', True)
    assert_true(isinstance(ys, Series))

    y = DataFrame(y)
    ys = ensure1d(y, 'y')
    assert_true(isinstance(ys, np.ndarray))
    ys = ensure1d(y, 'y', True)
    assert_true(isinstance(ys, Series))

    y = DataFrame(np.reshape(np.arange(10), (5, 2)))
    assert_raises(ValueError, ensure1d, y, 'y')
def simulate(self, params, nobs, burn=500, initial_value=None, x=None,
             initial_value_vol=None):
    """
    Simulates data from a linear regression, AR or HAR models

    Parameters
    ----------
    params : array
        Parameters to use when simulating the model.  Parameter order is
        [mean volatility distribution] where the parameters of the mean
        model are ordered [constant lag[0] lag[1] ... lag[p] ex[0] ...
        ex[k-1]] where lag[j] indicates the coefficient on the jth lag in
        the model and ex[j] is the coefficient on the jth exogenous
        variable.
    nobs : int
        Length of series to simulate
    burn : int, optional
        Number of values to simulate to initialize the model and remove
        dependence on initial values.
    initial_value : array or float, optional
        Either a scalar value or `max(lags)` array set of initial values to
        use when initializing the model.  If omitted, 0.0 is used.
    x : array, optional
        nobs + burn by k array of exogenous variables to include in the
        simulation.
    initial_value_vol : array or float, optional
        An array or scalar to use when initializing the volatility process.

    Returns
    -------
    simulated_data : DataFrame
        DataFrame with columns data containing the simulated values,
        volatility, containing the conditional volatility and errors
        containing the errors used in the simulation

    Examples
    --------
    >>> import numpy as np
    >>> from arch.univariate import HARX, GARCH
    >>> harx = HARX(lags=[1, 5, 22])
    >>> harx.volatility = GARCH()
    >>> harx_params = np.array([1, 0.2, 0.3, 0.4])
    >>> garch_params = np.array([0.01, 0.07, 0.92])
    >>> params = np.concatenate((harx_params, garch_params))
    >>> sim_data = harx.simulate(params, 1000)

    Simulating models with exogenous regressors requires the regressors to
    have nobs plus burn data points

    >>> nobs = 100
    >>> burn = 200
    >>> x = np.random.randn(nobs + burn, 2)
    >>> x_params = np.array([1.0, 2.0])
    >>> params = np.concatenate((harx_params, x_params, garch_params))
    >>> sim_data = harx.simulate(params, nobs=nobs, burn=burn, x=x)
    """
    k_x = 0
    if x is not None:
        k_x = x.shape[1]
        if x.shape[0] != nobs + burn:
            raise ValueError('x must have nobs + burn rows')

    mc = int(self.constant) + self._lags.shape[1] + k_x
    vc = self.volatility.num_params
    dc = self.distribution.num_params
    num_params = mc + vc + dc
    params = ensure1d(params, 'params', series=False)
    if params.shape[0] != num_params:
        raise ValueError('params has the wrong number of elements. '
                         'Expected ' + str(num_params) +
                         ', got ' + str(params.shape[0]))

    dist_params = [] if dc == 0 else params[-dc:]
    vol_params = params[mc:mc + vc]
    simulator = self.distribution.simulate(dist_params)
    sim_data = self.volatility.simulate(vol_params, nobs + burn, simulator,
                                        burn, initial_value_vol)
    errors = sim_data[0]
    vol = np.sqrt(sim_data[1])

    max_lag = np.max(self._lags)
    y = zeros(nobs + burn)
    if initial_value is None:
        initial_value = 0.0
    elif not isscalar(initial_value):
        initial_value = ensure1d(initial_value, 'initial_value')
        if initial_value.shape[0] != max_lag:
            raise ValueError('initial_value has the wrong shape')
    y[:max_lag] = initial_value

    for t in range(max_lag, nobs + burn):
        ind = 0
        if self.constant:
            y[t] = params[ind]
            ind += 1
        for lag in self._lags.T:
            y[t] += params[ind] * y[t - lag[1]:t - lag[0]].mean()
            ind += 1
        for i in range(k_x):
            y[t] += params[ind] * x[t, i]
        y[t] += errors[t]

    df = dict(data=y[burn:], volatility=vol[burn:], errors=errors[burn:])
    df = DataFrame(df)

    return df