def _validate_data(self) -> None:
    """
    Check that portfolios and factors are usable for estimation

    Raises
    ------
    ValueError
        If portfolios and factors differ in their number of rows, if
        either array contains a constant or equivalent (as reported by
        ``has_constant``), if factors do not have full column rank, or if
        portfolios has fewer rows than the number of factor columns plus
        one.

    Notes
    -----
    ``self._drop_missing()`` is called after the initial row-count
    comparison; the remaining checks use the arrays re-read afterwards.
    """
    nobs_portfolios = self.portfolios.ndarray.shape[0]
    nobs_factors = self.factors.ndarray.shape[0]
    if nobs_portfolios != nobs_factors:
        raise ValueError(
            "The number of observations in portfolios and "
            "factors is not the same."
        )

    self._drop_missing()

    # Re-read both arrays since _drop_missing() has run in between
    port_arr = self.portfolios.ndarray
    factor_arr = self.factors.ndarray

    port_has_const = has_constant(port_arr)[0]
    if port_has_const:
        raise ValueError(
            "portfolios must not contains a constant or "
            "equivalent and must not have rank\n"
            "less than the dimension of the smaller shape."
        )

    factor_has_const = has_constant(factor_arr)[0]
    if factor_has_const:
        raise ValueError(
            "factors must not contain a constant or equivalent."
        )

    nfactor = factor_arr.shape[1]
    if np.linalg.matrix_rank(factor_arr) < nfactor:
        raise ValueError(
            "Model cannot be estimated. factors do not have full column rank."
        )

    min_rows = nfactor + 1
    if port_arr.shape[0] < min_rows:
        raise ValueError(
            "Model cannot be estimated. portfolios must have factors + 1 or "
            "more returns to\nestimate the model parameters."
        )
def s(self):
    """
    HAC score covariance estimate

    Uses the kernel named in ``self.config['kernel']``. When
    ``self.config['bandwidth']`` is None the bandwidth is chosen by
    ``kernel_optimal_bandwidth`` and ``self._auto_bandwidth`` is set to
    True. The bandwidth used is stored in ``self._bandwidth``.

    Returns
    -------
    The kernel covariance of the scores multiplied by ``self._scale``.
    """
    regressors, instruments, resid = self.x, self.z, self.eps
    nobs, _ = regressors.shape

    # Regressors projected onto the instruments, then scaled by residuals
    fitted = instruments @ (self._pinvz @ regressors)
    fitted_e = fitted * resid

    kernel_name = self.config['kernel']
    bandwidth = self.config['bandwidth']
    if bandwidth is None:
        self._auto_bandwidth = True
        # Function-scope import kept as in the original
        # (presumably avoids a circular import -- TODO confirm)
        from linearmodels.utility import has_constant

        const_found, const_pos = has_constant(fitted)
        # Column selector that zeroes out the constant's position, if any,
        # before collapsing the scores passed to the bandwidth selector
        selector = ones((fitted.shape[1], 1))
        if const_found:
            selector[const_pos] = 0
        collapsed_scores = fitted_e @ selector
        bandwidth = kernel_optimal_bandwidth(collapsed_scores, kernel_name)

    self._bandwidth = bandwidth
    kernel_weights = self._kernels[kernel_name](bandwidth, nobs - 1)

    score_cov = _cov_kernel(fitted_e, kernel_weights)
    return self._scale * score_cov
def test_hasconstant():
    """Check has_constant on random, constant and implied-constant data."""
    data = np.random.randn(100, 3)

    # Purely random columns: no constant reported and no location
    found, position = has_constant(data)
    assert bool(found) is False
    assert position is None

    # First column filled with a single value (1, then 2) is detected at 0
    for level in (1, 2):
        data[:, 0] = level
        found, position = has_constant(data)
        assert found is True
        assert position == 0

    # Columns 0 and 1 are made non-zero on alternating rows, so neither
    # column is constant by itself; a constant must still be reported.
    data[::2, 0] = 0
    data[:, 1] = 1
    data[1::2, 1] = 0
    found, _ = has_constant(data)
    assert found is True
def _validate_data(self):
    """
    Check that portfolios and factors are usable for estimation

    Raises
    ------
    ValueError
        If portfolios and factors differ in their number of rows, if
        either array contains a constant or equivalent (as reported by
        ``has_constant``), or if either array does not have full column
        rank.

    Notes
    -----
    ``self._drop_missing()`` is called after the initial row-count
    comparison; the remaining checks use the arrays re-read afterwards.
    """
    port_arr = self.portfolios.ndarray
    factor_arr = self.factors.ndarray
    if port_arr.shape[0] != factor_arr.shape[0]:
        raise ValueError('The number of observations in portfolios and '
                         'factors is not the same.')

    self._drop_missing()

    # Re-read both arrays since _drop_missing() has run in between
    port_arr = self.portfolios.ndarray
    factor_arr = self.factors.ndarray

    port_has_const = has_constant(port_arr)[0]
    if port_has_const:
        raise ValueError('portfolios must not contains a constant or equivalent.')

    factor_has_const = has_constant(factor_arr)[0]
    if factor_has_const:
        raise ValueError('factors must not contain a constant or equivalent.')

    # Column-rank checks: factors first, then portfolios
    rank_checks = (
        (factor_arr,
         'Model cannot be estimated. factors do not have full column rank.'),
        (port_arr,
         'Model cannot be estimated. portfolios do not have full column rank.'),
    )
    for arr, message in rank_checks:
        if np.linalg.matrix_rank(arr) < arr.shape[1]:
            raise ValueError(message)
def _validate_inputs(self):
    """
    Check regressor and instrument arrays before estimation

    Records whether the regressors contain a constant, and its location,
    in ``self._has_constant`` and ``self._const_loc``.

    Raises
    ------
    ValueError
        If there are no regressor columns, if there are fewer instrument
        columns than endogenous columns, or if either the regressor or
        the instrument array does not have full column rank.
    """
    regressors, instr_full = self._x, self._z

    if regressors.shape[1] == 0:
        raise ValueError('Model must contain at least one regressor.')

    n_instr = self.instruments.shape[1]
    n_endog = self.endog.shape[1]
    if n_instr < n_endog:
        template = ('The number of instruments ({0}) must be at least '
                    'as large as the number of endogenous regressors'
                    ' ({1}).')
        raise ValueError(template.format(n_instr, n_endog))

    regressor_rank = matrix_rank(regressors)
    if regressor_rank < regressors.shape[1]:
        raise ValueError('regressors [exog endog] do not have full '
                         'column rank')

    instr_rank = matrix_rank(instr_full)
    if instr_rank < instr_full.shape[1]:
        raise ValueError('instruments [exog instruments] do not have '
                         'full column rank')

    const_found, const_pos = has_constant(regressors)
    self._has_constant = const_found
    self._const_loc = const_pos
def _validate_inputs(self) -> None:
    """
    Check regressor and instrument arrays before estimation

    Records whether the regressors contain a constant, and its location,
    in ``self._has_constant`` and ``self._const_loc``.

    Raises
    ------
    ValueError
        If there are no regressor columns, if there are fewer instrument
        columns than endogenous columns, or if either the regressor or
        the instrument array does not have full column rank.
    """
    regressors = self._x
    instr_full = self._z

    if regressors.shape[1] == 0:
        raise ValueError("Model must contain at least one regressor.")

    if self.instruments.shape[1] < self.endog.shape[1]:
        too_few = (
            "The number of instruments ({0}) must be at least "
            "as large as the number of endogenous regressors"
            " ({1})."
        )
        raise ValueError(
            too_few.format(self.instruments.shape[1], self.endog.shape[1])
        )

    # Regressors are checked before instruments
    rank_checks = (
        (regressors,
         "regressors [exog endog] do not have full " "column rank"),
        (instr_full,
         "instruments [exog instruments] do not have " "full column rank"),
    )
    for arr, message in rank_checks:
        if matrix_rank(arr) < arr.shape[1]:
            raise ValueError(message)

    const_found, const_pos = has_constant(regressors)
    self._has_constant = const_found
    self._const_loc = const_pos
def _validate_data(self):
    """
    Parse the equations into data containers and validate them

    Each entry of ``self._equations`` may be a tuple/list of
    ``(dependent, exog[, weights])`` or a dict with keys ``'dependent'``,
    ``'exog'`` and, optionally, ``'weights'``. When no weights are
    supplied, unit weights shaped like the dependent variable are used.

    The method fills the per-equation containers (``_eq_labels``,
    ``_dependent``, ``_exog``, ``_weights``, ``_param_names``, ``_w``,
    ``_y``, ``_x``, ``_wy``, ``_wx``) and sets ``_common_exog``,
    ``_has_constant`` and ``_constant_loc``.

    Raises
    ------
    TypeError
        If an equation is neither a tuple/list nor a dict.
    ValueError
        If dependent and exogenous differ in their number of rows, if an
        equation does not have more rows than exogenous columns, or if an
        exogenous array does not have full column rank.
    """
    ids = []
    for i, key in enumerate(self._equations):
        self._eq_labels.append(key)
        eq_data = self._equations[key]
        dep_name = 'dependent_' + str(i)
        exog_name = 'exog_' + str(i)
        if isinstance(eq_data, (tuple, list)):
            self._dependent.append(IVData(eq_data[0], var_name=dep_name))
            # Object identity of exog is used below to detect shared exog
            ids.append(id(eq_data[1]))
            self._exog.append(IVData(eq_data[1], var_name=exog_name))
            if len(eq_data) == 3:
                self._weights.append(IVData(eq_data[2]))
            else:
                dep = self._dependent[-1].ndarray
                self._weights.append(IVData(ones_like(dep)))
        elif isinstance(eq_data, dict):
            self._dependent.append(IVData(eq_data['dependent'],
                                          var_name=dep_name))
            ids.append(id(eq_data['exog']))
            self._exog.append(IVData(eq_data['exog'], var_name=exog_name))
            if 'weights' in eq_data:
                self._weights.append(IVData(eq_data['weights']))
            else:
                dep = self._dependent[-1].ndarray
                self._weights.append(IVData(ones_like(dep)))
        else:
            # Report the type of the offending equation data. The previous
            # code used type(vars), i.e. the type of the builtin function.
            msg = UNKNOWN_EQ_TYPE.format(key=key, type=type(eq_data))
            raise TypeError(msg)

    for lhs, rhs in zip(self._dependent, self._exog):
        rhs_a = rhs.ndarray
        lhs_a = lhs.ndarray
        if lhs_a.shape[0] != rhs_a.shape[0]:
            raise ValueError('Dependent and exogenous do not have the same'
                             ' number of observations')

    self._drop_missing()
    # All equations share exog only when every exog is the same object
    self._common_exog = len(set(ids)) == 1

    constant = []
    constant_loc = []
    for lhs, rhs, label in zip(self._dependent, self._exog, self._eq_labels):
        self._param_names.extend([label + '_' + col for col in rhs.cols])
        rhs_a = rhs.ndarray
        lhs_a = lhs.ndarray
        if lhs_a.shape[0] <= rhs_a.shape[1]:
            raise ValueError('Fewer observations than variables')
        if matrix_rank(rhs_a) < rhs_a.shape[1]:
            raise ValueError('Exogenous variable arrays are not all full '
                             'rank')
        const, const_loc = has_constant(rhs_a)
        constant.append(const)
        constant_loc.append(const_loc)
    self._has_constant = Series(constant,
                                index=[d.cols[0] for d in self._dependent])
    self._constant_loc = constant_loc

    for dep, exog, w in zip(self._dependent, self._exog, self._weights):
        y = dep.ndarray
        x = exog.ndarray
        w = w.ndarray
        # Normalize weights to have unit mean (ignoring NaNs)
        w = w / nanmean(w)
        w_sqrt = np.sqrt(w)
        self._w.append(w)
        self._y.append(y)
        self._x.append(x)
        self._wy.append(y * w_sqrt)
        self._wx.append(x * w_sqrt)
def _validate_data(self):
    """
    Parse the equations into data containers and validate them

    Each entry of ``self._equations`` may be a tuple/list of
    ``(dependent, exog, endog, instruments[, weights])`` or a dict with
    keys ``'dependent'``, ``'exog'`` and, optionally, ``'endog'``,
    ``'instruments'`` and ``'weights'``. When no weights are supplied,
    unit weights shaped like the dependent variable are used.

    The method fills the per-equation containers (``_eq_labels``,
    ``_dependent``, ``_exog``, ``_endog``, ``_instr``, ``_weights``,
    ``_param_names``, ``_w``, ``_y``, ``_x``, ``_z``, ``_wy``, ``_wx``,
    ``_wz``) and sets ``_has_instruments``, ``_common_exog``,
    ``_has_constant`` and ``_constant_loc``.

    Raises
    ------
    TypeError
        If an equation is neither a tuple/list nor a dict.
    ValueError
        If the components of an equation differ in their number of rows,
        if an equation does not have more rows than regressors, if the
        regressor or instrument array is not full column rank, if there
        are more regressors than instruments, or if there are more
        instruments than rows.
    """
    ids = []
    for i, key in enumerate(self._equations):
        self._eq_labels.append(key)
        eq_data = self._equations[key]
        dep_name = 'dependent_' + str(i)
        exog_name = 'exog_' + str(i)
        endog_name = 'endog_' + str(i)
        instr_name = 'instr_' + str(i)
        if isinstance(eq_data, (tuple, list)):
            dep = IVData(eq_data[0], var_name=dep_name)
            self._dependent.append(dep)
            # Object identities are used below to detect shared regressors
            current_id = id(eq_data[1])
            self._exog.append(IVData(eq_data[1], var_name=exog_name))
            endog = IVData(eq_data[2], var_name=endog_name,
                           nobs=dep.shape[0])
            if endog.shape[1] > 0:
                current_id = (current_id, id(eq_data[2]))
            ids.append(current_id)
            self._endog.append(endog)

            self._instr.append(IVData(eq_data[3], var_name=instr_name,
                                      nobs=dep.shape[0]))
            if len(eq_data) == 5:
                self._weights.append(IVData(eq_data[4]))
            else:
                dep = self._dependent[-1].ndarray
                self._weights.append(IVData(ones_like(dep)))
        elif isinstance(eq_data, dict):
            dep = IVData(eq_data['dependent'], var_name=dep_name)
            self._dependent.append(dep)
            current_id = id(eq_data['exog'])
            self._exog.append(IVData(eq_data['exog'], var_name=exog_name))

            endog = eq_data.get('endog', None)
            endog = IVData(endog, var_name=endog_name, nobs=dep.shape[0])
            self._endog.append(endog)
            if 'endog' in eq_data:
                current_id = (current_id, id(eq_data['endog']))
            ids.append(current_id)

            instr = eq_data.get('instruments', None)
            instr = IVData(instr, var_name=instr_name, nobs=dep.shape[0])
            self._instr.append(instr)

            if 'weights' in eq_data:
                self._weights.append(IVData(eq_data['weights']))
            else:
                self._weights.append(IVData(ones(dep.shape)))
        else:
            # Report the type of the offending equation data. The previous
            # code used type(vars), i.e. the type of the builtin function.
            msg = UNKNOWN_EQ_TYPE.format(key=key, type=type(eq_data))
            raise TypeError(msg)

    # NOTE(review): the threshold is "more than one" instrument column;
    # confirm that `> 1` rather than `> 0` is intended.
    self._has_instruments = any(instr.shape[1] > 1 for instr in self._instr)

    for i, comps in enumerate(zip(self._dependent, self._exog,
                                  self._endog, self._instr)):
        shapes = [a.shape[0] for a in comps]
        if min(shapes) != max(shapes):
            raise ValueError(
                'Dependent, exogenous, endogenous and '
                'instruments do not have the same number of '
                'observations in eq {eq}'.format(eq=self._eq_labels[i]))

    self._drop_missing()
    self._common_exog = len(set(ids)) == 1
    if self._common_exog:
        # Common exog requires weights are also equal
        w0 = self._weights[0].ndarray
        for w in self._weights:
            self._common_exog = self._common_exog and np.all(
                w.ndarray == w0)

    constant = []
    constant_loc = []
    for dep, exog, endog, instr, w, label in zip(self._dependent,
                                                 self._exog,
                                                 self._endog,
                                                 self._instr,
                                                 self._weights,
                                                 self._eq_labels):
        y = dep.ndarray
        # Regressors are [exog endog]; instruments are [exog instr]
        x = np.concatenate([exog.ndarray, endog.ndarray], 1)
        z = np.concatenate([exog.ndarray, instr.ndarray], 1)
        w = w.ndarray
        # Normalize weights to have unit mean (ignoring NaNs)
        w = w / nanmean(w)
        w_sqrt = np.sqrt(w)
        self._w.append(w)
        self._y.append(y)
        self._x.append(x)
        self._z.append(z)
        self._wy.append(y * w_sqrt)
        self._wx.append(x * w_sqrt)
        self._wz.append(z * w_sqrt)
        cols = list(exog.cols) + list(endog.cols)
        self._param_names.extend([label + '_' + col for col in cols])
        if y.shape[0] <= x.shape[1]:
            raise ValueError('Fewer observations than variables in '
                             'equation {eq}'.format(eq=label))
        if matrix_rank(x) < x.shape[1]:
            raise ValueError('Equation {eq} regressor array is not full '
                             'rank'.format(eq=label))
        if x.shape[1] > z.shape[1]:
            raise ValueError('Equation {eq} has fewer instruments than '
                             'endogenous variables.'.format(eq=label))
        if z.shape[1] > z.shape[0]:
            raise ValueError('Fewer observations than instruments in '
                             'equation {eq}'.format(eq=label))
        if matrix_rank(z) < z.shape[1]:
            # Message fixed: this branch fires when z is rank deficient
            raise ValueError('Equation {eq} instrument array is not full '
                             'rank'.format(eq=label))

    for lhs, rhs, label in zip(self._y, self._x, self._eq_labels):
        const, const_loc = has_constant(rhs)
        constant.append(const)
        constant_loc.append(const_loc)
    self._has_constant = Series(constant,
                                index=[d.cols[0] for d in self._dependent])
    self._constant_loc = constant_loc