class BaseCV(with_metaclass(ABCMeta)):
    """Abstract base for cross-validating a model.

    Computes the cross-validation error of a given model; concrete
    cross-validation classes (e.g. GamCV, LassoCV, ...) derive from it.

    Parameters
    ----------
    cv_iterator : cross-validation iterator
        Provides a ``split`` method yielding (train, test) index pairs.
    endog : array_like
        Response variable.
    exog : array_like
        Explanatory variables.
    """

    def __init__(self, cv_iterator, endog, exog):
        self.cv_iterator = cv_iterator
        self.exog = exog
        self.endog = endog
        # TODO: cv_iterator.split only needs nobs from endog or exog
        self.train_test_cv_indices = self.cv_iterator.split(self.exog,
                                                            self.endog,
                                                            label=None)

    def fit(self, **kwargs):
        """Return an array of test-set errors, one per train/test split.

        ``kwargs`` are forwarded to the fit method of the
        cross-validated object.
        """
        errors = [self._error(train_idx, test_idx, **kwargs)
                  for train_idx, test_idx in self.train_test_cv_indices]
        return np.array(errors)

    @abstractmethod
    def _error(self, train_index, test_index, **kwargs):
        # fit the model on the train subset and
        # return the error on the test subset
        pass
class BasePenaltiesPathCV(with_metaclass(ABCMeta)):
    """Base class for cross validation over a grid of parameters.

    The best parameter is saved in ``alpha_cv``.

    This class is currently not used.

    NOTE(review): a second definition of this class appears later in the
    file and shadows this one — likely a merge leftover; verify which
    copy is intended.
    """

    def __init__(self, alphas):
        self.alphas = alphas
        self.alpha_cv = None
        self.cv_error = None
        self.cv_std = None

    def plot_path(self):
        """Plot the CV error curve and its 1.96-sigma band over alphas."""
        if not have_matplotlib:
            return
        band = 1.96 * self.cv_std
        curves = [(self.cv_error, 'black'),
                  (self.cv_error + band, 'blue'),
                  (self.cv_error - band, 'blue')]
        # lines first, then the point markers, matching the color scheme
        for values, color in curves:
            plt.plot(self.alphas, values, c=color)
        for values, color in curves:
            plt.plot(self.alphas, values, 'o', c=color)
        return
class BasePenaltiesPathCV(with_metaclass(ABCMeta)):
    """Base class for cross validation over a grid of parameters.

    The best parameter is saved in ``alpha_cv``.

    This class is currently not used.
    """

    def __init__(self, alphas):
        self.alphas = alphas
        self.alpha_cv = None
        self.cv_error = None
        self.cv_std = None

    def plot_path(self):
        """Plot the CV error curve and its 1.96-sigma band over alphas."""
        from statsmodels.graphics.utils import _import_mpl
        plt = _import_mpl()

        band = 1.96 * self.cv_std
        curves = [(self.cv_error, 'black'),
                  (self.cv_error + band, 'blue'),
                  (self.cv_error - band, 'blue')]
        # lines first, then the point markers, matching the color scheme
        for values, color in curves:
            plt.plot(self.alphas, values, c=color)
        for values, color in curves:
            plt.plot(self.alphas, values, 'o', c=color)
        return
class BaseCrossValidator(with_metaclass(ABCMeta)):
    """Abstract base for train/test splitting iterators.

    Subclasses (for example KFolds or LeavePOut) implement ``split`` to
    partition the data into train and test index sets.
    """

    def __init__(self):
        pass

    @abstractmethod
    def split(self):
        # yield (train, test) index pairs; implemented by subclasses
        pass
class UnivariateGamSmoother(with_metaclass(ABCMeta)):
    """Base class for a single smooth component.

    Parameters
    ----------
    x : array_like
        Underlying explanatory variable, 1-dim.
    constraints : {None, str, array_like}
        If the string ``'center'``, the constraint is the column means of
        the basis (centering). If an array, it is passed to
        ``transf_constraints`` to build the constraint transform. If
        None, no transform is applied unless a subclass set ``ctransf``
        directly.
    variable_name : str
        Prefix used to build the basis column names.
    """

    def __init__(self, x, constraints=None, variable_name='x'):
        self.x = x
        self.constraints = constraints
        self.variable_name = variable_name
        self.nobs, self.k_variables = len(x), 1

        # base4 = (basis, der_basis, der2_basis, cov_der2); entries may
        # be None depending on the subclass implementation
        base4 = self._smooth_basis_for_single_variable()

        # Fix: check isinstance before comparing to 'center', so an
        # ndarray `constraints` is never `==`-compared to a string
        # (avoids numpy elementwise-comparison FutureWarning/surprises).
        if isinstance(constraints, str) and constraints == 'center':
            constraints = base4[0].mean(0)[None, :]

        if constraints is not None and not isinstance(constraints, str):
            ctransf = transf_constraints(constraints)
            self.ctransf = ctransf
        else:
            # subclasses might set ctransf directly
            # only used if constraints is None
            if not hasattr(self, 'ctransf'):
                self.ctransf = None

        self.basis, self.der_basis, self.der2_basis, self.cov_der2 = base4
        if self.ctransf is not None:
            ctransf = self.ctransf
            # transform attributes that are not None
            if base4[0] is not None:
                self.basis = base4[0].dot(ctransf)
            if base4[1] is not None:
                self.der_basis = base4[1].dot(ctransf)
            if base4[2] is not None:
                self.der2_basis = base4[2].dot(ctransf)
            if base4[3] is not None:
                self.cov_der2 = ctransf.T.dot(base4[3]).dot(ctransf)

        self.dim_basis = self.basis.shape[1]
        self.col_names = [self.variable_name + "_s" + str(i)
                          for i in range(self.dim_basis)]

    @abstractmethod
    def _smooth_basis_for_single_variable(self):
        # return the 4-tuple (basis, der_basis, der2_basis, cov_der2)
        return
class AdditiveGamSmoother(with_metaclass(ABCMeta)):
    """Base class for additive smooth components.

    Holds one univariate smoother per column of ``x`` and the stacked
    basis, penalty matrices, column names and column masks built from
    them.
    """

    def __init__(self, x, variable_names=None, include_intercept=False,
                 **kwargs):
        # capture pandas column/series names before converting to ndarray
        if isinstance(x, pd.DataFrame):
            data_names = x.columns.tolist()
        elif isinstance(x, pd.Series):
            data_names = [x.name]
        else:
            data_names = None

        x = np.asarray(x)
        # promote a 1-dim array to a single-column 2-dim copy
        self.x = x.copy().reshape(len(x), 1) if x.ndim == 1 else x
        self.nobs, self.k_variables = self.x.shape

        if isinstance(include_intercept, bool):
            self.include_intercept = [include_intercept] * self.k_variables
        else:
            self.include_intercept = include_intercept

        if variable_names is not None:
            self.variable_names = variable_names
        elif data_names is not None:
            self.variable_names = data_names
        else:
            self.variable_names = ['x' + str(i)
                                   for i in range(self.k_variables)]

        self.smoothers = self._make_smoothers_list()
        self.basis = np.hstack([sm.basis for sm in self.smoothers])
        self.dim_basis = self.basis.shape[1]
        self.penalty_matrices = [sm.cov_der2 for sm in self.smoothers]

        self.col_names = []
        for sm in self.smoothers:
            self.col_names.extend(sm.col_names)

        # one boolean mask per smoother selecting its columns in the
        # stacked basis
        self.mask = []
        offset = 0
        for sm in self.smoothers:
            selector = np.zeros(self.dim_basis, dtype=bool)
            selector[offset:offset + sm.dim_basis] = True
            offset += sm.dim_basis
            self.mask.append(selector)

    @abstractmethod
    def _make_smoothers_list(self):
        # build the list of univariate smoothers, one per variable
        pass

    def transform(self, x_new):
        """create the spline basis for new observations

        The main use of this stateful transformation is for prediction
        using the same specification of the spline basis.

        Parameters
        ----------
        x_new : ndarray
            observations of the underlying explanatory variable

        Returns
        -------
        basis : ndarray
            design matrix for the spline basis for given ``x_new``.
        """
        if x_new.ndim == 1 and self.k_variables == 1:
            x_new = x_new.reshape(-1, 1)
        return np.hstack([self.smoothers[i].transform(x_new[:, i])
                          for i in range(self.k_variables)])