Example #1
from abc import ABCMeta, abstractmethod

import numpy as np

from statsmodels.compat.python import with_metaclass


class BaseCV(with_metaclass(ABCMeta)):
    """
    BaseCV class. It computes the cross validation error of a given model.
    All the cross validation classes can be derived by this one
    (e.g. GamCV, LassoCV,...)
    """

    def __init__(self, cv_iterator, endog, exog):
        self.cv_iterator = cv_iterator
        self.exog = exog
        self.endog = endog
        # TODO: cv_iterator.split only needs nobs from endog or exog
        self.train_test_cv_indices = self.cv_iterator.split(self.exog,
                                                            self.endog,
                                                            label=None)

    def fit(self, **kwargs):
        # kwargs are the input values for the fit method of the
        # cross-validated object

        cv_err = []

        for train_index, test_index in self.train_test_cv_indices:
            cv_err.append(self._error(train_index, test_index, **kwargs))

        return np.array(cv_err)

    @abstractmethod
    def _error(self, train_index, test_index, **kwargs):
        # Train the model on the train set and return the error
        # on the test set.
        pass
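A minimal sketch of a concrete subclass, assuming an OLS model and a mean
squared prediction error metric (the OLSCV name and the metric choice are
illustrative, not part of the snippet above):

import numpy as np
import statsmodels.api as sm

class OLSCV(BaseCV):
    """Cross-validated OLS: fit on each train split, score MSE on the
    corresponding test split."""

    def _error(self, train_index, test_index, **kwargs):
        model = sm.OLS(self.endog[train_index], self.exog[train_index])
        results = model.fit(**kwargs)
        pred = results.predict(self.exog[test_index])
        # mean squared prediction error on the held-out fold
        return np.mean((self.endog[test_index] - pred) ** 2)

The cv_iterator passed to __init__ can be any object whose split method
yields (train_index, test_index) pairs, as in Example #4 below.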
Example #2
from abc import ABCMeta

from statsmodels.compat.python import with_metaclass

# module-level matplotlib guard assumed by plot_path below
try:
    import matplotlib.pyplot as plt
    have_matplotlib = True
except ImportError:
    have_matplotlib = False


class BasePenaltiesPathCV(with_metaclass(ABCMeta)):
    """
    Base class for cross validation over a grid of parameters.

    The best parameter is saved in alpha_cv

    This class is currently not used
    """

    def __init__(self, alphas):
        self.alphas = alphas
        self.alpha_cv = None
        self.cv_error = None
        self.cv_std = None

    def plot_path(self):
        if have_matplotlib:
            plt.plot(self.alphas, self.cv_error, c='black')
            plt.plot(self.alphas, self.cv_error + 1.96 * self.cv_std,
                     c='blue')
            plt.plot(self.alphas, self.cv_error - 1.96 * self.cv_std,
                     c='blue')

            plt.plot(self.alphas, self.cv_error, 'o', c='black')
            plt.plot(self.alphas, self.cv_error + 1.96 * self.cv_std, 'o',
                     c='blue')
            plt.plot(self.alphas, self.cv_error - 1.96 * self.cv_std, 'o',
                     c='blue')

            return
Example #3
from abc import ABCMeta

from statsmodels.compat.python import with_metaclass


class BasePenaltiesPathCV(with_metaclass(ABCMeta)):
    """
    Base class for cross validation over a grid of parameters.

    The best parameter is saved in alpha_cv

    This class is currently not used
    """
    def __init__(self, alphas):
        self.alphas = alphas
        self.alpha_cv = None
        self.cv_error = None
        self.cv_std = None

    def plot_path(self):
        from statsmodels.graphics.utils import _import_mpl
        plt = _import_mpl()
        plt.plot(self.alphas, self.cv_error, c='black')
        plt.plot(self.alphas, self.cv_error + 1.96 * self.cv_std, c='blue')
        plt.plot(self.alphas, self.cv_error - 1.96 * self.cv_std, c='blue')

        plt.plot(self.alphas, self.cv_error, 'o', c='black')
        plt.plot(self.alphas,
                 self.cv_error + 1.96 * self.cv_std,
                 'o',
                 c='blue')
        plt.plot(self.alphas,
                 self.cv_error - 1.96 * self.cv_std,
                 'o',
                 c='blue')

        return
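A usage sketch, assuming matplotlib is installed. A fitting routine would
normally populate cv_error and cv_std; placeholder arrays stand in here so
that plot_path can draw the CV error curve with its 1.96-standard-deviation
band (an approximate 95% interval):

import numpy as np

alphas = np.linspace(0.1, 5.0, 20)
path = BasePenaltiesPathCV(alphas)         # no abstract methods, so instantiable
path.cv_error = (alphas - 2.0) ** 2 + 1.0  # placeholder CV error curve
path.cv_std = np.full_like(alphas, 0.2)    # placeholder fold standard errors
path.plot_path()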
Example #4
from abc import ABCMeta, abstractmethod

from statsmodels.compat.python import with_metaclass


class BaseCrossValidator(with_metaclass(ABCMeta)):
    """
    The BaseCrossValidator class is a base class for all the iterators that
    split the data in train and test as for example KFolds or LeavePOut
    """
    def __init__(self):
        pass

    @abstractmethod
    def split(self, x, y=None, label=None):
        # Yield (train_index, test_index) pairs; the signature matches
        # the call in BaseCV.__init__ (Example #1).
        pass
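A minimal concrete iterator sketch (the SimpleKFold name and the
consecutive-fold scheme are illustrative assumptions):

import numpy as np

class SimpleKFold(BaseCrossValidator):
    """Split the observations into k consecutive folds; each fold is the
    test set once while the remaining folds form the train set."""

    def __init__(self, k_folds=5, shuffle=False):
        self.k_folds = k_folds
        self.shuffle = shuffle

    def split(self, x, y=None, label=None):
        index = np.arange(len(x))
        if self.shuffle:
            np.random.shuffle(index)
        folds = np.array_split(index, self.k_folds)
        for i in range(self.k_folds):
            test_index = folds[i]
            train_index = np.concatenate(folds[:i] + folds[i + 1:])
            yield train_index, test_index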
Example #5
from abc import ABCMeta, abstractmethod

from statsmodels.compat.python import with_metaclass
from statsmodels.tools.linalg import transf_constraints


class UnivariateGamSmoother(with_metaclass(ABCMeta)):
    """Base Class for single smooth component
    """
    def __init__(self, x, constraints=None, variable_name='x'):
        self.x = x
        self.constraints = constraints
        self.variable_name = variable_name
        self.nobs, self.k_variables = len(x), 1

        base4 = self._smooth_basis_for_single_variable()
        if constraints == 'center':
            # centering constraint: remove the column means of the basis
            constraints = base4[0].mean(0)[None, :]

        if constraints is not None and not isinstance(constraints, str):
            ctransf = transf_constraints(constraints)
            self.ctransf = ctransf
        else:
            # subclasses might set ctransf directly
            # only used if constraints is None
            if not hasattr(self, 'ctransf'):
                self.ctransf = None

        self.basis, self.der_basis, self.der2_basis, self.cov_der2 = base4
        if self.ctransf is not None:
            ctransf = self.ctransf
            # transform attributes that are not None
            if base4[0] is not None:
                self.basis = base4[0].dot(ctransf)
            if base4[1] is not None:
                self.der_basis = base4[1].dot(ctransf)
            if base4[2] is not None:
                self.der2_basis = base4[2].dot(ctransf)
            if base4[3] is not None:
                self.cov_der2 = ctransf.T.dot(base4[3]).dot(ctransf)

        self.dim_basis = self.basis.shape[1]
        self.col_names = [
            self.variable_name + "_s" + str(i) for i in range(self.dim_basis)
        ]

    @abstractmethod
    def _smooth_basis_for_single_variable(self):
        return
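A minimal concrete subclass sketch using a plain polynomial basis (the
PolynomialSmoother name and the degree parameter are illustrative). Note
that degree must be set before the base __init__ runs, because __init__
calls _smooth_basis_for_single_variable:

import numpy as np

class PolynomialSmoother(UnivariateGamSmoother):
    """Univariate smoother with basis columns x, x**2, ..., x**degree."""

    def __init__(self, x, degree=3, **kwargs):
        self.degree = degree  # read by the basis method called in super
        super(PolynomialSmoother, self).__init__(x, **kwargs)

    def _smooth_basis_for_single_variable(self):
        powers = range(1, self.degree + 1)
        basis = np.column_stack([self.x ** p for p in powers])
        der_basis = np.column_stack([p * self.x ** (p - 1) for p in powers])
        der2_basis = np.column_stack([p * (p - 1) * self.x ** max(p - 2, 0)
                                      for p in powers])
        # quadratic penalty on the second derivative (curvature)
        cov_der2 = der2_basis.T.dot(der2_basis)
        return basis, der_basis, der2_basis, cov_der2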
Example #6
from abc import ABCMeta, abstractmethod

import numpy as np
import pandas as pd

from statsmodels.compat.python import with_metaclass


class AdditiveGamSmoother(with_metaclass(ABCMeta)):
    """Base class for additive smooth components
    """
    def __init__(self, x, variable_names=None, include_intercept=False,
                 **kwargs):

        # get pandas names before using asarray
        if isinstance(x, pd.DataFrame):
            data_names = x.columns.tolist()
        elif isinstance(x, pd.Series):
            data_names = [x.name]
        else:
            data_names = None

        x = np.asarray(x)

        if x.ndim == 1:
            self.x = x.copy()
            self.x.shape = (len(x), 1)
        else:
            self.x = x

        self.nobs, self.k_variables = self.x.shape
        if isinstance(include_intercept, bool):
            self.include_intercept = [include_intercept] * self.k_variables
        else:
            self.include_intercept = include_intercept

        if variable_names is None:
            if data_names is not None:
                self.variable_names = data_names
            else:
                self.variable_names = ['x' + str(i)
                                       for i in range(self.k_variables)]
        else:
            self.variable_names = variable_names

        self.smoothers = self._make_smoothers_list()
        self.basis = np.hstack([smoother.basis
                                for smoother in self.smoothers])
        self.dim_basis = self.basis.shape[1]
        self.penalty_matrices = [smoother.cov_der2
                                 for smoother in self.smoothers]
        self.col_names = []
        for smoother in self.smoothers:
            self.col_names.extend(smoother.col_names)

        self.mask = []
        last_column = 0
        for smoother in self.smoothers:
            # boolean mask selecting this smoother's columns in the
            # stacked basis
            mask = np.zeros(self.dim_basis, dtype=bool)
            mask[last_column:last_column + smoother.dim_basis] = True
            last_column += smoother.dim_basis
            self.mask.append(mask)

    @abstractmethod
    def _make_smoothers_list(self):
        pass

    def transform(self, x_new):
        """create the spline basis for new observations

        The main use of this stateful transformation is for prediction
        using the same specification of the spline basis.

        Parameters
        ----------
        x_new : ndarray
            observations of the underlying explanatory variable

        Returns
        -------
        basis : ndarray
            design matrix for the spline basis for given ``x_new``.
        """
        if x_new.ndim == 1 and self.k_variables == 1:
            x_new = x_new.reshape(-1, 1)
        exog = np.hstack([self.smoothers[i].transform(x_new[:, i])
                          for i in range(self.k_variables)])
        return exog
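A minimal concrete sketch wiring the pieces together, reusing the
hypothetical PolynomialSmoother from the Example #5 sketch (the class name
and the degrees parameter are illustrative):

class AdditivePolynomialSmoother(AdditiveGamSmoother):
    """One polynomial smoother per column of x."""

    def __init__(self, x, degrees=None, **kwargs):
        self.degrees = degrees  # read by _make_smoothers_list in super
        super(AdditivePolynomialSmoother, self).__init__(x, **kwargs)

    def _make_smoothers_list(self):
        degrees = self.degrees or [3] * self.k_variables
        return [PolynomialSmoother(self.x[:, i], degree=degrees[i],
                                   variable_name=self.variable_names[i])
                for i in range(self.k_variables)]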