Example #1
    def _fit(self, X, y, sample_weight=None, relative_penalties=None):
        if self.lambda_path is not None:
            n_lambda = len(self.lambda_path)
            min_lambda_ratio = 1.0
        else:
            n_lambda = self.n_lambda
            min_lambda_ratio = self.min_lambda_ratio

        check_classification_targets(y)
        self.classes_ = np.unique(y)  # the output of np.unique is sorted
        n_classes = len(self.classes_)
        if n_classes < 2:
            raise ValueError("Training data need to contain at least 2 "
                             "classes.")

        # glmnet requires the labels as a one-hot-encoded array of shape
        # (n_samples, n_classes)
        if n_classes == 2:
            # Normally we use 1/0 for the positive and negative classes. Since
            # np.unique sorts the output, the negative class will be in the 0th
            # column. We want a model predicting the positive class, not the
            # negative class, so we flip the columns here (the != condition).
            #
            # Broadcast comparison of self.classes_ against every element of
            # y: y[:, None] has shape (n_samples, 1) and self.classes_ has
            # shape (n_classes,), so the element-wise comparison broadcasts
            # to _y with shape (n_samples, n_classes). See the numpy
            # broadcasting rules for details.
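            # For example, with classes_ = ['neg', 'pos'] and
            # y = ['pos', 'neg'], _y becomes [[1., 0.], [0., 1.]]:
            # column 0 is the indicator of the positive class 'pos'.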
            _y = (y[:, None] != self.classes_).astype(np.float64, order='F')
        else:
            # multinomial case, glmnet uses the entire array so we can
            # keep the original order.
            _y = (y[:, None] == self.classes_).astype(np.float64, order='F')

        # glmnet requires an "offset" array of shape (n_samples, n_classes);
        # we are not using an offset, so pass all zeros
        offset = np.zeros((X.shape[0], n_classes), dtype=np.float64, order='F')

        # glmnet's variable-exclusion flag: 0 means no features are excluded
        # from the fit
        exclude_vars = 0

        # how much each feature should be penalized relative to the others;
        # this may be worth exposing to the caller when some features must be
        # included in the final model or there is prior knowledge about their
        # relative importance. See the glmnet vignette:
        # http://web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
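        # Per that vignette, a relative penalty of 0 applies no shrinkage to
        # a feature (it is kept in every model along the path), while a very
        # large value effectively excludes it.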
        if relative_penalties is None:
            relative_penalties = np.ones(X.shape[1],
                                         dtype=np.float64,
                                         order='F')

        coef_bounds = np.empty((2, X.shape[1]), dtype=np.float64, order='F')
        coef_bounds[0, :] = self.lower_limits
        coef_bounds[1, :] = self.upper_limits

        # This is a stopping criterion (ne); add 1 so the final model can
        # include all features. R defaults to nx = num_features and
        # ne = num_features + 1.
        # Note: this will be ignored when the user specifies lambda_path.
        max_features = X.shape[1] + 1

        if n_classes == 2:
            # binomial, tell glmnet there is only one class
            # otherwise we will get a coef matrix with two dimensions
            # where each pair are equal in magnitude and opposite in sign
            # also since the magnitudes are constrained to sum to one, the
            # returned coefficients would be one half of the proper values
            n_classes = 1

        # for documentation on the glmnet function lognet, see doc.py
        if issparse(X):
            _x = csc_matrix(X, dtype=np.float64, copy=True)

            (
                self.n_lambda_,
                self.intercept_path_,
                ca,
                ia,
                nin,
                _,  # dev0
                _,  # dev
                self.lambda_path_,
                _,  # nlp
                jerr) = splognet(
                    self.alpha,
                    _x.shape[0],
                    _x.shape[1],
                    n_classes,
                    _x.data,
                    _x.indptr + 1,  # Fortran uses 1-based indexing
                    _x.indices + 1,
                    _y,
                    offset,
                    exclude_vars,
                    relative_penalties,
                    coef_bounds,
                    max_features,
                    max_features - 1,
                    min_lambda_ratio,
                    self.lambda_path,
                    self.tol,
                    n_lambda,
                    self.standardize,
                    self.fit_intercept,
                    self.max_iter,
                    0)
        else:  # not sparse
            # glmnet requires both x and y to be float64, and both arrays may
            # be overwritten during fitting, so they must be copied before
            # calling lognet. The Fortran wrapper will copy any array that is
            # not in Fortran layout; to avoid extra copies, ensure x and y
            # are `F_CONTIGUOUS` before calling lognet.
            _x = X.astype(dtype=np.float64, order='F', copy=True)

            (
                self.n_lambda_,
                self.intercept_path_,
                ca,
                ia,
                nin,
                _,  # dev0
                _,  # dev
                self.lambda_path_,
                _,  # nlp
                jerr) = lognet(self.alpha, n_classes, _x, _y, offset,
                               exclude_vars, relative_penalties, coef_bounds,
                               max_features, min_lambda_ratio,
                               self.lambda_path, self.tol, max_features - 1,
                               n_lambda, self.standardize, self.fit_intercept,
                               self.max_iter, 0)

        # raises RuntimeError if self.jerr_ is nonzero
        self.jerr_ = jerr
        _check_glmnet_error_flag(self.jerr_)

        # glmnet may not return the requested number of lambda values, so we
        # need to trim the trailing zeros from the returned path so
        # len(lambda_path_) is equal to n_lambda_
        self.lambda_path_ = self.lambda_path_[:self.n_lambda_]
        # also fix the first value of lambda
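        # (glmnet returns an arbitrarily large placeholder for the first
        # lambda; like the R wrapper, _fix_lambda_path replaces it by
        # log-linear extrapolation from the next two values)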
        self.lambda_path_ = _fix_lambda_path(self.lambda_path_)
        self.intercept_path_ = self.intercept_path_[:, :self.n_lambda_]
        # also trim the compressed coefficient matrix
        ca = ca[:, :, :self.n_lambda_]
        # and trim nin, the number of compressed coefficients stored for each
        # lambda (these may or may not be nonzero)
        nin = nin[:self.n_lambda_]
        # decompress the coefficients returned by glmnet, see doc.py
        self.coef_path_ = lsolns(X.shape[1], ca, ia, nin)
        # coef_path_ has shape (n_features, n_classes, n_lambda); transpose
        # to match the scikit-learn convention:
        # (n_classes, n_features, n_lambda)
        self.coef_path_ = np.transpose(self.coef_path_, axes=(1, 0, 2))

        return self
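The binary label-encoding trick in this example is easy to check in isolation. Below is a minimal, self-contained sketch (plain numpy, no glmnet needed; the data is illustrative) showing why the != comparison puts the positive class in column 0:

import numpy as np

y = np.array(['ham', 'spam', 'spam', 'ham'])
classes = np.unique(y)  # sorted: ['ham', 'spam'], so 'spam' is the positive class

# binary case: '!=' flips the columns so column 0 indicates the positive class
_y = (y[:, None] != classes).astype(np.float64, order='F')
# _y == [[0., 1.],
#        [1., 0.],
#        [1., 0.],
#        [0., 1.]]

# multinomial case: '==' keeps the natural (sorted) column order
onehot = (y[:, None] == classes).astype(np.float64, order='F')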
Example #2
    def _fit(self, X, y, sample_weight=None, relative_penalties=None):
        if self.lambda_path is not None:
            n_lambda = len(self.lambda_path)
            min_lambda_ratio = 1.0
        else:
            n_lambda = self.n_lambda
            min_lambda_ratio = self.min_lambda_ratio

        check_classification_targets(y)
        self.classes_ = np.unique(y)  # the output of np.unique is sorted
        n_classes = len(self.classes_)
        if n_classes < 2:
            raise ValueError("Training data need to contain at least 2 "
                             "classes.")

        # glmnet requires the labels as a one-hot-encoded array of shape
        # (n_samples, n_classes)
        if n_classes == 2:
            # Normally we use 1/0 for the positive and negative classes. Since
            # np.unique sorts the output, the negative class will be in the 0th
            # column. We want a model predicting the positive class, not the
            # negative class, so we flip the columns here (the != condition).
            #
            # Broadcast comparison of self.classes_ against every element of
            # y: y[:, None] has shape (n_samples, 1) and self.classes_ has
            # shape (n_classes,), so the element-wise comparison broadcasts
            # to _y with shape (n_samples, n_classes). See the numpy
            # broadcasting rules for details.
            _y = (y[:, None] != self.classes_).astype(np.float64, order='F')
        else:
            # multinomial case, glmnet uses the entire array so we can
            # keep the original order.
            _y = (y[:, None] == self.classes_).astype(np.float64, order='F')

        # apply sample weights, dropping observations with non-positive
        # weight; this mirrors the R wrapper for glmnet (lognet.R)
        if sample_weight is not None:
            weight_gt_0 = sample_weight > 0
            sample_weight = sample_weight[weight_gt_0]
            _y = _y[weight_gt_0, :]
            X = X[weight_gt_0, :]
            _y = _y * np.expand_dims(sample_weight, 1)
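            # e.g. sample_weight = [0., 2., 1.] drops the first observation
            # entirely and scales the remaining indicator rows by 2 and 1;
            # following the R wrapper, weights are folded into the label
            # matrix that lognet receives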

        # glmnet requires an "offset" array of shape (n_samples, n_classes);
        # we are not using an offset, so pass all zeros
        offset = np.zeros((X.shape[0], n_classes), dtype=np.float64,
                          order='F')

        # glmnet's variable-exclusion flag: 0 means no features are excluded
        # from the fit
        exclude_vars = 0

        # how much each feature should be penalized relative to the others;
        # this may be worth exposing to the caller when some features must be
        # included in the final model or there is prior knowledge about their
        # relative importance. See the glmnet vignette:
        # http://web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
        if relative_penalties is None:
            relative_penalties = np.ones(X.shape[1], dtype=np.float64,
                                         order='F')

        coef_bounds = np.empty((2, X.shape[1]), dtype=np.float64, order='F')
        coef_bounds[0, :] = self.lower_limits
        coef_bounds[1, :] = self.upper_limits

        if n_classes == 2:
            # binomial, tell glmnet there is only one class
            # otherwise we will get a coef matrix with two dimensions
            # where each pair are equal in magnitude and opposite in sign
            # also since the magnitudes are constrained to sum to one, the
            # returned coefficients would be one half of the proper values
            n_classes = 1

        # This is a stopping criterion (nx): the maximum number of variables
        # allowed to enter any model along the path. R defaults to
        # nx = num_features and ne = num_features + 1.
        if self.max_features is None:
            max_features = X.shape[1]
        else:
            max_features = self.max_features

        # for documentation on the glmnet function lognet, see doc.py
        if issparse(X):
            _x = csc_matrix(X, dtype=np.float64, copy=True)

            (self.n_lambda_,
             self.intercept_path_,
             ca,
             ia,
             nin,
             _,  # dev0
             _,  # dev
             self.lambda_path_,
             _,  # nlp
             jerr) = splognet(self.alpha,
                              _x.shape[0],
                              _x.shape[1],
                              n_classes,
                              _x.data,
                              _x.indptr + 1,  # Fortran uses 1-based indexing
                              _x.indices + 1,
                              _y,
                              offset,
                              exclude_vars,
                              relative_penalties,
                              coef_bounds,
                              max_features,
                              X.shape[1] + 1,
                              min_lambda_ratio,
                              self.lambda_path,
                              self.tol,
                              n_lambda,
                              self.standardize,
                              self.fit_intercept,
                              self.max_iter,
                              0)
        else:  # not sparse
            # glmnet requires both x and y to be float64, and both arrays may
            # be overwritten during fitting, so they must be copied before
            # calling lognet. The Fortran wrapper will copy any array that is
            # not in Fortran layout; to avoid extra copies, ensure x and y
            # are `F_CONTIGUOUS` before calling lognet.
            _x = X.astype(dtype=np.float64, order='F', copy=True)

            (self.n_lambda_,
             self.intercept_path_,
             ca,
             ia,
             nin,
             _,  # dev0
             _,  # dev
             self.lambda_path_,
             _,  # nlp
             jerr) = lognet(self.alpha,
                            n_classes,
                            _x,
                            _y,
                            offset,
                            exclude_vars,
                            relative_penalties,
                            coef_bounds,
                            X.shape[1] + 1,
                            min_lambda_ratio,
                            self.lambda_path,
                            self.tol,
                            max_features,
                            n_lambda,
                            self.standardize,
                            self.fit_intercept,
                            self.max_iter,
                            0)

        # raises RuntimeError if self.jerr_ is nonzero
        self.jerr_ = jerr
        _check_error_flag(self.jerr_)

        # glmnet may not return the requested number of lambda values, so we
        # need to trim the trailing zeros from the returned path so
        # len(lambda_path_) is equal to n_lambda_
        self.lambda_path_ = self.lambda_path_[:self.n_lambda_]
        # also fix the first value of lambda
        self.lambda_path_ = _fix_lambda_path(self.lambda_path_)
        self.intercept_path_ = self.intercept_path_[:, :self.n_lambda_]
        # also trim the compressed coefficient matrix
        ca = ca[:, :, :self.n_lambda_]
        # and trim nin, the number of compressed coefficients stored for each
        # lambda (these may or may not be nonzero)
        nin = nin[:self.n_lambda_]
        # decompress the coefficients returned by glmnet, see doc.py
        self.coef_path_ = lsolns(X.shape[1], ca, ia, nin)
        # coef_path_ has shape (n_features, n_classes, n_lambda); transpose
        # to match the scikit-learn convention:
        # (n_classes, n_features, n_lambda)
        self.coef_path_ = np.transpose(self.coef_path_, axes=(1, 0, 2))

        return self
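Assuming this method belongs to python-glmnet's LogitNet (the attribute names above match that class), a call through the public fit API exercises the whole path shown here. A rough usage sketch; the dataset and printed shapes are illustrative:

import numpy as np
from glmnet import LogitNet  # assumes the python-glmnet package

rng = np.random.RandomState(0)
X = rng.randn(200, 5)
y = (X[:, 0] + 0.5 * rng.randn(200) > 0).astype(int)

m = LogitNet(n_lambda=20)
m.fit(X, y)  # dispatches to _fit, which drives lognet/splognet

# after trimming, the path attributes line up with n_lambda_
print(m.lambda_path_.shape)     # (n_lambda_,)
print(m.coef_path_.shape)       # (n_classes, n_features, n_lambda_); binary => n_classes == 1
print(m.intercept_path_.shape)  # (n_classes, n_lambda_)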