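# Assumed module-level imports for the method below; the import paths follow
# the python-glmnet package layout, but treat this header as a sketch rather
# than a verbatim copy of the module.
import numpy as np
from scipy.sparse import csc_matrix, issparse
from sklearn.utils.multiclass import check_classification_targets

from _glmnet import lognet, splognet, lsolns  # compiled glmnet Fortran routines
from .errors import _check_error_flag
from .util import _fix_lambda_path
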
def _fit(self, X, y, sample_weight=None, relative_penalties=None):

    if self.lambda_path is not None:
        n_lambda = len(self.lambda_path)
        min_lambda_ratio = 1.0
    else:
        n_lambda = self.n_lambda
        min_lambda_ratio = self.min_lambda_ratio

    check_classification_targets(y)
    self.classes_ = np.unique(y)  # the output of np.unique is sorted
    n_classes = len(self.classes_)
    if n_classes < 2:
        raise ValueError("Training data need to contain at least 2 "
                         "classes.")

    # glmnet requires the labels as a one-hot-encoded array of shape
    # (n_samples, n_classes)
    if n_classes == 2:
        # Normally we use 1/0 for the positive and negative classes. Since
        # np.unique sorts its output, the negative class will be in the
        # 0th column. We want a model predicting the positive class, not
        # the negative class, so we flip the columns here (hence the !=
        # condition).
        #
        # Broadcast comparison of self.classes_ against all rows of y; see
        # the numpy broadcasting rules for details. Essentially this
        # "reshapes" y to (n_samples, n_classes) and self.classes_ to
        # (n_samples, n_classes) and performs an element-wise comparison,
        # resulting in _y with shape (n_samples, n_classes).
        _y = (y[:, None] != self.classes_).astype(np.float64, order='F')
    else:
        # multinomial case: glmnet uses the entire array, so we can keep
        # the original column order.
        _y = (y[:, None] == self.classes_).astype(np.float64, order='F')

    # apply sample weights, keeping only the rows with positive weight;
    # this is inspired by the R wrapper for glmnet, in lognet.R
    if sample_weight is not None:
        weight_gt_0 = sample_weight > 0
        sample_weight = sample_weight[weight_gt_0]
        _y = _y[weight_gt_0, :]
        X = X[weight_gt_0, :]
        _y = _y * np.expand_dims(sample_weight, 1)

    # we need some sort of "offset" array for glmnet,
    # an array of shape (n_samples, n_classes)
    offset = np.zeros((X.shape[0], n_classes), dtype=np.float64,
                      order='F')

    # You should have thought of that before you got here.
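    # (exclude_vars fills glmnet's `jd` argument, which can drop variables
    # from the model entirely; this wrapper does not expose it, so 0 means
    # "exclude nothing".)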
    exclude_vars = 0

    # how much each feature should be penalized relative to the others;
    # this may be useful to expose to the caller if there are variables
    # that must be included in the final model, or if there is some prior
    # knowledge about how important some variables are relative to others.
    # See the glmnet vignette:
    # http://web.stanford.edu/~hastie/glmnet/glmnet_alpha.html
    if relative_penalties is None:
        relative_penalties = np.ones(X.shape[1], dtype=np.float64,
                                     order='F')

    coef_bounds = np.empty((2, X.shape[1]), dtype=np.float64, order='F')
    coef_bounds[0, :] = self.lower_limits
    coef_bounds[1, :] = self.upper_limits

    if n_classes == 2:
        # binomial: tell glmnet there is only one class. Otherwise we
        # would get a coef matrix with two dimensions where each pair is
        # equal in magnitude and opposite in sign, and since the
        # magnitudes are constrained to sum to one, the returned
        # coefficients would be one half of the proper values.
        n_classes = 1

    # This is a stopping criterion (nx).
    # R defaults to nx = num_features and ne = num_features + 1 (the extra
    # 1 ensures the final model may include all features).
    if self.max_features is None:
        max_features = X.shape[1]
    else:
        max_features = self.max_features

    # for documentation on the glmnet functions lognet and splognet,
    # see doc.py
    if issparse(X):
        _x = csc_matrix(X, dtype=np.float64, copy=True)

        (self.n_lambda_,
         self.intercept_path_,
         ca,
         ia,
         nin,
         _,  # dev0
         _,  # dev
         self.lambda_path_,
         _,  # nlp
         jerr) = splognet(self.alpha,
                          _x.shape[0],
                          _x.shape[1],
                          n_classes,
                          _x.data,
                          _x.indptr + 1,  # Fortran uses 1-based indexing
                          _x.indices + 1,
                          _y,
                          offset,
                          exclude_vars,
                          relative_penalties,
                          coef_bounds,
                          max_features,
                          X.shape[1] + 1,
                          min_lambda_ratio,
                          self.lambda_path,
                          self.tol,
                          n_lambda,
                          self.standardize,
                          self.fit_intercept,
                          self.max_iter,
                          0)
    else:  # not sparse
        # some notes: glmnet requires both x and y to be float64, and both
        # arrays may be overwritten during the fitting process, so they
        # need to be copied prior to calling lognet. The Fortran wrapper
        # will copy any arrays passed to a wrapped function if they are
        # not in the Fortran layout; to avoid making extra copies, ensure
        # x and y are `F_CONTIGUOUS` prior to calling lognet.
        _x = X.astype(dtype=np.float64, order='F', copy=True)

        (self.n_lambda_,
         self.intercept_path_,
         ca,
         ia,
         nin,
         _,  # dev0
         _,  # dev
         self.lambda_path_,
         _,  # nlp
         jerr) = lognet(self.alpha,
                        n_classes,
                        _x,
                        _y,
                        offset,
                        exclude_vars,
                        relative_penalties,
                        coef_bounds,
                        X.shape[1] + 1,
                        min_lambda_ratio,
                        self.lambda_path,
                        self.tol,
                        max_features,
                        n_lambda,
                        self.standardize,
                        self.fit_intercept,
                        self.max_iter,
                        0)

    # _check_error_flag raises RuntimeError if self.jerr_ is nonzero
    self.jerr_ = jerr
    _check_error_flag(self.jerr_)

    # glmnet may not return the requested number of lambda values, so we
    # need to trim the trailing zeros from the returned path so that
    # len(lambda_path_) is equal to n_lambda_
    self.lambda_path_ = self.lambda_path_[:self.n_lambda_]
    # also fix the first value of lambda
    self.lambda_path_ = _fix_lambda_path(self.lambda_path_)
    self.intercept_path_ = self.intercept_path_[:, :self.n_lambda_]
    # also trim the compressed coefficient matrix
    ca = ca[:, :, :self.n_lambda_]
    # and trim the array of n_coef per lambda (may or may not be non-zero)
    nin = nin[:self.n_lambda_]
    # decompress the coefficients returned by glmnet, see doc.py
    self.coef_path_ = lsolns(X.shape[1], ca, ia, nin)
    # coef_path_ has shape (n_features, n_classes, n_lambda); transpose to
    # match the shape scikit-learn models expect:
    # (n_classes, n_features, n_lambda)
    self.coef_path_ = np.transpose(self.coef_path_, axes=(1, 0, 2))

    return self
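

# ---------------------------------------------------------------------------
# A minimal, self-contained sketch (illustration only, not used by _fit) of
# the label-encoding broadcast above: `!=` in the binary case puts the
# indicator for the *positive* class in column 0, while `==` in the
# multinomial case yields a plain one-hot encoding in sorted class order.
# The function name and data below are hypothetical.
def _demo_label_encoding():
    y = np.array(['cat', 'dog', 'dog', 'cat'])
    classes = np.unique(y)  # array(['cat', 'dog']), sorted

    # binary case: column j is 1.0 where y != classes[j], so column 0
    # flags the positive class 'dog' and column 1 the negative class 'cat'
    y_binary = (y[:, None] != classes).astype(np.float64, order='F')
    print(y_binary)  # [[0. 1.] [1. 0.] [1. 0.] [0. 1.]]

    # multinomial case: column j is 1.0 where y == classes[j]
    y_multi = (y[:, None] == classes).astype(np.float64, order='F')
    print(y_multi)   # [[1. 0.] [0. 1.] [0. 1.] [1. 0.]]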