def test_max_features(self):
    """Fit with ``max_features`` set and check the nonzero-coefficient count.

    The model is fit on ``self.inputs[3]`` and must end up with no more
    nonzero entries in ``coef_`` than the requested limit.
    """
    limit = 5
    features, target = self.inputs[3]

    model = ElasticNet(n_splits=3, random_state=42, max_features=limit)
    model = model.fit(features, target)

    nonzero_count = np.count_nonzero(model.coef_)
    self.assertTrue(nonzero_count <= limit)
def test_ridge_models(self):
    '''Test that a pure ridge (alpha=0) model gives expected results for
    both dense and sparse matrices.

    We test that the ridge model, when fit on uncorrelated predictors,
    shrinks the parameter estimates uniformly. To see this, we generate
    linearly related data with a correlation free model matrix, then test
    that the array of ratios of fit parameters to true coefficients is a
    constant array.

    This test generates more samples than the others to guarantee that the
    data is sufficiently correlation free, otherwise the effect to be
    measured does not occur.
    '''
    dense = np.random.random(size=(50000, 3))
    sparse = csc_matrix(dense)
    true_coefs = np.random.random(size=(3,))

    for design in (dense, sparse):
        for penalty in np.linspace(0, 1, 10):
            # The response is always built from the dense matrix.
            response = np.dot(dense, true_coefs)
            model = ElasticNet(alpha=0)
            model.fit(design, response, lambdas=[penalty])

            # Uniform shrinkage means every fitted/true ratio is the same,
            # so dividing by the largest ratio should give all ones.
            shrinkage = model._coefficients.ravel() / true_coefs
            scaled = shrinkage / np.max(shrinkage)
            self.assertTrue(np.allclose(scaled, 1, atol=.05))
def _notify(self, trainer):
    """Fit the glmnet path once and record metrics for every lambda on it.

    Does nothing when ``self.computed`` is already truthy. Otherwise the
    trainer's ``X`` and ``y`` tensors are converted to NumPy arrays, an
    ``ElasticNet`` is fit over 200 log-spaced lambdas between 1e-9 and 10,
    and ``append_performance_metrics`` is called once per fitted vector.
    """
    if self.computed:
        return

    features = trainer.X.cpu().detach().numpy()
    targets = trainer.y.cpu().detach().numpy()

    log_grid = np.linspace(start=np.log(10**(-9)), stop=np.log(10), num=200)
    glmnet = ElasticNet(n_splits=0, fit_intercept=False,
                        lambda_path=np.exp(log_grid))
    glmnet.fit(features, targets.squeeze())

    self.lambdas = glmnet.lambda_path_
    # Swap axes so that lambda indexes the first axis, the same axis that
    # time occupies for gradient descent iterates: (n_lambda, n_params).
    self.coefs = np.swapaxes(glmnet.coef_path_, 0, 1)

    for w_lambda in self.coefs:
        self.append_performance_metrics(w_lambda, trainer)

    if self.store_path is False:
        # Dummy array so that _aggregate_numeric_results works.
        self.coefs = [np.array([[-1], [-1]])]

    self.computed = True
def test_ridge_models(self):
    '''Test that a pure ridge (alpha=0) model gives expected results for
    both dense and sparse matrices.

    The ridge model, when fit on uncorrelated predictors, should shrink the
    parameter estimates uniformly. We generate linearly related data with a
    correlation free model matrix and check that the ratios of fit
    parameters to true coefficients form a constant array.

    More samples are generated here than in the other tests to guarantee
    that the data is sufficiently correlation free; otherwise the effect
    being measured does not occur.
    '''
    n_samples, n_coefs = 50000, 3
    x_dense = np.random.random(size=(n_samples, n_coefs))
    x_sparse = csc_matrix(x_dense)
    beta = np.random.random(size=(n_coefs, ))

    for matrix in (x_dense, x_sparse):
        for lam in np.linspace(0, 1, 10):
            target = np.dot(x_dense, beta)
            fitted = ElasticNet(alpha=0)
            fitted.fit(matrix, target, lambdas=[lam])

            ratio = fitted._coefficients.ravel() / beta
            # Normalise by the largest ratio; a constant array becomes ones.
            is_uniform = np.allclose(ratio / np.max(ratio), 1, atol=.05)
            self.assertTrue(is_uniform)
def TL(self, x, y, pen_bic, pen_gic):
    """Pick coefficients on an elastic-net path by BIC, then threshold by GIC.

    An ``ElasticNet`` is fit to ``(x, y)``. Each column of its
    ``coef_path_`` is scored with ``RSS + pen_bic * k`` (``k`` = number of
    nonzero coefficients) and the lowest-scoring column is kept together
    with its intercept. That column is then hard-thresholded at each of its
    strictly positive values, and the thresholded vector minimising
    ``RSS + pen_gic * k`` is returned.

    Parameters
    ----------
    x : array
        Predictors. A 1-D array is reshaped to a single column.
    y : array
        Response.
    pen_bic, pen_gic : float
        Per-coefficient penalties for the two selection stages.

    Returns
    -------
    (beta_gic, intercept)
        The thresholded coefficient vector (all zeros if the BIC-selected
        vector has no strictly positive entry) and the BIC-selected
        intercept.

    Raises
    ------
    ValueError
        If no point on the path yields a finite criterion value.
    """
    if len(x.shape) < 2:
        # reshape returns a new array; the original code discarded it, so
        # 1-D input was never actually turned into a column.
        x = x.reshape((x.shape[0], 1))

    m = ElasticNet().fit(x, y)
    betas = m.coef_path_
    intercepts = m.intercept_path_

    # Stage 1: BIC-style selection along the regularisation path.
    best_bic = np.inf
    beta_bic = None
    intercept = None
    for i in range(betas.shape[1]):
        rss = np.sum((y - np.matmul(x, betas[:, i]) - intercepts[i])**2)
        k = np.sum(betas[:, i] != 0)
        bic = rss + pen_bic * k
        if bic < best_bic:
            best_bic = bic
            beta_bic = betas[:, i]
            intercept = intercepts[i]

    if beta_bic is None:
        # Previously this surfaced as an UnboundLocalError further down.
        raise ValueError("no point on the coefficient path has a finite BIC")

    # Stage 2: hard-threshold the selected vector.
    # NOTE(review): only strictly positive coefficients are used as
    # thresholds, and `beta < delta` zeroes every negative coefficient at
    # the first threshold. Confirm that thresholding on the signed value
    # (rather than the magnitude) is intended.
    thresholds = np.sort(beta_bic[beta_bic > 0])
    beta_gic = np.zeros_like(beta_bic)
    best_gic = np.inf
    for delta in thresholds:
        beta_thres = beta_bic.copy()
        beta_thres[beta_thres < delta] = 0
        rss = np.sum((y - np.matmul(x, beta_thres) - intercept)**2)
        k = np.sum(beta_thres != 0)
        gic = rss + pen_gic * k
        if gic < best_gic:
            best_gic = gic
            beta_gic = beta_thres

    return beta_gic, intercept
def test_random_state_cv(self):
    """Check that the fitted model's ``_cv`` object carries the random state.

    The model is built with an explicit ``random_state``; after fitting on
    ``self.inputs[0]`` the ``_cv`` attribute must report the same value.
    """
    random_state = 133
    m = ElasticNet(random_state=random_state)
    x, y = self.inputs[0]
    m.fit(x, y)
    # The leftover debug `print(dir(m._cv))` was removed: it only added
    # noise to the test output.
    assert m._cv.random_state == random_state
def ode_integratedlasso_rank_vars(D, times, target, env=None, silent=True,
                                  interactions=True, rm_target=True):
    """Rank predictors by when they first enter an integrated-lasso path.

    ``D`` is indexed as ``n`` rows, each holding ``d`` consecutive blocks
    of ``L = len(times)`` columns (one block per variable). The response is
    the first difference of the target block; each predictor is the
    trapezoidal integral of a variable block over consecutive time points.
    Rows containing NaN are dropped before fitting.

    Parameters
    ----------
    D : 2-D array
        Data matrix with ``d * L`` columns.
    times : sequence
        Time points; its length defines ``L``.
    target : int
        Index of the target variable block.
    env, silent
        Not used in this function.
    interactions : bool
        If true, products of pairs of integrated predictors (including
        squares) are appended to the design matrix.
    rm_target : bool
        If true, entries equal to ``target`` are removed from the ranking.

    Returns
    -------
    dict
        ``'ranking'``: variables ordered by first entrance on the path
        (index lists when ``interactions`` is true, column indices
        otherwise); ``'coef'``: the fitted model's ``coef_``.
    """
    L = len(times)
    d = D.shape[1] // L
    n = D.shape[0]
    steps = L - 1

    # Response: first differences of the target trajectory, row by row.
    deltaY = np.zeros([steps * n])
    for i in range(n):
        deltaY[i * steps:(i + 1) * steps] = np.diff(
            D[i, target * L:(target + 1) * L])

    # Predictors: trapezoidal integral of each variable over each step.
    dt = np.diff(times)
    Xint = np.zeros([steps * n, d])
    for j in range(d):
        for i in range(n):
            traj = D[i, j * L:(j + 1) * L]
            Xint[i * steps:(i + 1) * steps, j] = (
                traj[:steps] + traj[1:]) / 2 * dt

    # Remove rows with a NaN in either the response or any predictor.
    na_ind = np.logical_or(np.isnan(deltaY), np.isnan(Xint).any(axis=1))
    deltaY = deltaY[~na_ind]
    Xint = Xint[~na_ind]

    var_names = None
    if interactions:
        # Main effects [i] followed by pairs [i, j] with j <= i. Filled
        # element by element so NumPy keeps a 1-D array of lists
        # (`np.object` no longer exists in current NumPy; use `object`).
        index_sets = [[i] for i in range(d)]
        index_sets += [[i, j] for i in range(d) for j in range(i + 1)]
        var_names = np.empty(len(index_sets), dtype=object)
        for pos, idx in enumerate(index_sets):
            var_names[pos] = idx

        # Size from Xint *after* NaN removal; the original used n * (L - 1)
        # rows, which no longer matched Xint once any row was dropped.
        design = np.zeros([Xint.shape[0], len(var_names)])
        design[:, :d] = Xint
        for col in range(d, len(var_names)):
            a, b = var_names[col]
            design[:, col] = Xint[:, a] * Xint[:, b]

        fit = ElasticNet().fit(design, deltaY)
        sel_matrix = np.abs(fit.coef_path_) > 1e-7
    else:
        fit = ElasticNet().fit(Xint, deltaY)
        sel_matrix = fit.coef_path_ != 0

    # Index of the first path position at which each variable is selected.
    # The original used `.max(axis=1)`, which yields a boolean per row and
    # loses the entrance position; it also needs a float array so that
    # never-selected rows can be set to infinity.
    first_entrance = sel_matrix.argmax(axis=1).astype(float)
    # find all rows without ones and set first entrance to Inf
    first_entrance[sel_matrix.sum(axis=1) == 0] = np.inf
    ranking = first_entrance.argsort()
    if interactions:
        ranking = var_names[ranking]

    if rm_target:
        # NOTE(review): with interactions the ranking holds index lists, so
        # this comparison against an int presumably never removes anything;
        # confirm the intended behaviour.
        ranking = ranking[ranking != target]

    return {'ranking': ranking, 'coef': fit.coef_}
def test_validate_weights(self):
    """Exercise ``_validate_weights`` with three invalid and one valid vector."""
    n_rows, n_cols = 50, 10
    design = np.random.random(size=(n_rows, n_cols))
    coefs = np.random.random(size=(n_cols, ))
    response = np.dot(design, coefs)
    model = ElasticNet(alpha=.5)

    # Invalid: sample weight vector that is too short.
    too_short = np.ones(shape=(49, ))
    with self.assertRaises(ValueError):
        model._validate_weights(design, response, weights=too_short)

    # Invalid: weight vector matching the wrong dimension of X.
    wrong_axis = np.ones(shape=(10, ))
    with self.assertRaises(ValueError):
        model._validate_weights(design, response, weights=wrong_axis)

    # Invalid: weight vector containing a negative entry.
    has_negative = np.ones(shape=(50, ))
    has_negative[25] = -1
    with self.assertRaises(ValueError):
        model._validate_weights(design, response, weights=has_negative)

    # Valid: correct dimension, all entries non-negative.
    valid = np.ones(shape=(50, ))
    model._validate_weights(design, response, weights=valid)
def test_one_row_predict(self):
    """Verify that predicting on one row gives only one row of output."""
    model = ElasticNet(random_state=42)
    for features, target in self.inputs:
        model.fit(features, target)
        single_row = features[0].reshape((1, -1))
        prediction = model.predict(single_row)
        assert prediction.shape == (1,)
class knockoff_lasso(knockoff_net):
    """ Performs the knockoff technique with lasso """

    def fit(self, X_lrg=None):
        """ Generates the knockoffs, fits the regression, and performs the FDR calculations

        When ``X_lrg`` is given it is stored as ``self.X_lrg``; otherwise the
        inherited generator matching ``self.knockoff_type`` ('original' or
        'binary') is called.
        """
        # Obtain the augmented design matrix.
        if X_lrg is not None:
            self.X_lrg = X_lrg
        elif self.knockoff_type == 'original':
            self._original_knockoff()
        elif self.knockoff_type == 'binary':
            self._binary_knockoff()

        # Initialize and fit the glmnet object.
        lambda_fraction = min(.000001, .01 / (self.p**2))
        self.elasticnet = ElasticNet(alpha=1,
                                     n_lambdas=self.p * 20,
                                     frac_lg_lambda=lambda_fraction)
        self.elasticnet.fit(self.X_lrg, self.y,
                            normalize=False,
                            include_intercept=self.intercept)

        # Pull out some values from the glmnet object and clean them up.
        net = self.elasticnet
        self.lambdas = net.out_lambdas
        self.var_index_ent = np.sort(net._indices)
        self.coef_matrix = np.zeros((2 * self.p, net.n_lambdas))
        self.coef_matrix[self.var_index_ent] = net._comp_coef.squeeze()[net._indices]

        # Flag the variables that entered the model.
        self.var_entered = np.zeros(2 * self.p).astype(bool)
        self.var_entered[self.var_index_ent] = True

        # Perform all the FDR calculations as inherited from knockoff_net.
        self._get_z()
        self._get_w()
        self._get_T()
        self._get_S()
def test_one_row_predict_with_lambda(self):
    """One row predicted at two lambda values should give 2D output."""
    lambdas = [20, 10]
    model = ElasticNet(random_state=42)
    for features, target in self.inputs:
        model.fit(features, target)
        single_row = features[0].reshape((1, -1))
        prediction = model.predict(single_row, lamb=lambdas)
        assert prediction.shape == (1, 2)
def test_with_single_var(self):
    """Fit on a single predictor with an exact linear response and check R^2."""
    features = np.random.rand(500, 1)
    target = (1.3 * features).ravel()

    model = ElasticNet(random_state=449065)
    model = model.fit(features, target)

    predictions = model.predict(features)
    self.check_r2_score(target, predictions, 0.90)
def test_coef_limits(self):
    """Fit with coefficient bounds [-1, 0] and check that ``coef_`` respects them.

    The lower bound is passed as a per-feature array and the upper bound
    as a scalar.
    """
    features, target = self.inputs[0]
    n_features = features.shape[1]

    model = ElasticNet(
        lower_limits=np.repeat(-1, n_features),
        upper_limits=0,
        random_state=5934,
        alpha=0,
    )
    model = model.fit(features, target)

    fitted = model.coef_
    assert np.all(fitted >= -1)
    assert np.all(fitted <= 0)
def test_predict_without_cv(self):
    """A model fit with ``n_splits=0`` must refuse to predict without a lambda."""
    features, target = self.inputs[0]
    model = ElasticNet(n_splits=0, random_state=340561).fit(features, target)

    # should not make prediction unless value is passed for lambda
    with self.assertRaises(ValueError):
        model.predict(features)
def test_with_pandas_df(self):
    """Fit from a pandas DataFrame / Series and run the regression sanity check."""
    features, target = make_regression(random_state=561)
    frame = pd.DataFrame(features)
    frame['y'] = target

    predictors = frame.drop(['y'], axis=1)
    response = frame.y

    model = ElasticNet(n_folds=3, random_state=123)
    model = model.fit(predictors, response)
    sanity_check_regression(model, features)
def test_with_pandas_df(self):
    """Check that the model accepts a DataFrame of predictors and a Series target."""
    raw_x, raw_y = make_regression(random_state=561)

    table = pd.DataFrame(raw_x)
    table['y'] = raw_y

    estimator = ElasticNet(n_splits=3, random_state=123)
    estimator = estimator.fit(table.drop(['y'], axis=1), table.y)

    sanity_check_regression(estimator, raw_x)
def test_with_no_predictor_variance(self):
    """Fitting a constant predictor must raise ValueError with the glmnet message.

    The design matrix is a single column of ones, so every predictor has
    zero variance.
    """
    x = np.ones((500, 1))
    y = np.random.rand(500)

    m = ElasticNet(random_state=561)
    msg = "All predictors have zero variance (glmnet error no. 7777)."

    # `assertRaises(..., msg=msg)` only sets the message shown when the
    # assertion fails; it never inspects the raised exception. Capture the
    # exception and check its text explicitly instead.
    with self.assertRaises(ValueError) as ctx:
        m.fit(x, y)
    self.assertIn(msg, str(ctx.exception))
def test_coef_limits(self):
    """Fit with coefficient bounds [0, 1] and check that ``coef_`` respects them.

    The lower bound is passed as a scalar and the upper bound as a
    per-feature array.
    """
    x, y = self.inputs[0]
    lower_limits = 0
    upper_limits = np.repeat(1, x.shape[1])

    m = ElasticNet(lower_limits=lower_limits,
                   upper_limits=upper_limits,
                   random_state=5934)
    m = m.fit(x, y)

    # The comparison must happen element-wise *inside* np.all. The original
    # `np.all(m.coef_) >= 0` reduced to a single bool first and then
    # compared that bool with the limit, which is always true.
    assert np.all(m.coef_ >= 0)
    assert np.all(m.coef_ <= 1)
def test_validate_weights(self):
    """Check ``_validate_weights`` rejects bad sample weights and accepts good ones."""
    X = np.random.random(size=(50, 10))
    beta = np.random.random(size=(10,))
    y = np.dot(X, beta)
    enet = ElasticNet(alpha=.5)

    def make_weights(length, negative_at=None):
        # Vector of ones, optionally with one entry set to -1.
        weights = np.ones(shape=(length,))
        if negative_at is not None:
            weights[negative_at] = -1
        return weights

    # Invalid use: a sample weight vector that is too short.
    with self.assertRaises(ValueError):
        enet._validate_weights(X, y, weights=make_weights(49))

    # Invalid use: a weight vector that matches the wrong dimension of X.
    with self.assertRaises(ValueError):
        enet._validate_weights(X, y, weights=make_weights(10))

    # Invalid use: a weight vector containing a negative entry.
    with self.assertRaises(ValueError):
        enet._validate_weights(X, y, weights=make_weights(50, negative_at=25))

    # Valid use: correct dimension with all non-negative entries.
    enet._validate_weights(X, y, weights=make_weights(50))
def test_n_splits(self):
    """Fit with every value in ``self.n_splits``.

    Values of 1 or 2 must raise a ValueError mentioning the minimum of 3;
    every other value must fit and pass the regression sanity check.
    """
    x, y = self.inputs[0]
    for n in self.n_splits:
        m = ElasticNet(n_splits=n, random_state=6601)
        if 0 < n < 3:
            # `assertRaisesRegexp` is a deprecated alias that was removed
            # in Python 3.12; `assertRaisesRegex` is the supported name.
            with self.assertRaisesRegex(ValueError,
                                        "n_splits must be at least 3"):
                m.fit(x, y)
        else:
            m = m.fit(x, y)
            sanity_check_regression(m, x)
def test_fit_cv_glmnet_comparison():
    """Compare ``Elnet`` against ``ElasticNet`` on the same synthetic data.

    Both models use 3 splits and the same random state; their
    ``lambda_max_`` must agree, and ``Elnet.lambda_1se_`` must match the
    first entry of ``ElasticNet.lambda_best_``.
    """
    n_obs = 100
    rng = np.random.default_rng(SEED)
    # Draw order matters for reproducibility: noise first, then predictors.
    noise = rng.normal(loc=0, scale=1, size=n_obs)
    X = rng.normal(loc=5, scale=2, size=(n_obs, 4))
    y = X.dot(np.array([1, -2, 0.5, 1])) + noise

    candidate = Elnet(n_splits=3, random_state=182, scoring="r2")
    candidate.fit(X, y)

    reference = ElasticNet(n_splits=3, random_state=182)
    reference.fit(X, y)

    np.testing.assert_almost_equal(candidate.lambda_max_, reference.lambda_max_)
    np.testing.assert_almost_equal(candidate.lambda_1se_,
                                   reference.lambda_best_[0])
def test_with_defaults(self):
    """Fit a default-configured model on every input and check basic invariants."""
    model = ElasticNet(random_state=2821)
    for features, target in self.inputs:
        model = model.fit(features, target)
        sanity_check_regression(model, features)

        # check selection of lambda_best
        best_not_after_max = model.lambda_best_inx_ <= model.lambda_max_inx_
        self.assertTrue(best_not_after_max)

        # check full path predict: one trailing column per lambda on the path
        full_path = model.lambda_path_
        predictions = model.predict(features, lamb=full_path)
        self.assertEqual(predictions.shape[-1], full_path.size)
def test_validate_matrix(self):
    '''Test the _validate_matrix method.'''
    dense = np.random.random(size=(50, 10))
    model = ElasticNet(alpha=.5)

    # Invalid use: a sparse matrix in the incorrect (row-compressed) format.
    with self.assertRaises(ValueError):
        model._validate_matrix(csr_matrix(dense))

    # Valid use: a matrix in compressed sparse column format.
    model._validate_matrix(csc_matrix(dense))
def _parallel_permute_count_nonzero_penalised_coefs(xp, yp, lam_path,
                                                    penalties, norm_num,
                                                    is_regression):
    """Shuffle the response, refit along ``lam_path`` and count selected coefs.

    ``yp`` is shuffled in place. An ``ElasticNet`` (regression) or
    ``LogitNet`` (otherwise) is fit with ``alpha=norm_num`` and the given
    relative penalties. The sign of ``|coef_path_| * penalties`` is summed
    over axis 0.
    """
    from glmnet import ElasticNet, LogitNet

    np.random.shuffle(yp)

    model_cls = ElasticNet if is_regression else LogitNet
    pm = model_cls(alpha=norm_num, lambda_path=lam_path)
    pm.fit(xp, yp, relative_penalties=penalties)

    magnitudes = np.abs(np.squeeze(pm.coef_path_))
    weighted = magnitudes * vec_to_array(penalties)
    return np.sign(weighted).sum(axis=0)
def run(self, epochs: int):
    """Fit an elastic-net path and fire the iteration event for each lambda.

    ``epochs`` is ignored; it exists only for compatibility with the
    rprml.core.Executor class.
    """
    print("Starting ElasticNet simulation.")

    # Set up the data loaders used by self.executor to compute metrics.
    self._reset_data_loaders()

    features = self.train_dataset.X.cpu().detach().numpy()
    targets = self.train_dataset.y.cpu().detach().numpy()

    # Lambda path has to be supplied in decreasing order.
    descending = -np.sort(-np.array(self.lambdas))
    # Prepend infinity: the glmnet package modifies the first lambda, so a
    # sacrificial leading entry keeps the requested values intact.
    lambda_path = np.insert(descending, 0, np.inf, axis=0)

    glmnet = ElasticNet(alpha=self.alpha,
                        n_splits=0,
                        fit_intercept=False,
                        standardize=False,
                        lambda_path=lambda_path)
    glmnet.fit(features, targets.squeeze())

    # Swap axes so that lambda corresponds to the first axis, then drop the
    # sacrificial first lambda together with its fitted vector.
    coefs = np.array(np.swapaxes(glmnet.coef_path_, 0, 1))[1:, :]
    lambdas = np.array(glmnet.lambda_path_).flatten()[1:]

    # Save lambdas and alpha to the executor's history.
    history = self.executor.history
    history['lambdas'] = lambdas
    history['alpha'] = self.alpha

    # For each fitted model, compute the metrics registered to
    # self.executor.
    for fitted_vector in coefs:
        w_lambda = torch.tensor(fitted_vector,
                                dtype=torch.float32,
                                device=self.device)
        self.model.set_w(w_lambda)
        self.trainer.fire_event(_iteration_level_event)
def FeatureSelection(df_x, xtrain, ytrain, exclude_cols=[]):
    """Select features whose lasso coefficient is nonzero.

    A lasso (``alpha=1``) with 10 splits is fit on the training data.
    Returns a tuple of the matching column names of ``df_x`` and the
    positional indices of the nonzero coefficients. ``exclude_cols`` is not
    used in this function.
    """
    # #### Gam
    # gam = LinearGAM(n_splines=4).gridsearch(xtrain, ytrain)
    # pvalues = np.array(gam.statistics_['p_values'])
    # important_x_idx_gam = [idx-1 for idx in np.where(pvalues < 0.1)[0]]
    # important_x_gam = df_x.iloc[:, important_x_idx_gam]

    #### Lasso
    model = ElasticNet(alpha=1, n_splits=10, random_state=123, n_jobs=4)
    model.fit(xtrain, ytrain)

    selected_idx = np.where(model.coef_ != 0.)[0]
    selected_names = list(df_x.columns[selected_idx])
    return (selected_names, selected_idx)
def train_and_test(basename):
    """Fit one cross-validated elastic net per output column and save results.

    Reads ``train_test_<basename>.npz``, fits a ``CVGlmNet``-wrapped
    ``ElasticNet(alpha=.1)`` for every column of ``train_y``, predicts on
    the test set, and writes predictions, truth, errors, the per-column
    ``1 - var(error) / var(truth)`` ratio, the country list and the
    selected coefficients to ``results_<basename>.npz``.

    Parameters
    ----------
    basename : str
        Middle part of the input and output file names.
    """
    # np.load on an .npz returns an archive object that keeps the file
    # open. Read every array inside the with-block so the handle is closed
    # afterwards instead of leaking.
    with np.load("train_test_" + basename + ".npz") as d:
        train_x = d["train_x"]
        train_y = d["train_y"]
        test_x = d["test_x"]
        test_y = d["test_y"]
        countrylist = d["countrylist"]

    numcountries = train_x.shape[1]
    weights = np.zeros((numcountries, numcountries))
    enets = [None] * numcountries
    enet_cvs = [None] * numcountries
    preds = np.zeros(test_y.shape)
    errors = np.zeros(test_y.shape)
    var_ratio = np.zeros((numcountries))

    for i in range(numcountries):
        enets[i] = ElasticNet(alpha=.1)
        enet_cvs[i] = CVGlmNet(enets[i], n_folds=10, n_jobs=10)
        enet_cvs[i].fit(train_x, train_y[:, i])

        # Coefficients at the lambda chosen by cross-validation.
        bli = enet_cvs[i].best_lambda_idx
        weights[i, :] = enet_cvs[i].base_estimator.get_coefficients_from_lambda_idx(bli)

        preds[:, i] = enet_cvs[i].predict(test_x)
        errors[:, i] = test_y[:, i] - preds[:, i]

        var_truth = np.var(test_y[:, i])
        var_err = np.var(errors[:, i])
        var_ratio[i] = 1 - var_err / var_truth
        print("finished predicting country number %d" % i)

    np.savez("results_" + basename + ".npz",
             preds=preds,
             truth=test_y,
             errors=errors,
             var_ratio=var_ratio,
             countrylist=countrylist,
             weights=weights)
def glmnet_box():
    """Fit a ridge (``alpha=0``) model to the Box-Cox latitude and longitude labels.

    For each target the model is refit on ``music_features``, its score is
    printed, and the back-transformed predictions are plotted against the
    raw labels.
    """
    m1 = ElasticNet(n_splits=20, scoring='r2', alpha=0)

    # (transformed label, Box-Cox lambda, bound, raw label,
    #  report format, plot file, plot title)
    targets = (
        (box_latitude_label, lambda_lat, 90, latitude_label,
         'GLMNET ridge lattitude r2 {}',
         'ridge_latitude_residual.png',
         'residual vs fitted latitude for Ridge'),
        (box_longitude_label, lambda_lon, 180, longitude_label,
         'GLMNET ridge longitude r2 {}',
         'ridge_longitude_residual.png',
         'residual vs fitted longitude for Ridge regression'),
    )

    for box_label, box_lambda, bound, raw_label, report, filename, title in targets:
        m1.fit(music_features, box_label)
        r_squared = m1.score(music_features, box_label)
        print(report.format(r_squared))
        fitted = inverse_box_cox(m1.predict(music_features), box_lambda, bound)
        plot_predictions(fitted, raw_label, filename, title)
def glmnet_lasso():
    """Fit a lasso (``alpha=1``) model to the Box-Cox latitude and longitude labels.

    The same model object is fit first on latitude, then on longitude; each
    time the score is printed and a residual plot is produced from the
    back-transformed predictions.
    """
    m = ElasticNet(n_splits=20, scoring='r2', alpha=1)

    def fit_and_report(box_label, report):
        # Refit on one transformed target and print its score.
        m.fit(music_features, box_label)
        print(report.format(m.score(music_features, box_label)))

    fit_and_report(box_latitude_label, 'GLMNET lasso latitude r2 {}')
    latitude_fitted = inverse_box_cox(m.predict(music_features), lambda_lat, 90)
    plot_predictions(latitude_fitted, latitude_label,
                     'lasso_latitude_residual.png',
                     'residual vs fitted latitude for lasso regression')

    fit_and_report(box_longitude_label, 'GLMNET lasso longitude r2 {}')
    longitude_fitted = inverse_box_cox(m.predict(music_features), lambda_lon, 180)
    plot_predictions(longitude_fitted, longitude_label,
                     'lasso_longitude_residual.png',
                     'residual vs fitted longitude for lasso regression')
def test_linear_glmnet(benchmark, alpha, n_obs):
    """Benchmark ``ElasticNet.fit`` on synthetic linear data with four predictors."""
    rng = np.random.default_rng(SEED)
    # Keep the draw order (noise, then predictors) so data stay reproducible.
    noise = rng.normal(loc=0, scale=1, size=n_obs)
    X = rng.normal(loc=5, scale=2, size=(n_obs, 4))

    coefficients = np.array([1, -2, 0.5, 1])
    y = X.dot(coefficients) + noise

    model = ElasticNet(alpha=alpha)
    benchmark(model.fit, X, y)
def test_unregularized_with_weights(self):
    '''Test that fitting an unregularized model (lambda=0) gives expected
    results when sample weights are used.
    '''
    dense = np.random.random(size=(5000, 10))
    sparse = csc_matrix(dense)
    true_coefs = np.random.random(size=(10,))
    response = np.dot(dense, true_coefs)
    sample_weights = np.random.uniform(size=(5000,))

    for alpha in [0, .5, 1]:
        for design in (dense, sparse):
            model = ElasticNet(alpha=alpha)
            model.fit(design, response, lambdas=[0], weights=sample_weights)

            # With no penalty the fit should reproduce the exact relation.
            predictions = model.predict(design).ravel()
            self.assertTrue(np.allclose(predictions, response, atol=.01))

            fitted = model._coefficients.ravel()
            self.assertTrue(np.allclose(fitted, true_coefs, atol=.02))
def test_ridge_with_weights(self):
    '''Test that a pure ridge (alpha=0) model gives expected results for
    both dense and sparse matrices when sample weights are supplied.
    '''
    n_samples = 50000
    dense = np.random.random(size=(n_samples, 3))
    sparse = csc_matrix(dense)
    true_coefs = np.random.random(size=(3, ))

    # Sample weights, normalised to sum to one.
    raw_weights = np.random.uniform(size=(n_samples, ))
    sample_weights = raw_weights / np.sum(raw_weights)

    for design in (dense, sparse):
        for penalty in np.linspace(0, 1, 10):
            response = np.dot(dense, true_coefs)
            model = ElasticNet(alpha=0)
            model.fit(design, response, lambdas=[penalty],
                      weights=sample_weights)

            # Ratios of fitted to true coefficients, scaled by the largest,
            # should all be close to one.
            shrinkage = model._coefficients.ravel() / true_coefs
            scaled = shrinkage / np.max(shrinkage)
            self.assertTrue(np.allclose(scaled, 1, atol=.05))
def test_lasso_with_weights(self):
    '''Test that a pure lasso (alpha=1) model gives expected results when
    sample weights are used.

    For each ``w_mask`` the true coefficients from position ``w_mask``
    onward are set to zero, and the fitted model must keep exactly
    ``w_mask`` nonzero coefficients.
    '''
    Xdn = np.random.random(size=(25000, 10))
    Xsp = csc_matrix(Xdn)
    w = np.random.random(size=(10, ))
    sw = np.random.uniform(size=(25000, ))
    sw = sw / np.sum(sw)
    for w_mask in range(1, 10):
        for X in (Xdn, Xsp):
            w_masked = w.copy()
            w_masked[w_mask:] = 0
            y = np.dot(Xdn, w_masked)
            enet = ElasticNet(alpha=1)
            enet.fit(X, y, lambdas=[.01], weights=sw)
            # The original `len(coefs.ravel() == w_mask)` took the length
            # of a boolean array, which is always truthy, so the assertion
            # could never fail. Count the selected coefficients instead.
            n_selected = np.count_nonzero(enet._coefficients)
            self.assertEqual(n_selected, w_mask)
def l1_l2_regression(alpha):
    """Fit an elastic net with the given ``alpha`` to latitude and longitude.

    The model is fit on the Box-Cox transformed latitude labels, scored and
    plotted, then refit on the transformed longitude labels and reported
    the same way. Plots use the back-transformed predictions against the
    raw labels.
    """
    m = ElasticNet(n_splits=20, scoring='r2', alpha=alpha)

    # Latitude.
    m.fit(music_features, box_latitude_label)
    lat_r_squared = m.score(music_features, box_latitude_label)
    print('GLMNET L1 L2 alpha {} latitude r2 {}'.format(alpha, lat_r_squared))
    lat_fitted = inverse_box_cox(m.predict(music_features), lambda_lat, 90)
    lat_file = 'l1_l2_latitude_residual_{}.png'.format(alpha)
    lat_title = 'residual vs fitted latitude for l1_l2 \n regression alpha {}'.format(alpha)
    plot_predictions(lat_fitted, latitude_label, lat_file, lat_title)

    # Longitude.
    m.fit(music_features, box_longitude_label)
    lon_r_squared = m.score(music_features, box_longitude_label)
    print('GLMNET L1 L2 alpha {} longitude r2 {}'.format(alpha, lon_r_squared))
    lon_fitted = inverse_box_cox(m.predict(music_features), lambda_lon, 180)
    lon_file = 'l1_l2_longitude_residual_{}.png'.format(alpha)
    lon_title = 'residual vs fitted longitude for l1_l2 \n regression alpha {}'.format(alpha)
    plot_predictions(lon_fitted, longitude_label, lon_file, lon_title)
def test_lasso_with_weights(self):
    '''Test that a pure lasso (alpha=1) model gives expected results when
    sample weights are used.

    The response is built from coefficients that are zero from position
    ``w_mask`` onward; the weighted fit must retain exactly ``w_mask``
    nonzero coefficients.
    '''
    Xdn = np.random.random(size=(25000, 10))
    Xsp = csc_matrix(Xdn)
    w = np.random.random(size=(10,))
    sw = np.random.uniform(size=(25000,))
    sw = sw / np.sum(sw)
    for w_mask in range(1, 10):
        for X in (Xdn, Xsp):
            w_masked = w.copy()
            w_masked[w_mask:] = 0
            y = np.dot(Xdn, w_masked)
            enet = ElasticNet(alpha=1)
            enet.fit(X, y, lambdas=[.01], weights=sw)
            # Misplaced parenthesis in the original: `len(arr == w_mask)`
            # is the (nonzero) array length and therefore always truthy.
            # Compare the number of nonzero coefficients with w_mask.
            n_selected = np.count_nonzero(enet._coefficients)
            self.assertEqual(n_selected, w_mask)
def test_coef_interpolation(self):
    """Predictions at a lambda between two path values must differ from both."""
    features, target = self.inputs[0]
    model = ElasticNet(n_splits=0, random_state=1729).fit(features, target)

    # Two neighbouring values on the computed path and their midpoint,
    # which is a value not equal to one on the computed path.
    lower, upper = model.lambda_path_[1], model.lambda_path_[2]
    midpoint = (lower + upper) / 2.0

    at_lower = model.predict(features, lamb=lower)
    at_upper = model.predict(features, lamb=upper)
    at_midpoint = model.predict(features, lamb=midpoint)

    self.assertFalse(np.allclose(at_lower, at_midpoint))
    self.assertFalse(np.allclose(at_upper, at_midpoint))
def test_unregularized_with_weights(self):
    '''Test that fitting an unregularized model (lambda=0) gives expected
    results when sample weights are used.
    '''
    n_rows, n_cols = 5000, 10
    x_dense = np.random.random(size=(n_rows, n_cols))
    x_sparse = csc_matrix(x_dense)
    beta = np.random.random(size=(n_cols, ))
    target = np.dot(x_dense, beta)
    weights = np.random.uniform(size=(n_rows, ))

    for mixing in [0, .5, 1]:
        for matrix in (x_dense, x_sparse):
            fitted = ElasticNet(alpha=mixing)
            fitted.fit(matrix, target, lambdas=[0], weights=weights)

            preds_match = np.allclose(fitted.predict(matrix).ravel(),
                                      target, atol=.01)
            self.assertTrue(preds_match)

            coefs_match = np.allclose(fitted._coefficients.ravel(),
                                      beta, atol=.02)
            self.assertTrue(coefs_match)
def test_unregularized_models(self):
    '''Test that fitting an unregularized model (lambda=0) gives expected
    results for both dense and sparse model matrices.

    We test that an unregularized model captures a perfect linear
    relationship without error. That is, the fit parameters equal the
    true coefficients.
    '''
    dense = np.random.random(size=(5000, 10))
    sparse = csc_matrix(dense)
    true_coefs = np.random.random(size=(10,))
    response = np.dot(dense, true_coefs)

    for alpha in [0, .5, 1]:
        for design in (dense, sparse):
            model = ElasticNet(alpha=alpha)
            model.fit(design, response, lambdas=[0])

            predictions = model.predict(design).ravel()
            self.assertTrue(np.allclose(predictions, response, atol=.01))

            fitted = model._coefficients.ravel()
            self.assertTrue(np.allclose(fitted, true_coefs, atol=.02))
def test_ridge_with_weights(self):
    '''Test that a pure ridge (alpha=0) model gives expected results for
    both dense and sparse matrices when sample weights are used.
    '''
    x_dense = np.random.random(size=(50000, 3))
    x_sparse = csc_matrix(x_dense)
    beta = np.random.random(size=(3,))
    weights = np.random.uniform(size=(50000,))
    weights = weights / np.sum(weights)

    for matrix in (x_dense, x_sparse):
        for lam in np.linspace(0, 1, 10):
            target = np.dot(x_dense, beta)
            fitted = ElasticNet(alpha=0)
            fitted.fit(matrix, target, lambdas=[lam], weights=weights)

            ratio = fitted._coefficients.ravel() / beta
            # A uniformly shrunk fit has identical ratios, so scaling by
            # the maximum should give an array of ones.
            is_uniform = np.allclose(ratio / np.max(ratio), 1, atol=.05)
            self.assertTrue(is_uniform)
def test_max_lambda_with_weights(self):
    '''Test that the calculations of max_lambda inside the fortran code and
    inside the python code give the same result on both dense and sparse
    matrices, even when sample weights come into play.
    '''
    dense = np.random.random(size=(50, 10))
    sparse = csc_matrix(dense)
    true_coefs = np.random.random(size=(10,))
    response = np.dot(dense, true_coefs)
    sample_weights = np.random.uniform(size=(50,))

    for alpha in [.01, .5, 1]:
        for design in (dense, sparse):
            model = ElasticNet(alpha=alpha)
            model.fit(design, response, weights=sample_weights)

            # Extrapolate one step back from the second and third output
            # lambdas using their ratio.
            path = model.out_lambdas
            step_ratio = path[1] / path[2]
            from_fortran = path[1] * step_ratio
            from_python = model._max_lambda(design, response,
                                            weights=sample_weights)
            self.assertAlmostEqual(from_fortran, from_python, 4)
def test_lasso_models(self):
    '''Test that a pure lasso (alpha=1) model gives expected results for
    both dense and sparse design matrices.

    We test that the lasso model has the ability to pick out zero
    parameters from a linear relationship. To see this, we generate
    linearly related data where some number of the coefficients are exactly
    zero, and make sure the lasso model can pick these out.
    '''
    Xdn = np.random.random(size=(25000, 10))
    Xsp = csc_matrix(Xdn)
    w = np.random.random(size=(10,))
    for w_mask in range(1, 10):
        for X in (Xdn, Xsp):
            w_masked = w.copy()
            w_masked[w_mask:] = 0
            y = np.dot(Xdn, w_masked)
            enet = ElasticNet(alpha=1)
            enet.fit(X, y, lambdas=[.01])
            # The original asserted `len(coefs.ravel() == w_mask)`, i.e.
            # the length of a boolean array, which is always truthy and so
            # tested nothing. Check the number of nonzero coefficients.
            n_selected = np.count_nonzero(enet._coefficients)
            self.assertEqual(n_selected, w_mask)
def test_max_lambda(self):
    '''Test that the calculations of max_lambda inside the fortran code and
    inside the python code give the same result on both dense and sparse
    matrices.

    Note that the implementation of max_lambda for alpha=0 in
    the fortran code is unknown, so we currently do not test against it.
    '''
    x_dense = np.random.random(size=(50, 10))
    x_sparse = csc_matrix(x_dense)
    beta = np.random.random(size=(10,))
    target = np.dot(x_dense, beta)

    for mixing in [.01, .5, 1]:
        for matrix in (x_dense, x_sparse):
            fitted = ElasticNet(alpha=mixing)
            fitted.fit(matrix, target)

            lambdas_out = fitted.out_lambdas
            second, third = lambdas_out[1], lambdas_out[2]
            # Extrapolate one step back along the path.
            fortran_value = second * (second / third)
            python_value = fitted._max_lambda(matrix, target)
            self.assertAlmostEqual(fortran_value, python_value, 4)
def test_lambda_clip_warning(self):
    """Predicting at a lambda outside ``lambda_path_`` must emit a RuntimeWarning."""
    features, target = self.inputs[0]
    model = ElasticNet(n_splits=0, random_state=1729).fit(features, target)

    # note, lambda_path_ is in decreasing order, so the first entry is the
    # largest value and the last entry is the smallest.
    above_range = model.lambda_path_[0] + 1
    below_range = model.lambda_path_[-1] - 1

    with self.assertWarns(RuntimeWarning):
        model.predict(features, lamb=above_range)

    with self.assertWarns(RuntimeWarning):
        model.predict(features, lamb=below_range)
def test_validate_inputs(self):
    """Check ``_validate_inputs`` rejects a mis-sized response and accepts a matching one."""
    design = np.random.random(size=(50, 10))
    # Not used below; the draw is kept so the sequence of random numbers
    # consumed by this test is unchanged.
    w = np.random.random(size=(10,))
    model = ElasticNet(alpha=.5)

    # Invalid use: a y that is too short.
    with self.assertRaises(ValueError):
        model._validate_inputs(design, np.random.random(size=(49,)))

    # Invalid use: a y that matches the wrong dimension of X.
    with self.assertRaises(ValueError):
        model._validate_inputs(design, np.random.random(size=(10,)))

    # Valid use: a y of the correct dimension.
    model._validate_inputs(design, np.random.random(size=(50,)))
def test_validate_excl_preds(self):
    """Exercise ``_validate_excl_preds`` with two invalid arrays and one valid one."""
    design = np.random.random(size=(50, 10))
    coefs = np.random.random(size=(10,))
    response = np.dot(design, coefs)
    model = ElasticNet(alpha=.5)

    # Invalid use: an excl_preds array that is too long.
    too_long = np.ones(shape=(12,))
    with self.assertRaises(ValueError):
        model._validate_excl_preds(design, response, excl_preds=too_long)

    # Invalid use: attempt to exclude a predictor out of range, i.e. one
    # that does not exist.
    out_of_range = np.ones(shape=(11,))
    out_of_range[0] = 1
    out_of_range[5] = 10
    with self.assertRaises(ValueError):
        model._validate_excl_preds(design, response, excl_preds=out_of_range)

    # Valid use: exclude some in-range predictors.
    in_range = np.array([1, 2, 4, 6, 8])
    model._validate_excl_preds(design, response, excl_preds=in_range)
def test_relative_penalties(self):
    """Unpenalising the selected coefficients must not shrink their magnitude.

    For every input, a first model is fit without relative penalties; the
    coefficients it keeps nonzero are then given a relative penalty of
    zero in a second fit.
    """
    penalised = ElasticNet(random_state=4328)
    unpenalised = ElasticNet(random_state=4328)

    for features, target in self.inputs:
        n_features = features.shape[1]

        # Baseline fit: no relative penalties applied.
        penalised.fit(features, target)

        # Unpenalize the coefficients the baseline kept nonzero.
        selected = np.nonzero(penalised.coef_)
        penalty = np.repeat(1, n_features)
        penalty[selected] = 0

        # Refit the model with the unpenalized coefs.
        unpenalised.fit(features, target, relative_penalties=penalty)

        # The unpenalized estimates should be at least as large as the
        # penalized ones in absolute value.
        baseline_abs = np.abs(penalised.coef_)
        refit_abs = np.abs(unpenalised.coef_)
        assert np.all(baseline_abs <= refit_abs)
from sklearn.datasets import make_regression

# Demo script: fit an elastic net on synthetic regression data, print the
# model and a few predictions, then plot the coefficient paths.
#
# The original used Python 2 `print` statements, which are syntax errors on
# Python 3. Every call prints a single value, so the parenthesised form
# below behaves the same on both Python 2 and Python 3.

display_bar = '-' * 70

X, y = make_regression(
    n_samples=5000,
    n_features=100,
    n_informative=30,
    effective_rank=40,
    noise=.1,
)

print(display_bar)
print("Fit an elastic net on some fake data")
print(display_bar)

enet = ElasticNet(alpha=.025)
enet.fit(X, y)
print(enet)

print(display_bar)
print("Predictions vs. actuals for the last elastic net model:")
print(display_bar)

preds = enet.predict(X)
print(y[:10])
# Last column of the prediction matrix.
print(preds[:10, np.shape(preds)[1] - 1])

enet.plot_paths()
def test_alphas(self):
    """Fit once per value in ``self.alphas`` and check the R^2 of each fit."""
    features, target = self.inputs[0]
    for mixing in self.alphas:
        model = ElasticNet(alpha=mixing, random_state=2465).fit(features, target)
        predictions = model.predict(features)
        self.check_r2_score(target, predictions, 0.90, alpha=mixing)
def test_cv_scoring(self):
    """Fit once per scoring method in ``self.scoring`` and check the R^2 of each fit."""
    features, target = self.inputs[0]
    for scoring_name in self.scoring:
        model = ElasticNet(scoring=scoring_name, random_state=1729)
        model = model.fit(features, target)
        predictions = model.predict(features)
        self.check_r2_score(target, predictions, 0.90, scoring=scoring_name)