def AdaptiveLasso(X, y, logistic=False, sample_weight=None, adaptive_weights=None, random_state=None):
    """ Adaptive Lasso with cross-validation for optimal lambda """
    if logistic:
        enet = glmnet.LogitNet(standardize=False, fit_intercept=False, n_splits=5,
                               scoring='accuracy', alpha=1)
    else:
        enet = glmnet.ElasticNet(standardize=False, fit_intercept=False, n_splits=5,
                                 scoring='mean_squared_error', alpha=1)
    # Feature-specific relative penalties implement the adaptive step of the adaptive lasso
    enet.fit(X, y, relative_penalties=adaptive_weights, sample_weight=sample_weight)
    return enet.coef_
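# --- Usage sketch (not from the original source): a common way to build the adaptive
# weights is a two-stage procedure, using an initial ridge fit and penalizing each
# feature by 1 / |beta_init|. The synthetic data, variable names, and the epsilon
# guard below are assumptions made purely for illustration.
import numpy as np
import glmnet

rng = np.random.default_rng(0)
X_demo = rng.standard_normal((200, 10))
beta_true = np.array([2.0, -1.5, 0, 0, 1.0, 0, 0, 0, 0, 0])
y_demo = X_demo @ beta_true + 0.1 * rng.standard_normal(200)

# Stage 1: ridge-like initial fit (alpha=0) to obtain preliminary coefficients
init = glmnet.ElasticNet(standardize=False, fit_intercept=False, alpha=0, n_splits=5)
init.fit(X_demo, y_demo)

# Stage 2: penalize each feature inversely to its initial coefficient magnitude
adaptive_w = 1.0 / (np.abs(init.coef_) + 1e-6)
coef = AdaptiveLasso(X_demo, y_demo, adaptive_weights=adaptive_w)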
def _logistic_regression(self, theta, K=10):
    """
    Computes logistic regression coefficients for data sampled from the
    likelihood and from the marginal.

    theta: np.ndarray
        Parameter sample for which to compute the coefficients.
    K: int
        The number of K-fold cross-validations to be used.
    """
    # Select model params according to weights
    ws_norm = self.weights / np.sum(self.weights)
    p_selec = list()
    idx_selec = list()
    for _ in range(self.prior_samples.shape[0]):
        cat = np.random.choice(range(len(ws_norm)), p=ws_norm)
        p_selec.append(self.prior_samples[cat])
        idx_selec.append(cat)

    # Simulate from marginal using selected model params
    y_m = self.simobj.sample_data(self.d, p_selec)
    y_m = self.simobj.summary(y_m)

    # Simulate from likelihood
    y_t = self.simobj.sample_data(self.d, theta, num=len(ws_norm))
    y_t = self.simobj.summary(y_t)

    # Prepare targets
    t_t = np.ones(y_t.shape[0])
    t_m = np.zeros(y_m.shape[0])

    # Concatenate data
    Y = np.concatenate((y_t, y_m), axis=0)
    T = np.concatenate((t_t, t_m))

    # Define glmnet model
    model = glmnet.LogitNet(n_splits=K, verbose=False, n_jobs=1, scoring="log_loss")
    model.fit(Y, T)

    # Collect coefficients and intercept at the selected lambda
    cf_choice = model.coef_path_[..., model.lambda_max_inx_].T.reshape(-1)
    inter = model.intercept_path_[..., model.lambda_max_inx_]
    cf = np.array(list(inter) + list(cf_choice)).reshape(-1, 1)
    return cf
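# --- Standalone sketch (not from the original source) of the classifier step used
# above: fit a cross-validated LogitNet to discriminate "likelihood" samples from
# "marginal" samples, then read the intercept and coefficients off the regularization
# path at lambda_max_inx_, exactly as the method does. The Gaussian toy data is an
# assumption made only to keep the sketch self-contained.
import numpy as np
import glmnet

rng = np.random.default_rng(1)
y_t_demo = rng.normal(loc=1.0, size=(500, 4))   # stand-in for likelihood summaries
y_m_demo = rng.normal(loc=0.0, size=(500, 4))   # stand-in for marginal summaries

Y_demo = np.concatenate((y_t_demo, y_m_demo), axis=0)
T_demo = np.concatenate((np.ones(500), np.zeros(500)))

clf = glmnet.LogitNet(n_splits=10, verbose=False, n_jobs=1, scoring="log_loss")
clf.fit(Y_demo, T_demo)

inter_demo = clf.intercept_path_[..., clf.lambda_max_inx_]
coefs_demo = clf.coef_path_[..., clf.lambda_max_inx_].T.reshape(-1)
cf_demo = np.array(list(inter_demo) + list(coefs_demo)).reshape(-1, 1)
print(cf_demo.shape)   # (1 + n_features, 1) for a binary problem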
def run_glmnet_classification(df, X, y, dataset, reg_alpha, reg_lambda, metric, density):
    tmp = time.time()
    glm = glmnet.LogitNet(alpha=reg_alpha / (reg_alpha + reg_lambda),
                          lambda_path=[reg_alpha + reg_lambda])
    glm.fit(X, y)
    glmnet_time = time.time() - tmp
    glmnet_score = metrics.accuracy_score(y, glm.predict(X))
    glmnet_zero = count_zero_coefficients(glm.coef_)
    df.loc[len(df)] = [
        dataset, X.shape[1], X.shape[0], density, 'Classification', reg_alpha,
        reg_lambda, 'Glmnet', metric, glmnet_score, glmnet_time, '-', glmnet_zero
    ]
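# --- Usage sketch (not from the original source). The DataFrame column names and the
# count_zero_coefficients helper below are assumptions made for illustration; the
# original benchmark code defines its own versions of both elsewhere.
import time
import numpy as np
import pandas as pd
import glmnet
from sklearn import datasets, metrics

def count_zero_coefficients(coef):
    # Hypothetical helper: number of exactly-zero entries in the coefficient array
    return int(np.sum(np.asarray(coef) == 0))

results = pd.DataFrame(columns=[
    'dataset', 'n_features', 'n_samples', 'density', 'task', 'alpha', 'lambda',
    'library', 'metric', 'score', 'time', 'iterations', 'zero_coefficients'
])

X_demo, y_demo = datasets.make_classification(n_samples=500, n_features=20, random_state=0)
run_glmnet_classification(results, X_demo, y_demo, dataset='synthetic',
                          reg_alpha=0.5, reg_lambda=0.5, metric='accuracy', density=1.0)
print(results)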
def ElasticNet(X, y, logistic=False, sample_weight=None, random_state=None):
    """ Elastic Net with cross-validation for optimal alpha and lambda """
    cv_scores = np.array([])
    cv_result_dict = {}
    for i, alpha in enumerate(np.arange(0, 1.1, 0.1)):
        if logistic:
            cv_enet = glmnet.LogitNet(standardize=False, fit_intercept=False, n_splits=5,
                                      scoring='accuracy', alpha=alpha)
        else:
            cv_enet = glmnet.ElasticNet(standardize=False, fit_intercept=False, n_splits=5,
                                        scoring='mean_squared_error', alpha=alpha)
        cv_enet.fit(X, y, sample_weight=sample_weight)
        cv_scores = np.append(cv_scores, cv_enet.cv_mean_score_.max())
        cv_result_dict[f'cv_result_{i}'] = cv_enet
    # Keep the model whose best cross-validated score over the lambda path is highest
    cv_max_model = cv_result_dict[f'cv_result_{np.argmax(cv_scores)}']
    return cv_max_model.coef_
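# --- Usage sketch (not from the original source): a weighted logistic fit over the
# alpha grid. The synthetic data and the per-sample weights are assumptions made
# for illustration only.
import numpy as np
import glmnet
from sklearn import datasets

X_demo, y_demo = datasets.make_classification(n_samples=300, n_features=15, random_state=0)
w_demo = np.where(y_demo == 1, 2.0, 1.0)   # up-weight the positive class

coef_logistic = ElasticNet(X_demo, y_demo, logistic=True, sample_weight=w_demo)
print('non-zero coefficients:', np.sum(coef_logistic != 0))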
y_test.append(k)
k += 1

X_train = np.array(X_train)
X_test = np.array(X_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

# model.docvecs.most_similar()

## fit classifier ##
import glmnet
from sklearn.metrics import confusion_matrix
import pandas as pd

glm = glmnet.LogitNet()
glm.fit(X_train, y_train)
glm.score(X_test, y_test)
y_pred = glm.predict(X_test)
confusion_matrix(y_test, y_pred)

## fit classifier subset ##
subset = [0, 2]  # Fox and NYT
ix_train = np.isin(y_train, subset)
ix_test = np.isin(y_test, subset)

glm = glmnet.LogitNet()
glm.fit(X_train[ix_train], y_train[ix_train])
glm.score(X_test[ix_test], y_test[ix_test])
y_pred = glm.predict(X_test[ix_test])
import glmnet
import sklearn.datasets
import sklearn.model_selection
import sklearn.metrics

X, y = sklearn.datasets.load_iris(return_X_y=True)
Xtrn, Xtst, ytrn, ytst = sklearn.model_selection.train_test_split(
    X, y, train_size=0.8, random_state=4)

clf = glmnet.LogitNet()
clf.fit(Xtrn, ytrn)
ypred = clf.predict(Xtst)

acc = sklearn.metrics.accuracy_score(ytst, ypred)
print('glmnet accuracy on iris:', acc)
assert acc > 0.9