# Disabled experiment, kept for reference: argsort d_cor_xy (ascending) and
# delete the first 24 ranked columns from both XTrain and XTest.
# d_cor_xy = file["d_cor_xy"]
# d_cor_xx = file["d_cor_xx"]
#
# d_cor_xy_ordered = np.argsort(d_cor_xy)
# XTrain = np.delete(XTrain, d_cor_xy_ordered[:24],axis = 1)
# XTest = np.delete(XTest, d_cor_xy_ordered[:24],axis = 1)

# Ordinary least squares with no intercept term.
linearRegression = linear_model.LinearRegression(
    fit_intercept=False,
    n_jobs=-1,
)
count = 0

# NOTE(review): nothing visible in this chunk updates num_informative or
# saved_indexes; presumably the loop body continues below -- confirm.
while len(num_informative) < n_informative and len(saved_indexes) < active_set:
    # Accumulators are reset at the start of every round.
    losses = np.array([])
    betas = np.array([])
    coeffs = np.array([])
    corrs = np.array([])

    blocks_generated = generate_samples(
        num_blocks, XTrain.shape[1], active_set, r, saved_indexes
    )

    for i in range(num_blocks):
        x_train_i, x_test_i = get_current_data(
            XTrain, XTest, blocks_generated[i, :]
        )
        new_loss, beta, corr = compute_mse(
            linearRegression, x_train_i, YTrain, x_test_i, YTest
        )
        losses = np.append(losses, new_loss)

        # The first block seeds betas/corrs; every later block is appended
        # along axis 1.
        if len(betas) > 0:
            betas = np.append(betas, beta, axis=1)
            corrs = np.append(corrs, corr, axis=1)
        else:
            betas = beta
            corrs = corr

    # Rank the blocks by loss (ascending) and derive per-feature weights
    # from the blocks picked by extract_losses.
    ordered_losses = np.argsort(losses)
    orderd_losses_ = np.sort(losses)
    ordered_loss_ten = extract_losses(ordered_losses)
    weights_indexes = get_common_indexes(
        ordered_loss_ten, blocks_generated, n_features, betas
    )
    # Positions of weights_indexes sorted from largest to smallest value.
    ordered_weights_indexes = np.argsort(weights_indexes)[::-1]
# One block-evaluation round. The statements below were collapsed onto a
# single line, so the nesting described here is presumed -- TODO confirm.
#  1. When countIter == 5: fit lasso_cv on (x_train_saved, YTrain_), print its
#     alpha_, reset countIter to 0 and build a Lasso with that alpha
#     (flag_linear = 0); otherwise build a LinearRegression (flag_linear = 1).
#     Both models are created with fit_intercept=False.
#  2. generate_samples_dynamic_set(...) yields the new blocks and also rebinds
#     active_set and num_blocks.
#  3. For each of the num_blocks blocks: slice train/validation data with
#     get_current_data, keep the validation rows picked by
#     r1.choice(n_samples_val, active_set_samples, replace=False), then store
#     the loss and beta returned by compute_mse.
#  4. Order the losses ascending and count those with loss + std <= mean; the
#     trailing `if` compares that count against num_blocks / 3.
# NOTE(review): the body of the final `if` lies outside the visible chunk.
if countIter == 5: lasso_cv.fit(x_train_saved, YTrain_) best_alpha = lasso_cv.alpha_ print(best_alpha) countIter = 0 model = linear_model.Lasso(fit_intercept=False, alpha=best_alpha) flag_linear = 0 else: model = linear_model.LinearRegression(fit_intercept=False) flag_linear = 1 blocks_generated, active_set, num_blocks = generate_samples_dynamic_set(num_blocks, n_features_transf, r, saved_indexes, r1, min_set, max_set, active_set, max_active_set) for i in range(0, num_blocks): x_train_i, x_val_i = get_current_data(XTrain_, XVal_, blocks_generated[i, :]) rand_vect = r1.choice(n_samples_val, active_set_samples, replace=False) x_val_i = x_val_i[rand_vect, :] YVal_i = YVal_[rand_vect] new_loss, beta = compute_mse(model, x_train_i, YTrain_, x_val_i, YVal_i, score, values_TM) losses.append(new_loss) betas.append(beta) ordered_losses = np.argsort(losses) orderd_losses_ = np.array(losses)[ordered_losses] standard_deviation = np.std(orderd_losses_) mean_weights = np.mean(orderd_losses_) chosen_losses = len(orderd_losses_[orderd_losses_ + standard_deviation <= mean_weights]) if chosen_losses > num_blocks / 3:
# NOTE(review): original indentation was lost; this print may belong to a
# preceding loop that is outside the visible chunk.
print("new_loss", new_loss)

losses = []
indexes_tot = []
n_features = len(ordered_final_weights)
extracted_indexes = []

# Evaluate a weighted LASSO on growing prefixes of ordered_final_weights:
# pass i uses the union of dict_ entries for the first i + 1 keys.
for i in range(n_features):
    keys_sel = ordered_final_weights[:i + 1]

    # NOTE(review): the union is rebuilt from scratch on every pass; it could
    # be accumulated incrementally if neither dict_ nor ordered_final_weights
    # changes inside the loop -- confirm before optimising.
    indexes = []
    for k in ordered_final_weights[:i + 1]:
        indexes = np.union1d(indexes, dict_.get(k))
    indexes_tot.append(indexes)

    # Cast to int64 before the result is used for indexing.
    indexes = np.array(indexes).astype("int64")
    XTrain_current, XTest_current = get_current_data(XTrain, XVal, indexes)

    print("----------------------------")
    print("iteration ", i)

    weights_ = weights[indexes]
    model = Shooting(weights_)
    lasso = LASSOEstimator(model)
    loss, beta = compute_weightedLASSO(
        lasso,
        XTrain_current,
        YTrain,
        XTest_current,
        YVal,
        scoring,
        score_f,
        verbose,
        values_TM=[],
    )
    losses.append(loss)
    # Only coefficient magnitudes are kept from here on.
    beta = np.abs(beta)
# Disabled diagnostics, kept for reference.
#real_indexes = []
#if verbose:
#    print("loss LASSO test", new_loss)
#    print("------------------")

# Recompute the weights restricted to indexes_beta. With weights_all the
# weights are assigned over the full data and then sliced; otherwise the data
# is sliced first and weights are assigned on the subset only.
if weights_all:
    weights = assign_weights(weights_data.copy())[indexes_beta]
else:
    weights = assign_weights(weights_data.copy()[indexes_beta])

XTrain_current, XTest_current = get_current_data(XTrain, XTest, indexes_beta)

# Run LASSO on the reduced feature set.
print("-------------")
new_loss, beta = compute_lasso(
    XTrain_current,
    YTrain,
    XTest_current,
    YTest,
    score=score,
    values_TM=values_TM,
)
# Keep the magnitudes of the first coefficient column.
beta = np.abs(beta[:, 0])
beta_indexes, beta_ordered = get_beta_div_zeros(beta)

print("loss insieme ridotto", new_loss)
print(indexes_beta[beta_indexes])
print(weights_level[beta_indexes])
print("------------")
print("----------------")

model = Shooting(weights)
# Disabled alternatives for deriving the weights (min-max scaling, max-norm
# Normalizer, truncation to index_mse*3), kept for reference.
#weights_std = (ordered_values - min_)/ (max_ - min_)
#weights = weights_std
#norm = Normalizer(norm = "max")
#weights_std = norm.fit_transform(ordered_values)[0]
#weights = 1-weights_std
#weights = weights[:index_mse*3]

# Split the selected indexes into those also present in informative_indexes
# and the remainder.
current_informative = np.intersect1d(ordered_indexes, informative_indexes)
current_not_informative = np.array(
    list(set(ordered_indexes) - set(current_informative))
)
if verbose:
    print("informative", len(current_informative), "su", len(ordered_indexes))
    print("non informative", len(current_not_informative), "su", len(ordered_indexes))

current_train, current_test = get_current_data(XTrain, XTest, ordered_indexes)
indexes_to_extract.append(ordered_indexes)

# Single-element set holding the one model to tune.
model_list = {Shooting(weights)}
ext_data = ".npz"
ext_model = ".pkl"

for model in model_list:
    lasso = LASSOEstimator(model)
    # NOTE(review): passing fit_params to the constructor and the
    # "mean_squared_error" scorer name are only accepted by older
    # scikit-learn releases -- confirm the pinned version before upgrading.
    clf = GridSearchCV(
        lasso,
        parameters,
        fit_params={"verbose": False},
        cv=3,
        scoring="mean_squared_error",
    )
    clf.fit(current_train, YTrain)
    lambda_opt = clf.best_params_
    if verbose:
        print("best lambda", lambda_opt)
# NOTE(review): original indentation was lost; the first two statements may
# belong to a preceding loop that is outside the visible chunk.
print("loss", new_loss, "beta inf", len(index_inf), ", beta non inf", len(index_beta))
real_indexes = []

# Pick the iteration with the smallest entry in mses and keep the leading
# ordered_final_weights entries up to and including it.
# NOTE(review): `iter` shadows the builtin; left unchanged because code
# outside this chunk may still read it.
iter = np.argmin(mses)
print("iter", iter)
indexes = ordered_final_weights[:iter + 1].astype("int64")

# With weights_all the weights are assigned over the full data and then
# sliced; otherwise the data is sliced first.
if weights_all:
    weights = assign_weights(weights_data.copy())[indexes]
else:
    weights = assign_weights(weights_data.copy()[indexes])

# Compute the weighted LASSO on the train/validation split.
XTrainVal, YTrainVal, XVal, YVal = results_cross_val.extract_train_val()
XTrain_Valcurrent, XVal_current = get_current_data(XTrainVal, XVal, indexes)
print("----------------------------")

model = Shooting(weights)
lasso = LASSOEstimator(model)
loss, beta = compute_weightedLASSO(
    lasso,
    XTrain_Valcurrent,
    YTrainVal,
    XVal_current,
    YVal,
    scoring,
    score_f,
    verbose,
    values_TM,
)
beta = np.abs(beta)
beta_indexes, beta_ordered = get_beta_div_zeros(beta)

# Map the positions returned by get_beta_div_zeros back to feature indexes.
final_indexes = indexes[beta_indexes]
print("final indexes", final_indexes)
XTrain_current, XTest_current = get_current_data(XTrain, XTest, final_indexes)