def set_objective(self, X, y, lmbd):
    """Store the problem data and prepare the solver and its fit arguments.

    Args:
        X: Design matrix. A SciPy CSC sparse matrix is re-packed as CSR
            before being stored; any other input is stored unchanged.
        y: Target vector, stored as given.
        lmbd: Regularisation strength. The value forwarded to the solver
            as ``lambd`` is ``lmbd`` divided by the number of rows of ``X``.
    """
    self.X = X
    self.y = y
    self.lmbd = lmbd

    # Only CSC input is converted; dense arrays and other sparse formats
    # are left as they are.
    is_csc_sparse = (scipy.sparse.issparse(self.X)
                     and scipy.sparse.isspmatrix_csc(self.X))
    if is_csc_sparse:
        self.X = scipy.sparse.csr_matrix(self.X)

    self.solver = BinaryClassifier(loss='logistic', penalty='l1',
                                   fit_intercept=False)

    # Keyword arguments kept aside for a later call to the solver's fit().
    self.solver_parameter = {
        'lambd': self.lmbd / self.X.shape[0],
        'solver': 'auto',
        'it0': 1000000,
        'tol': 1e-12,
        'verbose': False,
    }
def fit(self, X_train, y_train, init=None):
    """Fit a squared-hinge classifier and derive a squared radius from its gap.

    Two code paths exist, selected by ``self.ars``:

    * truthy: a ``BinaryClassifier`` is fitted with the ``'qning-svrg'``
      solver for at most ``self.n_epochs`` epochs. ``self.loss`` and
      ``self.dg`` are read from the last column of the array returned by
      its ``fit``. ``self.first_obj`` / ``self.first_dg`` are NOT set on
      this path.
    * falsy: a ``LinearSVC`` with ``max_iter=0`` is fitted first and passed
      to ``self.get_duality_gap`` to fill ``self.first_obj`` and
      ``self.first_dg``; a second ``LinearSVC`` with
      ``max_iter=self.n_epochs`` then provides ``self.z``, ``self.loss``
      and ``self.dg``.

    In both cases ``self.squared_radius`` is set to
    ``2 * self.dg / self.lmbda`` and the elapsed wall-clock time is printed.

    Args:
        X_train: Training samples, stored on ``self.X_train``.
        y_train: Training labels, stored on ``self.y_train``.
        init: Optional initial weights. Only used when ``self.ars`` is
            truthy: it is assigned to the classifier's ``w`` attribute and
            the fit is called with ``restart=True``.

    Returns:
        ``self``.
    """
    t_begin = time.time()
    self.X_train = X_train
    self.y_train = y_train

    if self.ars:
        svc = BinaryClassifier(loss='sqhinge', penalty='l2')
        restart = init is not None
        if restart:
            svc.w = init
        info = svc.fit(
            X_train,
            y_train,
            lambd=self.lmbda,
            solver='qning-svrg',
            max_epochs=self.n_epochs,
            it0=1,
            tol=1.0e-20,
            restart=restart,
            verbose=False,
        )
        # Rows 1 and 2 of the returned array, last column -- presumably the
        # primal objective and the dual value; confirm against the solver's
        # documentation.
        self.loss = info[1, -1]
        self.dg = self.loss - info[2, -1]
        self.z = svc.w.reshape(-1)
    else:
        shared_kwargs = dict(loss='squared_hinge', dual=False,
                             C=1 / self.lmbda, fit_intercept=False,
                             tol=1.0e-20)

        # Fit with max_iter=0 to record the objective / gap pair that
        # get_duality_gap reports before any real optimisation budget.
        first_svc = LinearSVC(max_iter=0, **shared_kwargs)
        first_svc = first_svc.fit(self.X_train, self.y_train)
        self.first_obj, self.first_dg = self.get_duality_gap(first_svc)

        svc = LinearSVC(max_iter=self.n_epochs, **shared_kwargs)
        svc = svc.fit(self.X_train, self.y_train)
        self.z = svc.coef_.reshape(-1)
        self.loss, self.dg = self.get_duality_gap(svc)

    self.squared_radius = 2 * self.dg / self.lmbda

    t_end = time.time()
    print('Time to fit DualityGapScreener :', t_end - t_begin)
    return self
def fit_estimator(X, y, loss, penalty, mu, lmbda, intercept, max_iter=10000, ars=False):
    """Build and fit the estimator matching a (loss, penalty) combination.

    Supported combinations:

    * ``'truncated_squared'`` or ``'squared'`` with ``penalty == 'l1'``:
      ``Lasso(alpha=lmbda)``.
    * ``'hinge'`` with ``penalty == 'l2'``: ``LinearSVC(C=1 / lmbda)``.
    * ``'squared_hinge'`` (any penalty): ``LinearSVC`` with ``dual=False``
      when ``ars`` is falsy, otherwise ``BinaryClassifier`` fitted with the
      ``'catalyst-miso'`` solver.
    * ``'safe_logistic'`` (any penalty): ``SafeLogistic``.
    * ``'logistic'`` (any penalty): ``LogisticRegression(C=1 / lmbda)``.

    Args:
        X: Training samples.
        y: Training targets.
        loss: Name of the loss, see the list above.
        penalty: Name of the penalty, forwarded to the estimator where it
            accepts one.
        mu: Unused by this function; kept for interface compatibility.
        lmbda: Regularisation strength. Passed directly as ``alpha`` /
            ``lambd`` / ``lmbda`` or inverted into ``C`` depending on the
            estimator.
        intercept: Forwarded as ``fit_intercept`` (not used by the
            ``'safe_logistic'`` branch).
        max_iter: Iteration budget. Ignored by the ``'logistic'`` branch and
            by the non-``ars`` ``'squared_hinge'`` branch.
        ars: Selects the ``BinaryClassifier`` path for ``'squared_hinge'``.

    Returns:
        The fitted estimator.

    Raises:
        ValueError: If the (loss, penalty) combination is not supported.
            Previously this case printed a message and then failed with an
            ``UnboundLocalError`` on the return statement.
    """
    if loss in ('truncated_squared', 'squared') and penalty == 'l1':
        # Both losses are served by the same Lasso configuration.
        return Lasso(alpha=lmbda, fit_intercept=intercept,
                     max_iter=max_iter).fit(X, y)

    if loss == 'hinge' and penalty == 'l2':
        return LinearSVC(C=1 / lmbda, loss=loss, penalty=penalty,
                         fit_intercept=intercept, max_iter=max_iter).fit(X, y)

    if loss == 'squared_hinge':
        if ars:
            estimator = BinaryClassifier(loss='sqhinge', penalty=penalty,
                                         fit_intercept=intercept)
            # The return value of this fit() is deliberately not used; the
            # fitted classifier object itself is returned.
            estimator.fit(X, y, lambd=lmbda, solver='catalyst-miso',
                          nepochs=max_iter, verbose=False)
            return estimator
        # NOTE(review): max_iter is hard-coded to 1000 here and the
        # max_iter argument is ignored -- kept as-is to preserve results;
        # confirm whether this is intentional.
        return LinearSVC(C=1 / lmbda, loss=loss, dual=False, penalty=penalty,
                         fit_intercept=intercept, max_iter=1000).fit(X, y)

    if loss == 'safe_logistic':
        return SafeLogistic(lmbda=lmbda, penalty=penalty,
                            max_iter=max_iter).fit(X, y)

    if loss == 'logistic':
        return LogisticRegression(C=1 / lmbda, penalty=penalty,
                                  fit_intercept=intercept).fit(X, y)

    raise ValueError(
        'ERROR, you picked a combination which is not implemented: '
        'loss={!r}, penalty={!r}.'.format(loss, penalty))
class Solver(BaseSolver):
    """``BaseSolver`` subclass wrapping an L1-penalised logistic ``BinaryClassifier``."""

    # Identification / installation metadata read from the class.
    name = 'Cyanure'
    install_cmd = 'pip'
    package_name = 'cyanure-mkl'
    package_import = 'cyanure'

    def set_objective(self, X, y, lmbd):
        """Store the problem data and prepare the classifier.

        Args:
            X: Design matrix; its first dimension is used to rescale
                ``lmbd``.
            y: Target vector.
            lmbd: Regularisation strength. The classifier receives
                ``lmbd / X.shape[0]`` as ``lambd``.
        """
        self.X = X
        self.y = y
        self.lmbd = lmbd

        self.solver = BinaryClassifier(loss='logistic', penalty='l1',
                                       fit_intercept=False)

        # Fixed keyword arguments reused by every call to run().
        self.solver_parameter = {
            'lambd': self.lmbd / self.X.shape[0],
            'tol': 1e-12,
            'verbose': False,
        }

    def run(self, n_iter):
        """Fit the classifier on the stored data with ``max_epochs=n_iter``."""
        self.solver.fit(
            self.X,
            self.y,
            max_epochs=n_iter,
            **self.solver_parameter
        )

    def get_result(self):
        """Return the weights reported by the classifier's ``get_weights()``."""
        return self.solver.get_weights()
def experiment_regpath(dataset, synth_params, size, scale_data, redundant, noise,
                       lmbda_grid_start, lmbda_grid_end, lmbda_grid_num, mu,
                       loss, penalty, intercept, n_ellipsoid_steps, n_epochs,
                       n_epochs_ell_path, cut, get_ell_from_subset, clip_ell,
                       use_sphere, nb_exp, dontsave):
    """Run a regularisation-path experiment with and without sample screening.

    The data is loaded once with ``load_experiment(..., classification=True)``
    and a log-spaced grid of ``lmbda`` values is built with ``np.logspace``.
    For each of the ``nb_exp`` repetitions, ``random`` and ``np.random`` are
    seeded with the 1-based repetition index and a train split is drawn with
    ``train_test_split(test_size=0.2)``; the test part is discarded.

    Within a repetition, for each ``lmbda`` of the grid:

    * first grid value: a ``DualityGapScreener`` and an ``EllipsoidScreener``
      (on a random subset of ``get_ell_from_subset`` samples) are fitted, and
      two ``BinaryClassifier`` instances (``svc`` and ``svc_ell``) are fitted
      on the whole training set. Both budgets stay at 0 for this value.
    * other values: ``svc`` is refitted on all samples with ``restart=True``
      (no-screening baseline). ``svc_ell`` is refitted for at most
      ``n_epochs_ell_path`` epochs to get a duality gap, a fresh
      ``EllipsoidScreener`` is fitted on a new random subset, the samples
      whose screening score exceeds ``-mu`` are kept, and ``svc_ell`` is
      refitted on those samples only with ``lambd`` rescaled by
      ``n_train / n_kept``.

    Budgets and training-set scores are accumulated per ``lmbda``, averaged
    over ``nb_exp``, printed, and -- unless ``dontsave`` is truthy -- saved
    with ``np.save`` under ``RESULTS_PATH/<dataset>/<exp_title>``. The output
    directory is created whether or not results are saved.

    ``scale_data`` is accepted but not used by this function.

    NOTE(review): the block nesting below was reconstructed from a
    whitespace-collapsed source; confirm that the screening / budget steps
    belong to the "not the first lmbda" branch as laid out here.

    Returns:
        None.
    """
    print('START')
    title_template = (
        'X_size_{}_ell_subset_{}_loss_{}_n_ell_{}_mu_{}_cut_ell_{}_n_epochs_{}'
        '_n_ell_path_{}_use_sphere_{}_start_{}_end_{}_num_{}_regpath'
    )
    exp_title = title_template.format(
        size, get_ell_from_subset, loss, n_ellipsoid_steps, mu, cut, n_epochs,
        n_epochs_ell_path, use_sphere, lmbda_grid_start, lmbda_grid_end,
        lmbda_grid_num)
    print(exp_title)

    X, y = load_experiment(dataset, synth_params, size, redundant, noise,
                           classification=True)

    lmbda_grid = np.logspace(lmbda_grid_start, lmbda_grid_end,
                             num=lmbda_grid_num)

    # One zero-initialised accumulator per (metric, lmbda) pair.
    key_templates = (
        'budget_ell_lmbda_{}',
        'budget_noscreen_lmbda_{}',
        'score_ell_lmbda_{}',
        'score_noscreen_lmbda_{}',
    )
    data = {template.format(lmbda): 0
            for lmbda in lmbda_grid
            for template in key_templates}

    def make_ellipsoid_screener(lmbda, n_train):
        # The screener is fitted on a subset, so lmbda is rescaled by the
        # ratio of the full training size to the subset size.
        return EllipsoidScreener(
            lmbda=lmbda * n_train / get_ell_from_subset,
            mu=mu,
            loss=loss,
            penalty=penalty,
            intercept=intercept,
            classification=True,
            n_ellipsoid_steps=n_ellipsoid_steps,
            cut=cut,
            clip_ell=clip_ell,
            use_sphere=use_sphere,
            ars=True)

    run = 0
    while run < nb_exp:
        run += 1
        # Seeds are the 1-based repetition index.
        random.seed(run)
        np.random.seed(run)

        X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2)
        n_train = X_train.shape[0]

        for lmbda in lmbda_grid:
            print('---------- LMBDA ---------: ', lmbda)
            budget_ell = 0
            budget_noscreen = 0

            if lmbda == lmbda_grid[0]:
                # Start of the path: initialise the screeners and both
                # classifiers; nothing is charged to the budgets here.
                screener_ell = make_ellipsoid_screener(lmbda, n_train)
                screener_dg = DualityGapScreener(lmbda=lmbda,
                                                 n_epochs=n_epochs, ars=True)
                screener_dg.fit(X_train, y_train)
                print('Init radius : ', screener_dg.squared_radius)

                random_subset = random.sample(range(0, n_train),
                                              get_ell_from_subset)
                screener_ell.fit(X_train[random_subset],
                                 y_train[random_subset],
                                 init=screener_dg.z,
                                 rad=screener_dg.squared_radius)

                svc = BinaryClassifier(loss='sqhinge', penalty=penalty,
                                       fit_intercept=intercept)
                svc.fit(X_train, y_train, solver='qning-svrg', lambd=lmbda,
                        verbose=False)

                svc_ell = BinaryClassifier(loss='sqhinge', penalty=penalty,
                                           fit_intercept=intercept)
                svc_ell.fit(X_train, y_train, solver='qning-svrg',
                            lambd=lmbda, verbose=False)
            else:
                # --- Baseline: refit on every sample, no screening. ---
                noscreen_info = svc.fit(X_train, y_train, solver='qning-svrg',
                                        it0=1, lambd=lmbda, restart=True,
                                        verbose=False)
                budget_fit_solver_noscreen = noscreen_info[0, -1]
                print('Epoch fit solver no screen :',
                      budget_fit_solver_noscreen)
                budget_noscreen += budget_fit_solver_noscreen * n_train
                print('Budget solver no screen :', budget_noscreen)

                # --- Screening: short refit to obtain a duality gap. ---
                info = svc_ell.fit(X_train, y_train, solver='qning-svrg',
                                   lambd=lmbda, verbose=False,
                                   max_epochs=n_epochs_ell_path, it0=1,
                                   restart=True)
                dg = info[1, -1] - info[2, -1]
                init_rad = 2 * dg / lmbda

                screener_ell = make_ellipsoid_screener(lmbda, n_train)
                random_subset = random.sample(range(0, n_train),
                                              get_ell_from_subset)
                print('Init rad : ', init_rad)
                screener_ell.fit(X_train[random_subset],
                                 y_train[random_subset],
                                 init=svc_ell.w, rad=init_rad)
                if use_sphere and n_ellipsoid_steps > 0:
                    print('Final rad : ', screener_ell.squared_radius)

                scores_ell = screener_ell.screen(X_train, y_train)
                tokeep = np.where(scores_ell > -mu)[0]
                n_kept = len(tokeep)
                print('To keep : ', n_kept)

                # Refit on the kept samples only; lambd is rescaled by the
                # ratio of the full to the reduced sample count.
                screened_info = svc_ell.fit(
                    X_train[tokeep], y_train[tokeep], solver='qning-svrg',
                    it0=1, lambd=lmbda * (n_train / n_kept), restart=True,
                    verbose=False)
                budget_fit_solver = screened_info[0, -1]

                budget_init_ell = n_epochs_ell_path * n_train
                budget_fit_ell = n_ellipsoid_steps * get_ell_from_subset
                if cut:
                    budget_fit_ell += get_ell_from_subset
                budget_solver_screened = budget_fit_solver * n_kept

                budget_ell += (budget_init_ell + budget_fit_ell
                               + budget_solver_screened)
                print('Epoch fit solver screen', budget_fit_solver)
                print('Budget solver screen : ', budget_init_ell,
                      budget_fit_ell, budget_solver_screened)

            # Both classifiers are scored on the full training split.
            score_ell = svc_ell.score(X_train, y_train)
            score_noscreen = svc.score(X_train, y_train)
            print('Score on screened : ', score_ell, 'Score on whole : ',
                  score_noscreen)

            data['budget_ell_lmbda_{}'.format(lmbda)] += budget_ell
            data['budget_noscreen_lmbda_{}'.format(lmbda)] += budget_noscreen
            data['score_ell_lmbda_{}'.format(lmbda)] += score_ell
            data['score_noscreen_lmbda_{}'.format(lmbda)] += score_noscreen

    # Turn the accumulated sums into per-repetition averages.
    data = {key: float(total / nb_exp) for key, total in data.items()}

    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')
    print('END')
    print(data)
size=60000, redundant=0, noise=None, classification=True) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) prop = np.unique(y_test, return_counts=True)[1] print('BASELINE : ', 1 - prop[1] / prop[0]) screener = DualityGapScreener(lmbda=1e-5, n_epochs=9, ars=True).fit(X_train, y_train) print('Squared Radius : ', 2 * screener.dg / 1e-5) print('Score : ', screener.score(X_test, y_test)) svc_ell = BinaryClassifier(loss='sqhinge', penalty='l2') budget_fit_solver = svc_ell.fit( X_train, y_train, solver='qning-svrg', it0=1, lambd=1e-5, verbose=False)[0, -1] #* X_train.shape[0] #len(tokeep_ell) print(budget_fit_solver, budget_fit_solver * X_train.shape[0]) scores = screener.screen(X_train, y_train) tokeep = np.where(scores > -1.0)[0] print('Nb kept : ', len(tokeep)) svc_ell = BinaryClassifier(loss='sqhinge', penalty='l2') budget_fit_solver = svc_ell.fit( X_train[tokeep],