def experiment_acc(dataset, loss, penalty, lmbda):
    """Measure mean test accuracy of an estimator on a real dataset.

    Averages the test-set score over 3 random 80/20 train/test splits and
    saves the result as a one-entry dict under RESULTS_PATH/accuracies.

    Parameters
    ----------
    dataset : str
        One of 'mnist', 'svhn' or 'rcv1'; each maps to a fixed sample size
        passed to ``load_experiment``.
    loss, penalty : str
        Forwarded to ``fit_estimator``.
    lmbda : float
        Regularization strength forwarded to ``fit_estimator``.

    Raises
    ------
    ValueError
        If `dataset` is not a supported name.  (Previously an unsupported
        name fell through the if/elif chain and crashed later with a
        NameError on `size`.)
    """
    acc = {}
    random.seed(0)
    np.random.seed(0)
    # Dataset name -> full dataset size expected by load_experiment.
    sizes = {'mnist': 60000, 'svhn': 604388, 'rcv1': 781265}
    if dataset not in sizes:
        # Fail fast with a clear message instead of a NameError on `size`.
        raise ValueError('Unsupported dataset: {}'.format(dataset))
    size = sizes[dataset]
    X, y = load_experiment(dataset=dataset,
                           synth_params=None,
                           size=size,
                           redundant=0,
                           noise=0,
                           classification=True)
    score = 0
    k = 0
    # Average the test accuracy over 3 random 80/20 splits.
    while k < 3:
        k += 1
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        estimator = fit_estimator(X_train,
                                  y_train,
                                  loss=loss,
                                  penalty=penalty,
                                  mu=1,
                                  lmbda=lmbda,
                                  intercept=False,
                                  max_iter=10000)
        score += estimator.score(X_test, y_test)
    acc['{}_{}_{}_{}'.format(dataset, loss, penalty, lmbda)] = score / 3
    print('{}_{}_{}_{}'.format(dataset, loss, penalty, lmbda), ' : Done !')
    save_dataset_folder = os.path.join(RESULTS_PATH, 'accuracies')
    os.makedirs(save_dataset_folder, exist_ok=True)
    np.save(
        os.path.join(save_dataset_folder,
                     '{}_{}_{}_{}'.format(dataset, loss, penalty, lmbda)), acc)
    print('RESULTS SAVED!')
    return
self.scores = rank_dataset_accelerated(X, y, self.z, self.scaling, self.L, self.I_k_vec, self.g, self.mu, self.classification, self.intercept, self.cut) return self.scores if __name__ == "__main__": # simple test from sklearn.model_selection import train_test_split from utils.loaders import load_experiment X, y = load_experiment(dataset='cifar10_kernel', synth_params=None, size=10000, redundant=0, noise=None, classification=True) #random.seed(0) #np.random.seed(0) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) z_init = np.random.rand(X_train.shape[1]) screener = EllipsoidScreener(lmbda=0, mu=0, loss='safe_logistic', penalty='l2', intercept=False, classification=True, n_ellipsoid_steps=2000,
def experiment_reg(dataset, synth_params, size, scale_data, redundant, noise,
                   nb_delete_steps, lmbda, mu, loss, penalty, intercept,
                   n_ellipsoid_steps, better_init, better_radius, cut,
                   get_ell_from_subset, clip_ell, use_sphere, guarantee,
                   nb_exp, nb_test, plot, zoom, dontsave):
    """Compare test accuracy when training points are deleted by screening vs. at random.

    For each of `nb_exp` seeded train/test splits: fit an EllipsoidScreener
    (optionally on a random subset of size `get_ell_from_subset`), rank the
    training points by their screening scores, then for a schedule of
    deletion counts retrain `nb_test` times on (a) the full set, (b) the set
    with the lowest-scored points removed, and (c) a randomly truncated set,
    recording mean test accuracy for each.  Optionally also checks the
    "safe guarantee" (training accuracy on the provably safe subset vs. the
    whole set).  Results are saved under RESULTS_PATH/<dataset> unless
    `dontsave` is set; optionally plotted.
    """
    print('START')
    exp_title = 'X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_intercept_{}_mu_{}_redundant_{}_noise_{}_better_init_{}_better_radius_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}_nds_{}'.format(
        size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps, intercept,
        mu, redundant, noise, better_init, better_radius, cut, clip_ell,
        use_sphere, nb_delete_steps)
    print(exp_title)
    X, y = load_experiment(dataset, synth_params, size, redundant, noise,
                           classification=True)
    scores_regular_all = []  # per-experiment accuracy curves, full training set
    scores_ell_all = []      # per-experiment curves, screening-based deletion
    scores_r_all = []        # per-experiment curves, random deletion
    # Accumulates [whole-set accuracy, safe-subset accuracy] over experiments.
    safe_guarantee = np.array([0., 0.])
    compt_exp = 0
    nb_safe_ell_all = 0
    while compt_exp < nb_exp:
        # Deterministic but different seed per repetition.
        random.seed(compt_exp + 1)
        np.random.seed(compt_exp + 1)
        compt_exp += 1
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        print('Ellipsoid steps to be done : ', n_ellipsoid_steps)
        screener_ell = EllipsoidScreener(lmbda=lmbda,
                                         mu=mu,
                                         loss=loss,
                                         penalty=penalty,
                                         intercept=intercept,
                                         classification=True,
                                         n_ellipsoid_steps=n_ellipsoid_steps,
                                         better_init=better_init,
                                         better_radius=better_radius,
                                         cut=cut,
                                         clip_ell=clip_ell,
                                         use_sphere=use_sphere)
        if scale_data:
            # Fit the scaler on train only; apply the same transform to test.
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
        if get_ell_from_subset != 0:
            # Fit the screener on a random subset to reduce its cost.
            random_subset = random.sample(range(0, X_train.shape[0]),
                                          get_ell_from_subset)
            screener_ell.fit(X_train[random_subset], y_train[random_subset])
        else:
            screener_ell.fit(X_train, y_train)
        scores_screenell = screener_ell.screen(X_train, y_train)
        # Ascending sort: lowest screening scores (deleted first) come first.
        idx_screenell = np.argsort(scores_screenell)
        print('SCORES_ELL', scores_screenell[:10])
        nb_safe_ell_all += get_nb_safe(scores_screenell, mu,
                                       classification=True)
        scores_regular = []
        scores_ell = []
        scores_r = []
        nb_to_del_table = None
        if guarantee:
            # Points with score > -mu are provably safe to keep; check that
            # training only on them matches training on the whole set.
            idx_safeell = np.where(scores_screenell > -mu)[0]
            if len(idx_safeell) != 0:
                estimator_whole = fit_estimator(X_train, y_train, loss,
                                                penalty, mu, lmbda, intercept)
                estimator_screened = fit_estimator(X_train[idx_safeell],
                                                   y_train[idx_safeell], loss,
                                                   penalty, mu, lmbda,
                                                   intercept)
                temp = np.array([
                    estimator_whole.score(X_train, y_train),
                    estimator_screened.score(X_train, y_train)
                ])
                safe_guarantee += temp
                print('SAFE GUARANTEE : ', temp)
        if nb_delete_steps != 0:
            # Square-root-spaced deletion schedule, rescaled so the last
            # step deletes (almost) the whole training set.
            nb_to_del_table = np.sqrt(
                np.linspace(1, X_train.shape[0], nb_delete_steps, dtype='int'))
            nb_to_del_table = np.ceil(
                nb_to_del_table *
                (X_train.shape[0] / nb_to_del_table[-1])).astype(int)
            X_r = X_train
            y_r = y_train
            for i, nb_to_delete in enumerate(nb_to_del_table):
                if i == 0:
                    # The full-data baseline does not depend on the deletion
                    # step, so it is scored once and reused for every step.
                    score_regular = 0
                score_ell = 0
                score_r = 0
                compt = 0
                # Keep the highest-scored points according to the screener.
                X_screenell, y_screenell = X_train[idx_screenell[
                    nb_to_delete:]], y_train[idx_screenell[nb_to_delete:]]
                # Random-deletion baseline: drop a prefix of the (shuffled
                # by train_test_split) training set.
                X_r, y_r = X_train[nb_to_delete:], y_train[nb_to_delete:]
                if not (dataset_has_both_labels(y_r)):
                    print(
                        'Warning, only one label in randomly screened dataset')
                if not (dataset_has_both_labels(y_screenell)):
                    print('Warning, only one label in screenell dataset')
                # Stop deleting once either truncated set has a single class.
                if not (dataset_has_both_labels(y_r)
                        and dataset_has_both_labels(y_screenell)):
                    break
                print('X_train :', X_train.shape, 'X_screenell :',
                      X_screenell.shape, 'X_random : ', X_r.shape)
                while compt < nb_test:
                    compt += 1
                    if i == 0:
                        estimator_regular = fit_estimator(X_train,
                                                          y_train,
                                                          loss=loss,
                                                          penalty=penalty,
                                                          mu=mu,
                                                          lmbda=lmbda,
                                                          intercept=intercept)
                    estimator_screenell = fit_estimator(X_screenell,
                                                        y_screenell,
                                                        loss=loss,
                                                        penalty=penalty,
                                                        mu=mu,
                                                        lmbda=lmbda,
                                                        intercept=intercept)
                    estimator_r = fit_estimator(X_r,
                                                y_r,
                                                loss=loss,
                                                penalty=penalty,
                                                mu=mu,
                                                lmbda=lmbda,
                                                intercept=intercept)
                    if i == 0:
                        score_regular += estimator_regular.score(
                            X_test, y_test)
                    score_ell += estimator_screenell.score(X_test, y_test)
                    score_r += estimator_r.score(X_test, y_test)
                scores_regular.append(score_regular / nb_test)
                scores_ell.append(score_ell / nb_test)
                scores_r.append(score_r / nb_test)
            scores_regular_all.append(scores_regular)
            scores_ell_all.append(scores_ell)
            scores_r_all.append(scores_r)
    print('Number of datapoints we can safely screen with ellipsoid method:',
          nb_safe_ell_all / nb_exp)
    data = {
        'nb_to_del_table': nb_to_del_table,
        'scores_regular': scores_regular_all,
        'scores_ell': scores_ell_all,
        'scores_r': scores_r_all,
        'nb_safe_ell': nb_safe_ell_all / nb_exp,
        'train_set_size': X_train.shape[0],
        'safe_guarantee': safe_guarantee / nb_exp
    }
    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')
    if plot:
        plot_experiment(data, zoom=zoom)
    print('END')
    return
def experiment_tradeoff(dataset, synth_params, size, scale_data, redundant,
                        noise, lmbda, mu, loss, penalty, intercept, acc,
                        rescale, n_ellipsoid_steps, better_init, cut,
                        get_ell_from_subset, clip_ell, use_sphere, guarantee,
                        nb_exp, plot, zoom, dontsave):
    """Trace the screening/accuracy trade-off as a function of epoch budget.

    With ``acc`` set, records LinearSVC test accuracy as a function of
    ``max_iter``.  Otherwise, for each epoch budget `i`: the first
    `better_init` epochs use a DualityGapScreener, and the remaining budget
    is converted into ellipsoid steps on a random subset of size
    `get_ell_from_subset` (warm-started from the duality-gap center/radius),
    counting how many points can be safely screened at each budget.
    Aggregated fractions are saved under RESULTS_PATH/<dataset>.
    """
    print('START')
    X, y = load_experiment(dataset, synth_params, size, redundant, noise,
                           classification=True)
    # Three experiment variants share one title template, distinguished by
    # suffix: _acc, _tradeoff_rescale, _tradeoff.
    if acc:
        exp_title = 'X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_mu_{}_better_init_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}_acc'.format(
            size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps, mu,
            better_init, cut, clip_ell, use_sphere)
    elif rescale:
        exp_title = 'X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_mu_{}_better_init_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}_tradeoff_rescale'.format(
            size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps, mu,
            better_init, cut, clip_ell, use_sphere)
    else:
        exp_title = 'X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_mu_{}_better_init_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}_tradeoff'.format(
            size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps, mu,
            better_init, cut, clip_ell, use_sphere)
    print(exp_title)
    # Convert ellipsoid steps on the subset into equivalent full-dataset
    # epochs (train set is 80% of X, hence the 0.8 factor).
    nb_epochs = int(better_init + n_ellipsoid_steps * get_ell_from_subset /
                    (0.8 * X.shape[0]))
    scores_screening_all = np.zeros(nb_epochs)
    # Accumulates [whole-set accuracy, safe-subset accuracy] over experiments.
    safe_guarantee = np.array([0., 0.])
    compt_exp = 0
    while compt_exp < nb_exp:
        # NOTE(review): seeding is disabled here (unlike experiment_reg), so
        # repetitions are not reproducible — confirm this is intentional.
        #random.seed(compt_exp + 1)
        #np.random.seed(compt_exp + 1)
        compt_exp += 1
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.2)
        if acc:
            # Accuracy-vs-epochs baseline: refit LinearSVC with an
            # increasing iteration cap (tiny tol so max_iter binds).
            for i in range(nb_epochs):
                estimator = LinearSVC(loss='squared_hinge',
                                      dual=False,
                                      C=1 / lmbda,
                                      fit_intercept=False,
                                      max_iter=i + 1,
                                      tol=1.0e-20).fit(X_train, y_train)
                scores_screening_all[i] += estimator.score(X_test, y_test)
                print(scores_screening_all[i])
            print('SCORES', scores_screening_all)
        else:
            for i in range(nb_epochs):
                i = i + 1  # budgets are 1-based below
                if i <= better_init:
                    # Phase 1: duality-gap screening for the first epochs;
                    # keep the last center/radius to warm-start phase 2.
                    screener_dg = DualityGapScreener(lmbda=lmbda,
                                                     n_epochs=i).fit(
                                                         X_train, y_train)
                    z_init = screener_dg.z
                    rad_init = screener_dg.squared_radius
                    scores = screener_dg.screen(X_train, y_train)
                    scores_screening_all[i - 1] += get_nb_safe(
                        scores, mu, classification=True)
                    print('SCREEN DG RADIUS', screener_dg.squared_radius)
                elif better_init < i <= nb_epochs:
                    # Phase 2: spend the remaining budget on ellipsoid steps
                    # over a random subset.
                    if rescale:
                        # Compensate lmbda for fitting on a smaller subset.
                        lmbda_ = lmbda * X_train.shape[0] / get_ell_from_subset
                    else:
                        lmbda_ = lmbda
                    random_subset = random.sample(range(0, X_train.shape[0]),
                                                  get_ell_from_subset)
                    screener_ell = EllipsoidScreener(
                        lmbda=lmbda_,
                        mu=mu,
                        loss=loss,
                        penalty=penalty,
                        intercept=intercept,
                        classification=True,
                        n_ellipsoid_steps=int(
                            (i - better_init) * X_train.shape[0] /
                            get_ell_from_subset),
                        better_init=0,
                        better_radius=0,
                        cut=cut,
                        clip_ell=clip_ell,
                        use_sphere=use_sphere).fit(X_train[random_subset],
                                                   y_train[random_subset],
                                                   init=z_init,
                                                   rad=rad_init)
                    scores = screener_ell.screen(X_train, y_train)
                    scores_screening_all[i - 1] += get_nb_safe(
                        scores, mu, classification=True)
                    if use_sphere:
                        print('SCREEN ELL RADIUS',
                              screener_ell.squared_radius)
        if guarantee:
            # Uses `scores` from the final epoch of this repetition.
            idx_safeell = np.where(scores > -mu)[0]
            print('SCORES ', scores)
            print('NB TO KEEP', len(idx_safeell))
            if len(idx_safeell) != 0:
                estimator_whole = fit_estimator(X_train, y_train, loss,
                                                penalty, mu, lmbda, intercept)
                if rescale:
                    lmbda_ = lmbda * X_train.shape[0] / len(idx_safeell)
                # NOTE(review): if acc=True and rescale=False, `lmbda_` is
                # never assigned before this call — likely NameError; confirm
                # `guarantee` is only used with the screening path.
                estimator_screened = fit_estimator(X_train[idx_safeell],
                                                   y_train[idx_safeell], loss,
                                                   penalty, mu, lmbda_,
                                                   intercept)
                temp = np.array([
                    estimator_whole.score(X_train, y_train),
                    estimator_screened.score(X_train, y_train)
                ])
                print('SAFE GUARANTEE : ', temp)
                safe_guarantee += temp
    if acc:
        # Undo the per-point normalization applied below, so the accuracy
        # variant stores plain averaged scores.
        scores_screening_all = scores_screening_all * X_train.shape[0]
    data = {
        'step_table':
        better_init + n_ellipsoid_steps *
        (get_ell_from_subset / X_train.shape[0]),
        'scores_screening':
        scores_screening_all / (X_train.shape[0] * nb_exp),
        'safe_guarantee':
        safe_guarantee / nb_exp
    }
    print(data)
    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')
    if plot:
        plot_experiment(data, zoom=zoom)
    print('END')
    return
def experiment_regpath(dataset, synth_params, size, scale_data, redundant,
                       noise, lmbda_grid_start, lmbda_grid_end,
                       lmbda_grid_num, mu, loss, penalty, intercept,
                       n_ellipsoid_steps, n_epochs, n_epochs_ell_path, cut,
                       get_ell_from_subset, clip_ell, use_sphere, nb_exp,
                       dontsave):
    """Compare solver budgets along a regularization path, with and without screening.

    Walks a log-spaced lambda grid; at each lambda (after the first, which
    only initializes the warm starts) it refits one classifier on the whole
    training set and one on the points kept by an ellipsoid screener, and
    accumulates a per-lambda cost estimate ("budget", in datapoint-epochs)
    and training score for both strategies.  Averaged results are saved
    under RESULTS_PATH/<dataset> unless `dontsave`.

    NOTE(review): `scale_data` is accepted but never used here — confirm
    whether scaling was meant to be applied as in experiment_reg.
    """
    print('START')
    exp_title = 'X_size_{}_ell_subset_{}_loss_{}_n_ell_{}_mu_{}_cut_ell_{}_n_epochs_{}_n_ell_path_{}_use_sphere_{}_start_{}_end_{}_num_{}_regpath'.format(
        size, get_ell_from_subset, loss, n_ellipsoid_steps, mu, cut, n_epochs,
        n_epochs_ell_path, use_sphere, lmbda_grid_start, lmbda_grid_end,
        lmbda_grid_num)
    print(exp_title)
    X, y = load_experiment(dataset, synth_params, size, redundant, noise,
                           classification=True)
    data = {}
    lmbda_grid = np.logspace(lmbda_grid_start, lmbda_grid_end,
                             num=lmbda_grid_num)
    # Pre-populate accumulators so keys exist before the experiment loop.
    for lmbda in lmbda_grid:
        data['budget_ell_lmbda_{}'.format(lmbda)] = 0
        data['budget_noscreen_lmbda_{}'.format(lmbda)] = 0
        data['score_ell_lmbda_{}'.format(lmbda)] = 0
        data['score_noscreen_lmbda_{}'.format(lmbda)] = 0
    compt_exp = 0
    while compt_exp < nb_exp:
        # Deterministic but different seed per repetition.
        random.seed(compt_exp + 1)
        np.random.seed(compt_exp + 1)
        compt_exp += 1
        X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2)
        for lmbda in lmbda_grid:
            print('---------- LMBDA ---------: ', lmbda)
            budget_ell = 0
            budget_noscreen = 0
            if lmbda == lmbda_grid[0]:
                # First grid point: initialize both classifiers and the
                # screener warm start; no budget is charged here.
                screener_ell = EllipsoidScreener(
                    lmbda=lmbda * X_train.shape[0] / get_ell_from_subset,
                    mu=mu,
                    loss=loss,
                    penalty=penalty,
                    intercept=intercept,
                    classification=True,
                    n_ellipsoid_steps=n_ellipsoid_steps,
                    cut=cut,
                    clip_ell=clip_ell,
                    use_sphere=use_sphere,
                    ars=True)
                screener_dg = DualityGapScreener(lmbda=lmbda,
                                                 n_epochs=n_epochs,
                                                 ars=True)
                screener_dg.fit(X_train, y_train)
                print('Init radius : ', screener_dg.squared_radius)
                random_subset = random.sample(range(0, X_train.shape[0]),
                                              get_ell_from_subset)
                screener_ell.fit(X_train[random_subset],
                                 y_train[random_subset],
                                 init=screener_dg.z,
                                 rad=screener_dg.squared_radius)
                svc = BinaryClassifier(loss='sqhinge',
                                       penalty=penalty,
                                       fit_intercept=intercept)
                svc.fit(X_train, y_train, solver='qning-svrg', lambd=lmbda,
                        verbose=False)
                svc_ell = BinaryClassifier(loss='sqhinge',
                                           penalty=penalty,
                                           fit_intercept=intercept)
                svc_ell.fit(X_train, y_train, solver='qning-svrg',
                            lambd=lmbda, verbose=False)
            else:
                # --- No-screening branch: warm-restart on the full set and
                # charge epochs * n_samples.
                budget_fit_solver_noscreen = svc.fit(X_train,
                                                     y_train,
                                                     solver='qning-svrg',
                                                     it0=1,
                                                     lambd=lmbda,
                                                     restart=True,
                                                     verbose=False)[0, -1]
                print('Epoch fit solver no screen :',
                      budget_fit_solver_noscreen)
                budget_noscreen += budget_fit_solver_noscreen * X_train.shape[0]
                print('Budget solver no screen :', budget_noscreen)
                # --- Screening branch: a few epochs to estimate the duality
                # gap, which gives the ellipsoid's initial radius 2*dg/lmbda.
                info = svc_ell.fit(X_train,
                                   y_train,
                                   solver='qning-svrg',
                                   lambd=lmbda,
                                   verbose=False,
                                   max_epochs=n_epochs_ell_path,
                                   it0=1,
                                   restart=True)
                dg = info[1, -1] - info[2, -1]
                screener_ell = EllipsoidScreener(
                    lmbda=lmbda * X_train.shape[0] / get_ell_from_subset,
                    mu=mu,
                    loss=loss,
                    penalty=penalty,
                    intercept=intercept,
                    classification=True,
                    n_ellipsoid_steps=n_ellipsoid_steps,
                    cut=cut,
                    clip_ell=clip_ell,
                    use_sphere=use_sphere,
                    ars=True)
                random_subset = random.sample(range(0, X_train.shape[0]),
                                              get_ell_from_subset)
                print('Init rad : ', 2 * dg / lmbda)
                screener_ell.fit(X_train[random_subset],
                                 y_train[random_subset],
                                 init=svc_ell.w,
                                 rad=2 * dg / lmbda)
                if use_sphere and n_ellipsoid_steps > 0:
                    print('Final rad : ', screener_ell.squared_radius)
                scores_ell = screener_ell.screen(X_train, y_train)
                # Keep only the points the screener could not rule out,
                # rescaling lmbda to the reduced sample count.
                tokeep = np.where(scores_ell > -mu)[0]
                print('To keep : ', len(tokeep))
                budget_fit_solver = svc_ell.fit(
                    X_train[tokeep],
                    y_train[tokeep],
                    solver='qning-svrg',
                    it0=1,
                    lambd=lmbda * (X_train.shape[0] / len(tokeep)),
                    restart=True,
                    verbose=False)[0, -1]
                # Budget = gap-estimation epochs + ellipsoid steps on the
                # subset (+ one extra pass when cutting) + solver epochs on
                # the kept points.
                budget_init_ell = (n_epochs_ell_path) * X_train.shape[0]
                budget_fit_ell = n_ellipsoid_steps * get_ell_from_subset
                if cut:
                    budget_fit_ell += get_ell_from_subset
                budget_ell += budget_init_ell + budget_fit_ell + budget_fit_solver * len(
                    tokeep)
                print('Epoch fit solver screen', budget_fit_solver)
                print('Budget solver screen : ', budget_init_ell,
                      budget_fit_ell, budget_fit_solver * len(tokeep))
            # Record this lambda's outcome (zero budgets at the first grid
            # point, which only set up warm starts).
            score_ell = svc_ell.score(X_train, y_train)
            score_noscreen = svc.score(X_train, y_train)
            print('Score on screened : ', score_ell, 'Score on whole : ',
                  score_noscreen)
            data['budget_ell_lmbda_{}'.format(lmbda)] += budget_ell
            data['budget_noscreen_lmbda_{}'.format(lmbda)] += budget_noscreen
            data['score_ell_lmbda_{}'.format(lmbda)] += score_ell
            data['score_noscreen_lmbda_{}'.format(lmbda)] += score_noscreen
    # Average accumulators over repetitions (cast to plain float for saving).
    data = {k: float(data[k] / nb_exp) for k in data}
    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')
    print('END')
    print(data)
    return
g=None, mu=1, classification=True, intercept=False, cut=False) if __name__ == "__main__": # simple test from sklearn.model_selection import train_test_split from utils.loaders import load_experiment import random X, y = load_experiment(dataset='mnist', synth_params=None, size=60000, redundant=0, noise=None, classification=True) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) prop = np.unique(y_test, return_counts=True)[1] print('BASELINE : ', 1 - prop[1] / prop[0]) screener = DualityGapScreener(lmbda=1e-5, n_epochs=9, ars=True).fit(X_train, y_train) print('Squared Radius : ', 2 * screener.dg / 1e-5) print('Score : ', screener.score(X_test, y_test)) svc_ell = BinaryClassifier(loss='sqhinge', penalty='l2') budget_fit_solver = svc_ell.fit(