def get_ell(self, X, y, init, rad):
    """Choose the starting center/radius for the ellipsoid and run the iterations.

    Parameters
    ----------
    X, y : training data used both for the optional warm-start fit and for the
        ellipsoid iterations.
    init : array or None
        Externally supplied starting center; used only when ``better_init == 0``.
    rad : number
        Externally supplied starting (squared) radius; used only when
        ``better_radius == 0`` and ``rad != 0``.

    Side effects only (returns None): delegates to ``iter_ell_accelerated``,
    ``iter_ell_dc`` or ``iter_ell`` depending on the configured variant.
    """
    # Default center is the origin; default radius is the dimension
    # (+1 when an intercept coordinate is appended).
    if self.intercept:
        z_init = np.zeros(X.shape[1] + 1)
        r_init = X.shape[1] + 1
    else:
        z_init = np.zeros(X.shape[1])
        r_init = X.shape[1]
    # BUGFIX: was `is not 0` — identity comparison with an int literal only
    # happens to work through CPython's small-int cache and raises a
    # SyntaxWarning on Python 3.8+. Use a value comparison.
    if self.better_init != 0:
        # Warm start: run a few estimator iterations and center the ellipsoid
        # on the resulting coefficients.
        est = fit_estimator(X, y, self.loss, self.penalty, self.mu,
                            self.lmbda, self.intercept,
                            max_iter=self.better_init, ars=self.ars)
        if self.classification:
            if self.ars and self.loss != 'safe_logistic':
                # assumes the ars solver exposes its iterate as `est.w`
                # — TODO confirm against fit_estimator's return type
                z_init = est.w
            else:
                z_init = est.coef_[0]
                if self.intercept:
                    z_init = np.append(z_init, est.intercept_)
        else:
            z_init = est.coef_
            if self.intercept:
                z_init = np.append(z_init, est.intercept_)
    # Caller-provided center/radius only apply when no warm start was requested.
    if init is not None and self.better_init == 0:
        z_init = init
    if rad != 0 and self.better_radius == 0:
        r_init = rad
    if self.better_radius != 0:
        r_init = float(self.better_radius)
    # Dispatch to the configured ellipsoid iteration variant.
    if self.acceleration:
        self.iter_ell_accelerated(X, y, z_init, r_init)
    elif self.dc:
        self.iter_ell_dc(X, y, z_init, r_init)
    else:
        self.iter_ell(X, y, z_init, r_init)
    return
# Known dataset sizes; anything else is rejected up front.
_DATASET_SIZES = {'mnist': 60000, 'svhn': 604388, 'rcv1': 781265}


def experiment_acc(dataset, loss, penalty, lmbda, n_runs=3):
    """Measure test accuracy of an estimator on a full dataset and save it.

    Averages the test score over ``n_runs`` random 80/20 splits and stores
    the result in ``RESULTS_PATH/accuracies/<dataset>_<loss>_<penalty>_<lmbda>.npy``.

    Parameters
    ----------
    dataset : str
        One of 'mnist', 'svhn', 'rcv1'.
    loss, penalty, lmbda : estimator configuration forwarded to fit_estimator.
    n_runs : int, optional
        Number of train/test splits to average over (default 3, matching the
        original hard-coded behavior).

    Raises
    ------
    ValueError
        If ``dataset`` is not a known dataset.  (Previously an unknown dataset
        crashed later with an opaque NameError on the unbound ``size``.)
    """
    acc = {}
    random.seed(0)
    np.random.seed(0)
    if dataset not in _DATASET_SIZES:
        raise ValueError('Unknown dataset: {!r} (expected one of {})'.format(
            dataset, sorted(_DATASET_SIZES)))
    size = _DATASET_SIZES[dataset]
    X, y = load_experiment(dataset=dataset,
                           synth_params=None,
                           size=size,
                           redundant=0,
                           noise=0,
                           classification=True)
    score = 0
    for _ in range(n_runs):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
        estimator = fit_estimator(X_train, y_train, loss=loss,
                                  penalty=penalty, mu=1, lmbda=lmbda,
                                  intercept=False, max_iter=10000)
        score += estimator.score(X_test, y_test)
    # Single key identifying this configuration (also used as the file name).
    key = '{}_{}_{}_{}'.format(dataset, loss, penalty, lmbda)
    acc[key] = score / n_runs
    print(key, ' : Done !')
    save_dataset_folder = os.path.join(RESULTS_PATH, 'accuracies')
    os.makedirs(save_dataset_folder, exist_ok=True)
    np.save(os.path.join(save_dataset_folder, key), acc)
    print('RESULTS SAVED!')
    return
acceleration=True, dc=False, use_sphere=False, ars=True).fit(X_train, y_train) prop = np.unique(y_test, return_counts=True)[1] print('BASELINE : ', 1 - prop[1] / prop[0]) print('SCORE SCREENER : ', screener.score(X_test, y_test)) #print(screener.z) scores = screener.screen(X_train, y_train) idx_safeell = np.where(scores > 0)[0] print('NB TO KEEP', len(idx_safeell)) if len(idx_safeell) != 0: estimator_whole = fit_estimator(X_train, y_train, loss='safe_logistic', penalty='l2', mu=0, lmbda=0, intercept=False) print(y_train[idx_safeell][:10]) print(estimator_whole.score(X_test, y_test)) estimator_screened = fit_estimator(X_train[idx_safeell], y_train[idx_safeell], loss='safe_logistic', penalty='l2', mu=0, lmbda=0, intercept=False) print(estimator_screened.score(X_test, y_test)) temp = np.array([ estimator_whole.score(X_train, y_train),
def experiment_reg(dataset, synth_params, size, scale_data, redundant, noise,
                   nb_delete_steps, lmbda, mu, loss, penalty, intercept,
                   n_ellipsoid_steps, better_init, better_radius, cut,
                   get_ell_from_subset, clip_ell, use_sphere, guarantee,
                   nb_exp, nb_test, plot, zoom, dontsave):
    """Compare ellipsoid-screened deletion against random deletion.

    For each of ``nb_exp`` seeded train/test splits: fit an EllipsoidScreener,
    score every training point, then repeatedly delete increasing numbers of
    points — either the lowest-scoring ones or a same-sized arbitrary slice —
    refit ``nb_test`` times, and record the averaged test accuracies.  Results
    are stored as a dict under ``RESULTS_PATH/<dataset>/<exp_title>.npy``
    unless ``dontsave`` is set; ``plot`` additionally renders them.
    """
    print('START')
    # File/experiment identifier encoding every hyper-parameter of the run.
    exp_title = 'X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_intercept_{}_mu_{}_redundant_{}_noise_{}_better_init_{}_better_radius_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}_nds_{}'.format(
        size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps, intercept,
        mu, redundant, noise, better_init, better_radius, cut, clip_ell,
        use_sphere, nb_delete_steps)
    print(exp_title)
    X, y = load_experiment(dataset, synth_params, size, redundant, noise,
                           classification=True)
    scores_regular_all = []   # per-experiment score curves, full training set
    scores_ell_all = []       # per-experiment score curves, screened deletion
    scores_r_all = []         # per-experiment score curves, random deletion
    safe_guarantee = np.array([0., 0.])  # summed [whole, screened] train scores
    compt_exp = 0
    nb_safe_ell_all = 0  # total count of provably-safe-to-screen points
    while compt_exp < nb_exp:
        # Deterministic per-experiment seeding (1-based).
        random.seed(compt_exp + 1)
        np.random.seed(compt_exp + 1)
        compt_exp += 1
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.2)
        print('Ellipsoid steps to be done : ', n_ellipsoid_steps)
        screener_ell = EllipsoidScreener(lmbda=lmbda, mu=mu, loss=loss,
                                         penalty=penalty, intercept=intercept,
                                         classification=True,
                                         n_ellipsoid_steps=n_ellipsoid_steps,
                                         better_init=better_init,
                                         better_radius=better_radius,
                                         cut=cut, clip_ell=clip_ell,
                                         use_sphere=use_sphere)
        if scale_data:
            # Fit the scaler on train only; reuse it for the test set.
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)
        if get_ell_from_subset != 0:
            # Build the ellipsoid from a random subsample to cut cost.
            random_subset = random.sample(range(0, X_train.shape[0]),
                                          get_ell_from_subset)
            screener_ell.fit(X_train[random_subset], y_train[random_subset])
        else:
            screener_ell.fit(X_train, y_train)
        scores_screenell = screener_ell.screen(X_train, y_train)
        # Ascending sort: lowest screening scores are deleted first.
        idx_screenell = np.argsort(scores_screenell)
        print('SCORES_ELL', scores_screenell[:10])
        nb_safe_ell_all += get_nb_safe(scores_screenell, mu,
                                       classification=True)
        scores_regular = []
        scores_ell = []
        scores_r = []
        nb_to_del_table = None
        if guarantee:
            # Points with score > -mu are certified safe; retraining without
            # them should match training on the whole set.
            idx_safeell = np.where(scores_screenell > -mu)[0]
            if len(idx_safeell) != 0:
                estimator_whole = fit_estimator(X_train, y_train, loss,
                                                penalty, mu, lmbda, intercept)
                estimator_screened = fit_estimator(X_train[idx_safeell],
                                                   y_train[idx_safeell], loss,
                                                   penalty, mu, lmbda,
                                                   intercept)
                temp = np.array([
                    estimator_whole.score(X_train, y_train),
                    estimator_screened.score(X_train, y_train)
                ])
                safe_guarantee += temp
                print('SAFE GUARANTEE : ', temp)
        if nb_delete_steps != 0:
            # Increasing (sqrt-spaced, then rescaled) schedule of deletion
            # counts whose last entry equals the full training-set size.
            nb_to_del_table = np.sqrt(
                np.linspace(1, X_train.shape[0], nb_delete_steps,
                            dtype='int'))
            nb_to_del_table = np.ceil(
                nb_to_del_table *
                (X_train.shape[0] / nb_to_del_table[-1])).astype(int)
            X_r = X_train
            y_r = y_train
            for i, nb_to_delete in enumerate(nb_to_del_table):
                if i == 0:
                    # Baseline on the untouched training set is computed once.
                    score_regular = 0
                score_ell = 0
                score_r = 0
                compt = 0
                # Keep the (train - nb_to_delete) highest-scoring points.
                X_screenell, y_screenell = X_train[idx_screenell[
                    nb_to_delete:]], y_train[idx_screenell[nb_to_delete:]]
                # "Random" deletion is a contiguous slice — presumably relies
                # on train_test_split having shuffled the rows; verify.
                X_r, y_r = X_train[nb_to_delete:], y_train[nb_to_delete:]
                if not (dataset_has_both_labels(y_r)):
                    print(
                        'Warning, only one label in randomly screened dataset')
                if not (dataset_has_both_labels(y_screenell)):
                    print('Warning, only one label in screenell dataset')
                if not (dataset_has_both_labels(y_r)
                        and dataset_has_both_labels(y_screenell)):
                    # A degenerate single-class subset makes further fits
                    # meaningless; stop the deletion schedule here.
                    break
                print('X_train :', X_train.shape, 'X_screenell :',
                      X_screenell.shape, 'X_random : ', X_r.shape)
                while compt < nb_test:
                    compt += 1
                    if i == 0:
                        estimator_regular = fit_estimator(X_train, y_train,
                                                          loss=loss,
                                                          penalty=penalty,
                                                          mu=mu, lmbda=lmbda,
                                                          intercept=intercept)
                    estimator_screenell = fit_estimator(X_screenell,
                                                        y_screenell,
                                                        loss=loss,
                                                        penalty=penalty,
                                                        mu=mu, lmbda=lmbda,
                                                        intercept=intercept)
                    estimator_r = fit_estimator(X_r, y_r, loss=loss,
                                                penalty=penalty, mu=mu,
                                                lmbda=lmbda,
                                                intercept=intercept)
                    if i == 0:
                        score_regular += estimator_regular.score(
                            X_test, y_test)
                    score_ell += estimator_screenell.score(X_test, y_test)
                    score_r += estimator_r.score(X_test, y_test)
                # NOTE(review): score_regular is accumulated only at i == 0,
                # so later entries re-append the same baseline average.
                scores_regular.append(score_regular / nb_test)
                scores_ell.append(score_ell / nb_test)
                scores_r.append(score_r / nb_test)
        scores_regular_all.append(scores_regular)
        scores_ell_all.append(scores_ell)
        scores_r_all.append(scores_r)
    print('Number of datapoints we can safely screen with ellipsoid method:',
          nb_safe_ell_all / nb_exp)
    # Aggregate results; counts and guarantee scores are averaged over runs.
    data = {
        'nb_to_del_table': nb_to_del_table,
        'scores_regular': scores_regular_all,
        'scores_ell': scores_ell_all,
        'scores_r': scores_r_all,
        'nb_safe_ell': nb_safe_ell_all / nb_exp,
        'train_set_size': X_train.shape[0],
        'safe_guarantee': safe_guarantee / nb_exp
    }
    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')
    if plot:
        plot_experiment(data, zoom=zoom)
    print('END')
    return
def experiment_tradeoff(dataset, synth_params, size, scale_data, redundant,
                        noise, lmbda, mu, loss, penalty, intercept, acc,
                        rescale, n_ellipsoid_steps, better_init, cut,
                        get_ell_from_subset, clip_ell, use_sphere, guarantee,
                        nb_exp, plot, zoom, dontsave):
    """Trace the screening/accuracy trade-off as a function of epoch budget.

    Over ``nb_exp`` splits, either (``acc=True``) records LinearSVC test
    accuracy per epoch, or runs a two-phase screening schedule: epochs up to
    ``better_init`` use a DualityGapScreener, later epochs hand its center
    and radius to an EllipsoidScreener fitted on a random subsample.  The
    per-epoch count of safely screenable points (normalized by train size and
    ``nb_exp``) is saved under ``RESULTS_PATH/<dataset>/<exp_title>.npy``.
    """
    print('START')
    X, y = load_experiment(dataset, synth_params, size, redundant, noise,
                           classification=True)
    # Title suffix distinguishes the three experiment variants.
    if acc:
        exp_title = 'X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_mu_{}_better_init_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}_acc'.format(
            size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps, mu,
            better_init, cut, clip_ell, use_sphere)
    elif rescale:
        exp_title = 'X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_mu_{}_better_init_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}_tradeoff_rescale'.format(
            size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps, mu,
            better_init, cut, clip_ell, use_sphere)
    else:
        exp_title = 'X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_mu_{}_better_init_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}_tradeoff'.format(
            size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps, mu,
            better_init, cut, clip_ell, use_sphere)
    print(exp_title)
    # Total epoch budget: warm-start epochs plus ellipsoid steps expressed in
    # equivalent passes over the (80%) training set.
    nb_epochs = int(better_init + n_ellipsoid_steps * get_ell_from_subset /
                    (0.8 * X.shape[0]))
    scores_screening_all = np.zeros(nb_epochs)  # summed over experiments
    safe_guarantee = np.array([0., 0.])
    compt_exp = 0
    while compt_exp < nb_exp:
        # Seeding deliberately disabled here (unlike experiment_reg).
        #random.seed(compt_exp + 1)
        #np.random.seed(compt_exp + 1)
        compt_exp += 1
        X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                            test_size=0.2)
        if acc:
            # Accuracy-vs-epochs baseline: refit LinearSVC with a growing
            # max_iter cap; tiny tol keeps it iterating to the cap.
            for i in range(nb_epochs):
                estimator = LinearSVC(loss='squared_hinge', dual=False,
                                      C=1 / lmbda, fit_intercept=False,
                                      max_iter=i + 1,
                                      tol=1.0e-20).fit(X_train, y_train)
                scores_screening_all[i] += estimator.score(X_test, y_test)
                print(scores_screening_all[i])
            print('SCORES', scores_screening_all)
        else:
            for i in range(nb_epochs):
                # Shift to 1-based epoch numbering.
                i = i + 1
                if i <= better_init:
                    # Phase 1: duality-gap screening; keep its ball as the
                    # starting point for phase 2.
                    screener_dg = DualityGapScreener(lmbda=lmbda,
                                                     n_epochs=i).fit(
                                                         X_train, y_train)
                    z_init = screener_dg.z
                    rad_init = screener_dg.squared_radius
                    scores = screener_dg.screen(X_train, y_train)
                    scores_screening_all[i - 1] += get_nb_safe(
                        scores, mu, classification=True)
                    print('SCREEN DG RADIUS', screener_dg.squared_radius)
                elif better_init < i <= nb_epochs:
                    # Phase 2: ellipsoid screening seeded with (z_init,
                    # rad_init) from phase 1.
                    # NOTE(review): if better_init == 0 these names are never
                    # bound before use here — confirm callers always pass
                    # better_init >= 1 for this branch.
                    if rescale:
                        # Rescale lmbda to the subsample size.
                        lmbda_ = lmbda * X_train.shape[0] / get_ell_from_subset
                    else:
                        lmbda_ = lmbda
                    random_subset = random.sample(range(0, X_train.shape[0]),
                                                  get_ell_from_subset)
                    screener_ell = EllipsoidScreener(
                        lmbda=lmbda_,
                        mu=mu,
                        loss=loss,
                        penalty=penalty,
                        intercept=intercept,
                        classification=True,
                        n_ellipsoid_steps=int(
                            (i - better_init) * X_train.shape[0] /
                            get_ell_from_subset),
                        better_init=0,
                        better_radius=0,
                        cut=cut,
                        clip_ell=clip_ell,
                        use_sphere=use_sphere).fit(X_train[random_subset],
                                                   y_train[random_subset],
                                                   init=z_init,
                                                   rad=rad_init)
                    scores = screener_ell.screen(X_train, y_train)
                    scores_screening_all[i - 1] += get_nb_safe(
                        scores, mu, classification=True)
                    if use_sphere:
                        print('SCREEN ELL RADIUS',
                              screener_ell.squared_radius)
            if guarantee:
                # Uses `scores` from the LAST epoch of the loop above.
                idx_safeell = np.where(scores > -mu)[0]
                print('SCORES ', scores)
                print('NB TO KEEP', len(idx_safeell))
                if len(idx_safeell) != 0:
                    estimator_whole = fit_estimator(X_train, y_train, loss,
                                                    penalty, mu, lmbda,
                                                    intercept)
                    if rescale:
                        lmbda_ = lmbda * X_train.shape[0] / len(idx_safeell)
                    estimator_screened = fit_estimator(X_train[idx_safeell],
                                                       y_train[idx_safeell],
                                                       loss, penalty, mu,
                                                       lmbda_, intercept)
                    temp = np.array([
                        estimator_whole.score(X_train, y_train),
                        estimator_screened.score(X_train, y_train)
                    ])
                    print('SAFE GUARANTEE : ', temp)
                    safe_guarantee += temp
    if acc:
        # Undo the per-point normalization below for the accuracy variant.
        scores_screening_all = scores_screening_all * X_train.shape[0]
    data = {
        'step_table':
        better_init + n_ellipsoid_steps *
        (get_ell_from_subset / X_train.shape[0]),
        # Fraction of the training set screened, averaged over experiments.
        'scores_screening':
        scores_screening_all / (X_train.shape[0] * nb_exp),
        'safe_guarantee':
        safe_guarantee / nb_exp
    }
    print(data)
    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')
    if plot:
        plot_experiment(data, zoom=zoom)
    print('END')
    return