def get_ell(self, X, y, init, rad):
        """Choose the starting centre and radius, then run the ellipsoid iterations.

        The centre defaults to a zero vector and the radius to the number of
        parameters (``X.shape[1]``, plus one when ``self.intercept`` is set).
        Those defaults are overridden as follows:

        * ``self.better_init != 0``: an estimator is fitted through
          ``fit_estimator`` with ``max_iter=self.better_init`` and its
          coefficients (with ``intercept_`` appended when ``self.intercept``
          is set) become the centre.
        * ``init`` becomes the centre only when it is not ``None`` and
          ``self.better_init == 0``.
        * ``self.better_radius != 0`` gives the radius (cast to ``float``);
          otherwise a non-zero ``rad`` is used.

        Args:
            X: 2-D design matrix; only ``X.shape[1]`` is read here, the array
                itself is forwarded.
            y: Targets, forwarded unchanged.
            init: Optional starting centre, or ``None``.
            rad: Optional starting radius; ``0`` means "not provided".

        Returns:
            None. The selected ``iter_ell*`` method is called for its side
            effects.
        """
        n_params = X.shape[1] + 1 if self.intercept else X.shape[1]
        z_init = np.zeros(n_params)
        r_init = n_params

        # Value comparison, not identity: ``is not 0`` treated 0.0 / False as
        # "warm start requested" and contradicted the ``== 0`` test below.
        if self.better_init != 0:
            est = fit_estimator(X,
                                y,
                                self.loss,
                                self.penalty,
                                self.mu,
                                self.lmbda,
                                self.intercept,
                                max_iter=self.better_init,
                                ars=self.ars)
            if not self.classification:
                z_init = est.coef_
            elif self.ars and self.loss != 'safe_logistic':
                # This estimator flavour exposes its weights as ``w``.
                z_init = est.w
            else:
                z_init = est.coef_[0]
            if self.intercept:
                z_init = np.append(z_init, est.intercept_)
        elif init is not None:
            # A caller-supplied centre is honoured only without a warm start.
            z_init = init

        if self.better_radius != 0:
            r_init = float(self.better_radius)
        elif rad != 0:
            r_init = rad

        if self.acceleration:
            self.iter_ell_accelerated(X, y, z_init, r_init)
        elif self.dc:
            self.iter_ell_dc(X, y, z_init, r_init)
        else:
            self.iter_ell(X, y, z_init, r_init)

        return
def experiment_acc(dataset, loss, penalty, lmbda):
    """Average an estimator's test score over three random splits and save it.

    The dataset is loaded once with a fixed, per-dataset sample size. An
    estimator is then fitted on three independent 80/20 train/test splits and
    the mean of ``estimator.score`` on the test part is stored under
    ``RESULTS_PATH/accuracies``.

    Args:
        dataset: One of ``'mnist'``, ``'svhn'`` or ``'rcv1'``.
        loss: Loss name forwarded to ``fit_estimator``.
        penalty: Penalty name forwarded to ``fit_estimator``.
        lmbda: Regularisation strength forwarded to ``fit_estimator``.

    Raises:
        ValueError: If ``dataset`` is not one of the supported names.
            Previously this surfaced as an ``UnboundLocalError`` on ``size``.
    """
    dataset_sizes = {'mnist': 60000, 'svhn': 604388, 'rcv1': 781265}
    if dataset not in dataset_sizes:
        raise ValueError('unknown dataset {!r}; expected one of {}'.format(
            dataset, sorted(dataset_sizes)))
    size = dataset_sizes[dataset]

    # Fixed seeds so the three splits are the same on every invocation.
    random.seed(0)
    np.random.seed(0)

    X, y = load_experiment(dataset=dataset,
                           synth_params=None,
                           size=size,
                           redundant=0,
                           noise=0,
                           classification=True)

    n_splits = 3
    score = 0
    for _ in range(n_splits):
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2)
        estimator = fit_estimator(X_train,
                                  y_train,
                                  loss=loss,
                                  penalty=penalty,
                                  mu=1,
                                  lmbda=lmbda,
                                  intercept=False,
                                  max_iter=10000)
        score += estimator.score(X_test, y_test)

    # The same key names the dict entry and the output file.
    run_key = '{}_{}_{}_{}'.format(dataset, loss, penalty, lmbda)
    acc = {run_key: score / n_splits}
    print(run_key, ' : Done !')

    save_dataset_folder = os.path.join(RESULTS_PATH, 'accuracies')
    os.makedirs(save_dataset_folder, exist_ok=True)
    # np.save stores the dict as a pickled object array.
    np.save(os.path.join(save_dataset_folder, run_key), acc)
    print('RESULTS SAVED!')

    return
                              acceleration=True,
                              dc=False,
                              use_sphere=False,
                              ars=True).fit(X_train, y_train)
 prop = np.unique(y_test, return_counts=True)[1]
 print('BASELINE : ', 1 - prop[1] / prop[0])
 print('SCORE SCREENER : ', screener.score(X_test, y_test))
 #print(screener.z)
 scores = screener.screen(X_train, y_train)
 idx_safeell = np.where(scores > 0)[0]
 print('NB TO KEEP', len(idx_safeell))
 if len(idx_safeell) != 0:
     estimator_whole = fit_estimator(X_train,
                                     y_train,
                                     loss='safe_logistic',
                                     penalty='l2',
                                     mu=0,
                                     lmbda=0,
                                     intercept=False)
     print(y_train[idx_safeell][:10])
     print(estimator_whole.score(X_test, y_test))
     estimator_screened = fit_estimator(X_train[idx_safeell],
                                        y_train[idx_safeell],
                                        loss='safe_logistic',
                                        penalty='l2',
                                        mu=0,
                                        lmbda=0,
                                        intercept=False)
     print(estimator_screened.score(X_test, y_test))
     temp = np.array([
         estimator_whole.score(X_train, y_train),
# Example #4
# 0
def experiment_reg(dataset, synth_params, size, scale_data, redundant, noise,
                   nb_delete_steps, lmbda, mu, loss, penalty, intercept,
                   n_ellipsoid_steps, better_init, better_radius, cut,
                   get_ell_from_subset, clip_ell, use_sphere, guarantee,
                   nb_exp, nb_test, plot, zoom, dontsave):
    """Compare ellipsoid-ranked deletion with positional deletion over repeated splits.

    For each of ``nb_exp`` seeded 80/20 splits an ``EllipsoidScreener`` is
    fitted (optionally on a random subset of the training rows) and used to
    score every training point. Then:

    * if ``guarantee`` is set, an estimator trained on all rows is compared
      with one trained only on rows whose score exceeds ``-mu``;
    * if ``nb_delete_steps != 0``, estimators are trained after dropping an
      increasing number of rows, either the lowest-scored ones or simply the
      first rows of the split, and their test scores are averaged over
      ``nb_test`` fits.

    Results are gathered in a dict, saved under ``RESULTS_PATH/<dataset>``
    unless ``dontsave`` is set, and optionally plotted.

    Note: data is loaded, and the screener built, with ``classification=True``.
    """
    print('START')

    title_fields = (size, get_ell_from_subset, loss, lmbda, n_ellipsoid_steps,
                    intercept, mu, redundant, noise, better_init,
                    better_radius, cut, clip_ell, use_sphere, nb_delete_steps)
    exp_title = ('X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_intercept_{}'
                 '_mu_{}_redundant_{}_noise_{}_better_init_{}_better_radius_{}'
                 '_cut_ell_{}_clip_ell_{}_use_sphere_{}_nds_{}').format(
                     *title_fields)
    print(exp_title)

    X, y = load_experiment(dataset,
                           synth_params,
                           size,
                           redundant,
                           noise,
                           classification=True)

    # Estimator settings shared by every fit below: positional for the
    # guarantee check, keyword for the deletion sweep (as in the call sites).
    est_args = (loss, penalty, mu, lmbda, intercept)
    est_kwargs = dict(loss=loss,
                      penalty=penalty,
                      mu=mu,
                      lmbda=lmbda,
                      intercept=intercept)

    all_regular, all_ell, all_positional = [], [], []
    safe_guarantee = np.array([0., 0.])
    total_safe = 0

    for run_seed in range(1, nb_exp + 1):
        # One seed per experiment, applied to both RNGs before the split.
        random.seed(run_seed)
        np.random.seed(run_seed)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2)
        print('Ellipsoid steps to be done : ', n_ellipsoid_steps)
        screener_ell = EllipsoidScreener(lmbda=lmbda,
                                         mu=mu,
                                         loss=loss,
                                         penalty=penalty,
                                         intercept=intercept,
                                         classification=True,
                                         n_ellipsoid_steps=n_ellipsoid_steps,
                                         better_init=better_init,
                                         better_radius=better_radius,
                                         cut=cut,
                                         clip_ell=clip_ell,
                                         use_sphere=use_sphere)

        if scale_data:
            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train)
            X_test = scaler.transform(X_test)

        if get_ell_from_subset == 0:
            screener_ell.fit(X_train, y_train)
        else:
            subset_rows = random.sample(range(X_train.shape[0]),
                                        get_ell_from_subset)
            screener_ell.fit(X_train[subset_rows], y_train[subset_rows])

        scores_screenell = screener_ell.screen(X_train, y_train)
        # Ascending order: the lowest-scored rows are dropped first.
        idx_screenell = np.argsort(scores_screenell)

        print('SCORES_ELL', scores_screenell[:10])

        total_safe += get_nb_safe(scores_screenell, mu, classification=True)

        run_regular, run_ell, run_positional = [], [], []
        nb_to_del_table = None

        if guarantee:
            kept_rows = np.where(scores_screenell > -mu)[0]
            if len(kept_rows) > 0:
                estimator_whole = fit_estimator(X_train, y_train, *est_args)
                estimator_screened = fit_estimator(X_train[kept_rows],
                                                   y_train[kept_rows],
                                                   *est_args)
                pair = np.array([
                    estimator_whole.score(X_train, y_train),
                    estimator_screened.score(X_train, y_train)
                ])
                safe_guarantee += pair
                print('SAFE GUARANTEE : ', pair)

        if nb_delete_steps != 0:
            n_train = X_train.shape[0]
            # Square-root-spaced deletion counts, rescaled so the last entry
            # equals the training-set size.
            sqrt_grid = np.sqrt(
                np.linspace(1, n_train, nb_delete_steps, dtype='int'))
            nb_to_del_table = np.ceil(
                sqrt_grid * (n_train / sqrt_grid[-1])).astype(int)

            for step, nb_to_delete in enumerate(nb_to_del_table):
                if step == 0:
                    # The full-data baseline is measured once, on the first
                    # step, and its value is repeated for later steps.
                    score_regular = 0
                score_ell = 0
                score_r = 0

                rows_after_screen = idx_screenell[nb_to_delete:]
                X_screenell = X_train[rows_after_screen]
                y_screenell = y_train[rows_after_screen]
                X_r, y_r = X_train[nb_to_delete:], y_train[nb_to_delete:]

                positional_ok = dataset_has_both_labels(y_r)
                screen_ok = dataset_has_both_labels(y_screenell)
                if not positional_ok:
                    print(
                        'Warning, only one label in randomly screened dataset')
                if not screen_ok:
                    print('Warning, only one label in screenell dataset')
                if not (positional_ok and screen_ok):
                    # Stop the sweep for this experiment.
                    break

                print('X_train :', X_train.shape, 'X_screenell :',
                      X_screenell.shape, 'X_random : ', X_r.shape)

                for _ in range(nb_test):
                    if step == 0:
                        estimator_regular = fit_estimator(
                            X_train, y_train, **est_kwargs)
                    estimator_screenell = fit_estimator(
                        X_screenell, y_screenell, **est_kwargs)
                    estimator_r = fit_estimator(X_r, y_r, **est_kwargs)

                    if step == 0:
                        score_regular += estimator_regular.score(
                            X_test, y_test)
                    score_ell += estimator_screenell.score(X_test, y_test)
                    score_r += estimator_r.score(X_test, y_test)

                run_regular.append(score_regular / nb_test)
                run_ell.append(score_ell / nb_test)
                run_positional.append(score_r / nb_test)

            all_regular.append(run_regular)
            all_ell.append(run_ell)
            all_positional.append(run_positional)

    mean_safe = total_safe / nb_exp
    print('Number of datapoints we can safely screen with ellipsoid method:',
          mean_safe)

    data = {
        'nb_to_del_table': nb_to_del_table,
        'scores_regular': all_regular,
        'scores_ell': all_ell,
        'scores_r': all_positional,
        'nb_safe_ell': mean_safe,
        'train_set_size': X_train.shape[0],
        'safe_guarantee': safe_guarantee / nb_exp
    }

    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')

    if plot:
        plot_experiment(data, zoom=zoom)

    print('END')

    return
def experiment_tradeoff(dataset, synth_params, size, scale_data, redundant,
                        noise, lmbda, mu, loss, penalty, intercept, acc,
                        rescale, n_ellipsoid_steps, better_init, cut,
                        get_ell_from_subset, clip_ell, use_sphere, guarantee,
                        nb_exp, plot, zoom, dontsave):
    """Track screening (or accuracy) as a function of the epoch budget.

    The budget is ``nb_epochs = int(better_init + n_ellipsoid_steps *
    get_ell_from_subset / (0.8 * X.shape[0]))``. For each of ``nb_exp``
    random 80/20 splits:

    * with ``acc`` set, a ``LinearSVC`` is trained with ``max_iter`` equal to
      1..nb_epochs and its test score accumulated per epoch;
    * otherwise the first ``better_init`` epochs use a ``DualityGapScreener``
      and the remaining ones an ``EllipsoidScreener`` fitted on a random
      subset of ``get_ell_from_subset`` rows, started from the latest
      duality-gap centre and squared radius. The count returned by
      ``get_nb_safe`` is accumulated per epoch. With ``guarantee`` set, an
      estimator trained on every row is compared with one trained on the rows
      whose last-epoch score exceeds ``-mu``.

    Results are gathered in a dict, saved under ``RESULTS_PATH/<dataset>``
    unless ``dontsave`` is set, and optionally plotted.

    Notes:
        ``scale_data`` is accepted for interface compatibility but is not
        used by this function.
        RNGs are not re-seeded per experiment, so splits and subsets differ
        from one invocation to the next.
    """
    print('START')

    X, y = load_experiment(dataset,
                           synth_params,
                           size,
                           redundant,
                           noise,
                           classification=True)

    # The three run modes share one title template and differ only by suffix.
    if acc:
        title_suffix = 'acc'
    elif rescale:
        title_suffix = 'tradeoff_rescale'
    else:
        title_suffix = 'tradeoff'
    exp_title = ('X_size_{}_ell_subset_{}_loss_{}_lmbda_{}_n_ellipsoid_{}_mu_{}'
                 '_better_init_{}_cut_ell_{}_clip_ell_{}_use_sphere_{}_{}').format(
                     size, get_ell_from_subset, loss, lmbda,
                     n_ellipsoid_steps, mu, better_init, cut, clip_ell,
                     use_sphere, title_suffix)
    print(exp_title)

    nb_epochs = int(better_init + n_ellipsoid_steps * get_ell_from_subset /
                    (0.8 * X.shape[0]))
    scores_screening_all = np.zeros(nb_epochs)
    safe_guarantee = np.array([0., 0.])

    for _ in range(nb_exp):
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=0.2)

        if acc:
            for i in range(nb_epochs):
                estimator = LinearSVC(loss='squared_hinge',
                                      dual=False,
                                      C=1 / lmbda,
                                      fit_intercept=False,
                                      max_iter=i + 1,
                                      tol=1.0e-20).fit(X_train, y_train)
                scores_screening_all[i] += estimator.score(X_test, y_test)
                print(scores_screening_all[i])
            print('SCORES', scores_screening_all)
            continue

        # Bind the warm-start state up front so that better_init == 0 no
        # longer hits unbound names. None / 0 are the "not provided" values
        # the screener's get_ell checks for; this assumes fit() forwards
        # them there (TODO confirm).
        z_init = None
        rad_init = 0
        scores = None

        for epoch in range(1, nb_epochs + 1):
            if epoch <= better_init:
                screener_dg = DualityGapScreener(lmbda=lmbda,
                                                 n_epochs=epoch).fit(
                                                     X_train, y_train)
                z_init = screener_dg.z
                rad_init = screener_dg.squared_radius
                scores = screener_dg.screen(X_train, y_train)
                scores_screening_all[epoch - 1] += get_nb_safe(
                    scores, mu, classification=True)
                print('SCREEN DG RADIUS', screener_dg.squared_radius)
            else:
                if rescale:
                    # Scale the regularisation by full-set size / subset size.
                    lmbda_ = lmbda * X_train.shape[0] / get_ell_from_subset
                else:
                    lmbda_ = lmbda
                random_subset = random.sample(range(0, X_train.shape[0]),
                                              get_ell_from_subset)
                screener_ell = EllipsoidScreener(
                    lmbda=lmbda_,
                    mu=mu,
                    loss=loss,
                    penalty=penalty,
                    intercept=intercept,
                    classification=True,
                    n_ellipsoid_steps=int(
                        (epoch - better_init) * X_train.shape[0] /
                        get_ell_from_subset),
                    better_init=0,
                    better_radius=0,
                    cut=cut,
                    clip_ell=clip_ell,
                    use_sphere=use_sphere).fit(X_train[random_subset],
                                               y_train[random_subset],
                                               init=z_init,
                                               rad=rad_init)
                scores = screener_ell.screen(X_train, y_train)
                scores_screening_all[epoch - 1] += get_nb_safe(
                    scores, mu, classification=True)
                if use_sphere:
                    print('SCREEN ELL RADIUS', screener_ell.squared_radius)

        # ``scores`` is None only when no epoch ran; nothing to check then.
        if guarantee and scores is not None:
            idx_safeell = np.where(scores > -mu)[0]
            print('SCORES ', scores)
            print('NB TO KEEP', len(idx_safeell))
            if len(idx_safeell) != 0:
                estimator_whole = fit_estimator(X_train, y_train, loss,
                                                penalty, mu, lmbda,
                                                intercept)
                # Computed here instead of reusing the epoch loop's variable,
                # which was unbound whenever the ellipsoid branch had not run.
                if rescale:
                    lmbda_screened = (lmbda * X_train.shape[0] /
                                      len(idx_safeell))
                else:
                    lmbda_screened = lmbda
                estimator_screened = fit_estimator(X_train[idx_safeell],
                                                   y_train[idx_safeell],
                                                   loss, penalty, mu,
                                                   lmbda_screened, intercept)
                temp = np.array([
                    estimator_whole.score(X_train, y_train),
                    estimator_screened.score(X_train, y_train)
                ])
                print('SAFE GUARANTEE : ', temp)
                safe_guarantee += temp

    if acc:
        # Cancels the division by the training-set size below, so accuracy
        # runs are averaged over experiments only.
        scores_screening_all = scores_screening_all * X_train.shape[0]
    data = {
        'step_table':
        better_init + n_ellipsoid_steps *
        (get_ell_from_subset / X_train.shape[0]),
        'scores_screening':
        scores_screening_all / (X_train.shape[0] * nb_exp),
        'safe_guarantee':
        safe_guarantee / nb_exp
    }
    print(data)
    save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
    os.makedirs(save_dataset_folder, exist_ok=True)
    if not dontsave:
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')

    if plot:
        plot_experiment(data, zoom=zoom)

    print('END')

    return