Example #1
0
    def set_objective(self, X, y, lmbd):
        """Record the problem data and prepare the Cyanure classifier.

        Parameters
        ----------
        X : design matrix; only ``X.shape[0]`` and its sparse format are
            inspected here.
        y : targets, stored untouched.
        lmbd : regularization strength, stored untouched.

        Side effects: sets ``self.X``, ``self.y``, ``self.lmbd``,
        ``self.solver`` and ``self.solver_parameter``.
        """
        self.X = X
        self.y = y
        self.lmbd = lmbd

        # A CSC matrix is re-encoded as CSR; any other input is kept as is.
        # NOTE(review): presumably the solver wants row-oriented sparse
        # data -- confirm against the Cyanure documentation.
        if scipy.sparse.issparse(X) and scipy.sparse.isspmatrix_csc(X):
            self.X = scipy.sparse.csr_matrix(X)

        self.solver = BinaryClassifier(
            loss='logistic',
            penalty='l1',
            fit_intercept=False,
        )

        # Keyword arguments kept aside for the later call to ``fit``; the
        # penalty passed to the solver is ``lmbd`` divided by the row count.
        n_rows = self.X.shape[0]
        self.solver_parameter = {
            'lambd': self.lmbd / n_rows,
            'solver': 'auto',
            'it0': 1000000,
            'tol': 1e-12,
            'verbose': False,
        }
    def fit(self, X_train, y_train, init=None):
        """Fit a squared-hinge classifier and derive the screening radius.

        Parameters
        ----------
        X_train, y_train : training data, also stored on the instance.
        init : optional starting weights. Only used when ``self.ars`` is
            truthy, where it is assigned to the classifier's ``w`` and the
            solver is asked to restart from it.

        Returns
        -------
        self, with ``z``, ``loss``, ``dg`` and ``squared_radius`` set. The
        non-``ars`` path additionally sets ``first_obj`` and ``first_dg``.
        """
        t_begin = time.time()
        self.X_train, self.y_train = X_train, y_train

        if not self.ars:

            def fitted_linear_svc(n_iter):
                # Same configuration for both fits; only the iteration
                # budget differs.
                model = LinearSVC(loss='squared_hinge',
                                  dual=False,
                                  C=1 / self.lmbda,
                                  fit_intercept=False,
                                  max_iter=n_iter,
                                  tol=1.0e-20)
                return model.fit(self.X_train, self.y_train)

            # Objective / gap of a model fitted with a zero iteration budget.
            self.first_obj, self.first_dg = self.get_duality_gap(
                fitted_linear_svc(0))

            trained = fitted_linear_svc(self.n_epochs)
            self.z = trained.coef_.reshape(-1, )
            self.loss, self.dg = self.get_duality_gap(trained)
        else:
            classifier = BinaryClassifier(loss='sqhinge', penalty='l2')
            warm_start = init is not None
            if warm_start:
                classifier.w = init

            history = classifier.fit(X_train,
                                     y_train,
                                     lambd=self.lmbda,
                                     solver='qning-svrg',
                                     max_epochs=self.n_epochs,
                                     it0=1,
                                     tol=1.0e-20,
                                     restart=warm_start,
                                     verbose=False)
            # Last column of the returned history: row 1 is used as the
            # loss and row 2 is subtracted from it to obtain the gap.
            # NOTE(review): presumably primal / dual objectives -- confirm.
            self.loss = history[1, -1]
            self.dg = self.loss - history[2, -1]
            self.z = classifier.w.reshape(-1, )

        self.squared_radius = 2 * self.dg / self.lmbda
        elapsed = time.time() - t_begin
        print('Time to fit DualityGapScreener :', elapsed)
        return self
Example #3
0
def fit_estimator(X,
                  y,
                  loss,
                  penalty,
                  mu,
                  lmbda,
                  intercept,
                  max_iter=10000,
                  ars=False):
    """Build and fit the estimator matching a (loss, penalty) combination.

    Parameters
    ----------
    X, y : training data handed to the estimator's ``fit``.
    loss : one of ``'truncated_squared'``, ``'squared'``, ``'hinge'``,
        ``'squared_hinge'``, ``'safe_logistic'`` or ``'logistic'``.
    penalty : penalty name. ``'l1'`` is required for the two squared
        losses and ``'l2'`` for ``'hinge'``; otherwise it is forwarded.
    mu : accepted for interface compatibility; not used in this function.
    lmbda : regularization strength. Passed as ``alpha`` / ``lambd`` /
        ``lmbda`` or converted to ``C = 1 / lmbda`` depending on the
        estimator.
    intercept : whether the estimator fits an intercept (not forwarded
        to ``SafeLogistic``).
    max_iter : iteration budget, forwarded where the branch uses it.
    ars : for ``'squared_hinge'`` only, selects ``BinaryClassifier``
        (truthy) instead of ``LinearSVC`` (falsy).

    Returns
    -------
    The fitted estimator.

    Raises
    ------
    ValueError
        If the (loss, penalty) combination is not implemented. Previously
        this case printed a message and then failed with an unrelated
        ``UnboundLocalError`` on the ``return`` statement.
    """
    if loss in ('truncated_squared', 'squared') and penalty == 'l1':
        # Both squared variants are fitted with the same Lasso model.
        return Lasso(alpha=lmbda,
                     fit_intercept=intercept,
                     max_iter=max_iter).fit(X, y)

    if loss == 'hinge' and penalty == 'l2':
        return LinearSVC(C=1 / lmbda,
                         loss=loss,
                         penalty=penalty,
                         fit_intercept=intercept,
                         max_iter=max_iter).fit(X, y)

    if loss == 'squared_hinge':
        if not ars:
            # NOTE(review): this branch uses a fixed budget of 1000 and
            # ignores ``max_iter`` -- kept as is, confirm it is intended.
            return LinearSVC(C=1 / lmbda,
                             loss=loss,
                             dual=False,
                             penalty=penalty,
                             fit_intercept=intercept,
                             max_iter=1000).fit(X, y)
        estimator = BinaryClassifier(loss='sqhinge',
                                     penalty=penalty,
                                     fit_intercept=intercept)
        estimator.fit(X,
                      y,
                      lambd=lmbda,
                      solver='catalyst-miso',
                      nepochs=max_iter,
                      verbose=False)
        return estimator

    if loss == 'safe_logistic':
        return SafeLogistic(lmbda=lmbda,
                            penalty=penalty,
                            max_iter=max_iter).fit(X, y)

    if loss == 'logistic':
        return LogisticRegression(C=1 / lmbda,
                                  penalty=penalty,
                                  fit_intercept=intercept).fit(X, y)

    raise ValueError(
        'ERROR, you picked a combination which is not implemented: '
        'loss={!r}, penalty={!r}.'.format(loss, penalty))
Example #4
0
class Solver(BaseSolver):
    """Solver plugin that delegates to Cyanure's ``BinaryClassifier``.

    The classifier is configured for an l1-penalized logistic loss with
    no intercept.
    """
    name = 'Cyanure'

    install_cmd = 'pip'
    package_name = 'cyanure-mkl'
    package_import = 'cyanure'

    def set_objective(self, X, y, lmbd):
        """Store the problem data and build the classifier and its options."""
        self.X = X
        self.y = y
        self.lmbd = lmbd

        self.solver = BinaryClassifier(
            loss='logistic',
            penalty='l1',
            fit_intercept=False,
        )
        # The penalty handed to the solver is ``lmbd`` divided by the
        # number of rows of ``X``.
        n_rows = self.X.shape[0]
        self.solver_parameter = {
            'lambd': self.lmbd / n_rows,
            'tol': 1e-12,
            'verbose': False,
        }

    def run(self, n_iter):
        """Fit the classifier with ``n_iter`` as its ``max_epochs`` budget."""
        fit_options = self.solver_parameter
        self.solver.fit(self.X, self.y, max_epochs=n_iter, **fit_options)

    def get_result(self):
        """Return the weights reported by the classifier."""
        weights = self.solver.get_weights()
        return weights
def experiment_regpath(dataset, synth_params, size, scale_data, redundant,
                       noise, lmbda_grid_start, lmbda_grid_end, lmbda_grid_num,
                       mu, loss, penalty, intercept, n_ellipsoid_steps,
                       n_epochs, n_epochs_ell_path, cut, get_ell_from_subset,
                       clip_ell, use_sphere, nb_exp, dontsave):
    """Compare solver budgets with and without ellipsoid screening on a path.

    For each of ``nb_exp`` seeded train/test splits, the function walks a
    log-spaced grid of regularization values. The first grid value only
    initialises two warm-started classifiers. Every later value is solved
    twice: once on the full training set ("noscreen"), and once on the
    samples kept by an ``EllipsoidScreener`` ("ell"). Budgets and training
    scores are accumulated per grid value, averaged over the runs, printed,
    and optionally saved with ``np.save``.

    Parameters
    ----------
    dataset, synth_params, size, redundant, noise :
        forwarded to ``load_experiment``; ``dataset`` also names the
        results sub-folder.
    scale_data : accepted for interface compatibility; not used here.
    lmbda_grid_start, lmbda_grid_end, lmbda_grid_num :
        arguments of ``np.logspace`` (start / stop exponents and count).
    mu : screening threshold; samples with score ``> -mu`` are kept.
        Also forwarded to the screener.
    loss, penalty, intercept, n_ellipsoid_steps, cut, clip_ell, use_sphere :
        forwarded to ``EllipsoidScreener`` (``penalty`` and ``intercept``
        also configure the classifiers).
    n_epochs : epoch budget of the initial ``DualityGapScreener``.
    n_epochs_ell_path : epoch budget of the short fit that precedes each
        screening step along the path.
    get_ell_from_subset : number of training samples drawn to fit the
        ellipsoid.
    nb_exp : number of repetitions; seeds ``1 .. nb_exp`` are used.
    dontsave : when truthy, nothing is written to disk (neither the
        results file nor the results directory).

    Returns
    -------
    None. Results are printed and, unless ``dontsave``, saved.
    """

    def make_ellipsoid_screener(lmbda, n_train):
        # One screener configuration is shared by every grid value; the
        # regularization is rescaled by n_train / subset size.
        return EllipsoidScreener(
            lmbda=lmbda * n_train / get_ell_from_subset,
            mu=mu,
            loss=loss,
            penalty=penalty,
            intercept=intercept,
            classification=True,
            n_ellipsoid_steps=n_ellipsoid_steps,
            cut=cut,
            clip_ell=clip_ell,
            use_sphere=use_sphere,
            ars=True)

    print('START')

    exp_title = (
        'X_size_{}_ell_subset_{}_loss_{}_n_ell_{}_mu_{}_cut_ell_{}'
        '_n_epochs_{}_n_ell_path_{}_use_sphere_{}_start_{}_end_{}_num_{}_regpath'
    ).format(size, get_ell_from_subset, loss, n_ellipsoid_steps, mu, cut,
             n_epochs, n_epochs_ell_path, use_sphere, lmbda_grid_start,
             lmbda_grid_end, lmbda_grid_num)
    print(exp_title)

    X, y = load_experiment(dataset,
                           synth_params,
                           size,
                           redundant,
                           noise,
                           classification=True)

    data = {}

    lmbda_grid = np.logspace(lmbda_grid_start,
                             lmbda_grid_end,
                             num=lmbda_grid_num)
    # One accumulator per metric and grid value, summed over the runs.
    for lmbda in lmbda_grid:
        data['budget_ell_lmbda_{}'.format(lmbda)] = 0
        data['budget_noscreen_lmbda_{}'.format(lmbda)] = 0
        data['score_ell_lmbda_{}'.format(lmbda)] = 0
        data['score_noscreen_lmbda_{}'.format(lmbda)] = 0

    for exp_seed in range(1, nb_exp + 1):
        # Seed both generators: ``random`` drives the subset sampling and
        # numpy's global state is reset before the train/test split.
        random.seed(exp_seed)
        np.random.seed(exp_seed)
        X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2)
        n_train = X_train.shape[0]

        for lmbda in lmbda_grid:
            print('---------- LMBDA ---------: ', lmbda)
            budget_ell = 0
            budget_noscreen = 0
            if lmbda == lmbda_grid[0]:
                # First grid value: no budget is counted; it only creates
                # the two classifiers that are warm-started afterwards.
                screener_ell = make_ellipsoid_screener(lmbda, n_train)
                screener_dg = DualityGapScreener(lmbda=lmbda,
                                                 n_epochs=n_epochs,
                                                 ars=True)
                screener_dg.fit(X_train, y_train)
                print('Init radius : ', screener_dg.squared_radius)
                # Kept even though this screener is not used to screen at
                # this grid value: the draw advances the ``random`` state.
                random_subset = random.sample(range(0, n_train),
                                              get_ell_from_subset)
                screener_ell.fit(X_train[random_subset],
                                 y_train[random_subset],
                                 init=screener_dg.z,
                                 rad=screener_dg.squared_radius)

                svc = BinaryClassifier(loss='sqhinge',
                                       penalty=penalty,
                                       fit_intercept=intercept)
                svc.fit(X_train,
                        y_train,
                        solver='qning-svrg',
                        lambd=lmbda,
                        verbose=False)

                svc_ell = BinaryClassifier(loss='sqhinge',
                                           penalty=penalty,
                                           fit_intercept=intercept)
                svc_ell.fit(X_train,
                            y_train,
                            solver='qning-svrg',
                            lambd=lmbda,
                            verbose=False)

            else:
                # --- Baseline: warm-started solve on the full data. ---
                # Entry [0, -1] of the fit history is used as the number
                # of epochs spent (NOTE(review): confirm row semantics).
                budget_fit_solver_noscreen = svc.fit(X_train,
                                                     y_train,
                                                     solver='qning-svrg',
                                                     it0=1,
                                                     lambd=lmbda,
                                                     restart=True,
                                                     verbose=False)[0, -1]
                print('Epoch fit solver no screen :',
                      budget_fit_solver_noscreen)
                budget_noscreen += budget_fit_solver_noscreen * n_train
                print('Budget solver no screen :', budget_noscreen)

                # --- Screening: short fit to get a duality gap. ---
                info = svc_ell.fit(X_train,
                                   y_train,
                                   solver='qning-svrg',
                                   lambd=lmbda,
                                   verbose=False,
                                   max_epochs=n_epochs_ell_path,
                                   it0=1,
                                   restart=True)
                dg = info[1, -1] - info[2, -1]
                init_rad = 2 * dg / lmbda

                screener_ell = make_ellipsoid_screener(lmbda, n_train)
                random_subset = random.sample(range(0, n_train),
                                              get_ell_from_subset)
                print('Init rad : ', init_rad)
                screener_ell.fit(X_train[random_subset],
                                 y_train[random_subset],
                                 init=svc_ell.w,
                                 rad=init_rad)
                if use_sphere and n_ellipsoid_steps > 0:
                    print('Final rad : ', screener_ell.squared_radius)

                scores_ell = screener_ell.screen(X_train, y_train)
                tokeep = np.where(scores_ell > -mu)[0]
                n_kept = len(tokeep)
                print('To keep : ', n_kept)

                # Solve on the kept samples only; the regularization is
                # rescaled by n_train / n_kept.
                budget_fit_solver = svc_ell.fit(
                    X_train[tokeep],
                    y_train[tokeep],
                    solver='qning-svrg',
                    it0=1,
                    lambd=lmbda * (n_train / n_kept),
                    restart=True,
                    verbose=False)[0, -1]

                # Total screened budget = short initial fit + ellipsoid
                # steps on the subset (+ one extra pass when ``cut``) +
                # final solve on the kept samples.
                budget_init_ell = (n_epochs_ell_path) * n_train
                budget_fit_ell = n_ellipsoid_steps * get_ell_from_subset
                if cut:
                    budget_fit_ell += get_ell_from_subset
                budget_solve_kept = budget_fit_solver * n_kept
                budget_ell += (budget_init_ell + budget_fit_ell +
                               budget_solve_kept)

                print('Epoch fit solver screen', budget_fit_solver)
                print('Budget solver screen : ', budget_init_ell,
                      budget_fit_ell, budget_solve_kept)

            score_ell = svc_ell.score(X_train, y_train)
            score_noscreen = svc.score(X_train, y_train)

            print('Score on screened : ', score_ell, 'Score on whole : ',
                  score_noscreen)

            data['budget_ell_lmbda_{}'.format(lmbda)] += budget_ell
            data['budget_noscreen_lmbda_{}'.format(lmbda)] += budget_noscreen
            data['score_ell_lmbda_{}'.format(lmbda)] += score_ell
            data['score_noscreen_lmbda_{}'.format(lmbda)] += score_noscreen

    # Average every accumulator over the repetitions.
    data = {key: float(total / nb_exp) for key, total in data.items()}
    if not dontsave:
        # Only touch the filesystem when saving was requested.
        save_dataset_folder = os.path.join(RESULTS_PATH, dataset)
        os.makedirs(save_dataset_folder, exist_ok=True)
        np.save(os.path.join(save_dataset_folder, exp_title), data)
        print('RESULTS SAVED!')

    print('END')

    print(data)
                           size=60000,
                           redundant=0,
                           noise=None,
                           classification=True)

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    prop = np.unique(y_test, return_counts=True)[1]
    print('BASELINE : ', 1 - prop[1] / prop[0])

    screener = DualityGapScreener(lmbda=1e-5, n_epochs=9,
                                  ars=True).fit(X_train, y_train)
    print('Squared Radius : ', 2 * screener.dg / 1e-5)
    print('Score : ', screener.score(X_test, y_test))

    svc_ell = BinaryClassifier(loss='sqhinge', penalty='l2')
    budget_fit_solver = svc_ell.fit(
        X_train,
        y_train,
        solver='qning-svrg',
        it0=1,
        lambd=1e-5,
        verbose=False)[0, -1]  #* X_train.shape[0] #len(tokeep_ell)
    print(budget_fit_solver, budget_fit_solver * X_train.shape[0])

    scores = screener.screen(X_train, y_train)
    tokeep = np.where(scores > -1.0)[0]
    print('Nb kept : ', len(tokeep))
    svc_ell = BinaryClassifier(loss='sqhinge', penalty='l2')
    budget_fit_solver = svc_ell.fit(
        X_train[tokeep],