Example 1
    def test_ridge_singular(self):
        # test on a singular matrix
        rng = np.random.RandomState(0)
        n_samples, n_features = 6, 6
        y = rng.randn(n_samples // 2)
        y = np.concatenate((y, y))
        X = rng.randn(n_samples // 2, n_features)
        X = np.concatenate((X, X), axis=0)

        ridge = RidgeRegression(alpha=0)
        ridge.train(X, y)
        self.assertGreater(ridge.score(X, y), 0.9)
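Note: with alpha=0 and duplicated rows, X^T X is singular, so a closed-form solver cannot rely on a plain matrix inverse. A minimal sketch of what such a fit could look like, using NumPy's pseudoinverse (fit_ridge_pinv is a hypothetical helper, not the implementation under test):

import numpy as np

def fit_ridge_pinv(X, y, alpha=0.0):
    # Normal equations via the pseudoinverse: for alpha=0 on a
    # rank-deficient X (duplicated rows), X^T X is singular and
    # np.linalg.inv would fail; np.linalg.pinv returns the
    # minimum-norm least-squares solution instead.
    n_features = X.shape[1]
    return np.linalg.pinv(X.T @ X + alpha * np.eye(n_features)) @ X.T @ y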
Example 2
    def test_ridge_vs_lstsq(self):
        # With alpha=0, Ridge and ordinary least squares yield the same solution.
        rng = np.random.RandomState(0)
        # we need more samples than features
        n_samples, n_features = 5, 4
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)

        ridge = RidgeRegression(alpha=0)
        ols = LinearRegression(fit_intercept=False)

        ridge.fit(X, y)
        ols.fit(X, y)
        assert_array_almost_equal(ridge.w, ols.coef_)
Example 3
    def test_ridge_vs_lstsq(self):
        # With alpha=0, Ridge and ordinary least squares
        # should yield nearly the same solution.
        rng = np.random.RandomState(0)
        # we need more samples than features
        n_samples, n_features = 6, 4
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)

        ridge = RidgeRegression(alpha=0, epoch_num=600, learning_rate=0.1)
        ols = LinearRegression(fit_intercept=True)
        np.random.seed(2020)
        ridge.fit(X, y)
        ols.fit(X, y)
        self.assertTrue(
            np.linalg.norm(ridge.theta.reshape([n_features]) - ols.coef_) < 0.01)
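This variant is trained iteratively (epoch_num, learning_rate), which is why the comparison allows a small tolerance. A minimal sketch of full-batch gradient descent on the ridge objective, under the assumption that an intercept is fit but left unpenalized (fit_ridge_gd is a hypothetical helper):

import numpy as np

def fit_ridge_gd(X, y, alpha=0.0, epoch_num=600, learning_rate=0.1):
    # Full-batch gradient descent on
    #   (1/n) * ||X w + b - y||^2 + alpha * ||w||^2,
    # leaving the intercept b unpenalized.
    n_samples, n_features = X.shape
    w = np.zeros(n_features)
    b = 0.0
    for _ in range(epoch_num):
        residual = X @ w + b - y
        w -= learning_rate * (2.0 / n_samples * X.T @ residual + 2.0 * alpha * w)
        b -= learning_rate * (2.0 / n_samples * residual.sum())
    return w, b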
Example 4
    def test_ridge(self):
        # Ridge regression convergence test: compare to scikit-learn's exact solution
        rng = np.random.RandomState(0)
        alpha = 1.0

        # With more samples than features
        n_samples, n_features = 6, 5
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)

        ridge = Ridge(alpha=alpha, fit_intercept=False)
        custom_implemented_ridge = RidgeRegression(alpha=alpha)
        ridge.fit(X, y)
        custom_implemented_ridge.fit(X, y)
        self.assertEqual(custom_implemented_ridge.w.shape, (X.shape[1], ))
        self.assertAlmostEqual(ridge.score(X, y),
                               custom_implemented_ridge.score(X, y))
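For reference, the closed-form solution this test expects the custom model to match (with fit_intercept=False) is w = (X^T X + alpha I)^{-1} X^T y; a minimal sketch, with fit_ridge_closed_form as a hypothetical helper:

import numpy as np

def fit_ridge_closed_form(X, y, alpha=1.0):
    # Closed-form ridge: w = (X^T X + alpha * I)^{-1} X^T y.
    # np.linalg.solve is used rather than forming the inverse explicitly.
    n_features = X.shape[1]
    return np.linalg.solve(X.T @ X + alpha * np.eye(n_features), X.T @ y)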
Example 5
    def test_ridge(self):
        # Ridge regression convergence test:
        # compare to the scikit-learn implementation
        rng = np.random.RandomState(0)
        alpha = 1.0

        # With more samples than features
        n_samples, n_features = 6, 5
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)

        ridge = Ridge(alpha=alpha, fit_intercept=True, solver='sag')
        custom_implemented_ridge = RidgeRegression(alpha=alpha)
        ridge.fit(X, y)
        np.random.seed(2020)
        custom_implemented_ridge.fit(X, y)
        self.assertEqual(custom_implemented_ridge.theta.shape, (X.shape[1], 1))
        self.assertTrue(
            custom_implemented_ridge.score(X, y) > ridge.score(X, y) - 0.1)
Example 6
def run_2_1(X_train,
            y_train,
            X_val,
            y_val,
            l2reg_search,
            print_table=True,
            PLOT=True):
    num_features = X_train.shape[1]

    scores = np.zeros(len(l2reg_search))

    for i, l2reg in enumerate(l2reg_search):
        ridge_regression = RidgeRegression(l2reg=l2reg)
        ridge_regression.fit(X_train, y_train)
        scores[i] = ridge_regression.score(X_val, y_val)

    if PLOT:
        fig, ax = plt.subplots()
        ax.semilogx(l2reg_search, scores)
        ax.grid()
        ax.set_title("Validation Performance vs. L2 Regularization Parameter")
        ax.set_xlabel("L2-Penalty Regularization Parameter")
        ax.set_ylabel("Average Square Error")
        plt.show()

    # Print a vertical table of (l2reg, score) pairs.
    # TODO: find a cleaner way to do this with pandas later.
    if print_table:
        print("L2_Parameter | Average Square Error")
        for i in range(len(l2reg_search)):
            print(l2reg_search[i], "|", scores[i])

    # Choose the L2 parameter that minimizes the validation error.
    l2reg_opt = l2reg_search[np.argmin(scores)]

    return l2reg_opt
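A hypothetical usage sketch of run_2_1 with synthetic data and a log-spaced grid (the data shapes and names here are illustrative, not from the original problem set):

import numpy as np

rng = np.random.RandomState(0)
X_train, y_train = rng.randn(80, 10), rng.randn(80)
X_val, y_val = rng.randn(20, 10), rng.randn(20)
l2reg_search = 10.0 ** np.arange(-6, 1)  # 1e-6, 1e-5, ..., 1e0

l2reg_opt = run_2_1(X_train, y_train, X_val, y_val,
                    l2reg_search, print_table=True, PLOT=False)
print("best l2reg:", l2reg_opt)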
Example 7
def do_grid_search_ridge(X_train, y_train, X_val, y_val):

    # Now let's use sklearn to help us do hyperparameter tuning
    # GridSearchCV.fit by default splits the data into training and
    # validation itself; we want to use our own splits, so we need to stack our
    # training and validation sets together, and supply an index
    # (validation_fold) to specify which entries are train and which are
    # validation.
    X_train_val = np.vstack((X_train, X_val))
    y_train_val = np.concatenate((y_train, y_val))
    val_fold = [-1] * len(X_train) + [0] * len(X_val)  # 0 marks validation entries

    # Now we set up and do the grid search over l2reg. The np.concatenate
    # command illustrates my search for the best hyperparameter. In each line,
    # I'm zooming in to a particular hyperparameter range that showed promise
    # in the previous grid. This approach works reasonably well when
    # performance is convex as a function of the hyperparameter, which it seems
    # to be here.
    # param_grid = [{'l2reg':np.unique(np.concatenate((10.**np.arange(-6,1,1),
    #                                        np.arange(1,3,.3)
    #                                          ))) }]
    param_grid = [{'l2reg': np.unique(10.**np.arange(-3, 0.5, 0.1))}]

    ridge_regression_estimator = RidgeRegression()  # initialize estimator
    grid = GridSearchCV(
        ridge_regression_estimator,  # makes use of BaseEstimator wrapper
        param_grid,
        return_train_score=True,
        cv=PredefinedSplit(test_fold=val_fold),
        refit=True,
        scoring=make_scorer(mean_squared_error, greater_is_better=False))
    grid.fit(X_train_val, y_train_val)

    df = pd.DataFrame(grid.cv_results_)
    # Flip the sign of the scores back: GridSearchCV maximizes, so it
    # negates the score when greater_is_better=False.
    df['mean_test_score'] = -df['mean_test_score']
    df['mean_train_score'] = -df['mean_train_score']
    cols_to_keep = ["param_l2reg", "mean_test_score", "mean_train_score"]
    df_toshow = df[cols_to_keep].fillna('-')
    df_toshow = df_toshow.sort_values(by=["param_l2reg"])
    return grid, df_toshow
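The val_fold convention (-1 = always in training, 0 = in the single validation fold) can be checked in isolation with scikit-learn's PredefinedSplit; a minimal standalone demo:

import numpy as np
from sklearn.model_selection import PredefinedSplit

val_fold = [-1, -1, -1, 0, 0]  # first three entries train, last two validate
ps = PredefinedSplit(test_fold=val_fold)
for train_idx, val_idx in ps.split():
    print("train:", train_idx, "val:", val_idx)
# prints: train: [0 1 2] val: [3 4]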
Example 8
    # # # # # # # # # # # # # # # #
    print("Running Nystroem Approximation")
    factor = np.max(np.linalg.norm(train_features, axis=1))
    train_features /= factor
    test_features /= factor
    dim = min(train_features.shape[1] * 2, train_features.shape[0])
    print("Nystroem dim is {}".format(dim))

    # Use the Nyström approximation in sklearn
    approx = Nystroem(kernel='rbf', gamma=1., n_components=dim)
    approx.fit(train_features)
    train_features = approx.transform(train_features)
    test_features = approx.transform(test_features)

    # # # # # # # # # # # # # # # #
    # Ridge regression with cross validation
    # # # # # # # # # # # # # # # #

    style = 'c' if train_features.shape[0] > train_features.shape[1] else 'k'
    clf = GridSearchCV(RidgeRegression(), {
        'alpha': [(10**i) for i in range(-7, 0)],
        'style': [style]
    },
                       n_jobs=4)

    clf.fit(train_features, train_onehot)
    y_pred_ = np.argmax(clf.predict(test_features), axis=-1)
    acc = np.mean(y_pred_ == targets["test"])

    print(acc)
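As a sanity check on the Nyström step, inner products of the transformed features should approximate the exact RBF kernel; a small sketch with synthetic data (the relative error shrinks as n_components grows):

import numpy as np
from sklearn.kernel_approximation import Nystroem
from sklearn.metrics.pairwise import rbf_kernel

rng = np.random.RandomState(0)
X = rng.randn(200, 10)

approx = Nystroem(kernel='rbf', gamma=1., n_components=50, random_state=0)
Z = approx.fit_transform(X)        # (200, 50) approximate feature map
K_exact = rbf_kernel(X, gamma=1.)  # (200, 200) exact kernel matrix
K_approx = Z @ Z.T

print("relative error:",
      np.linalg.norm(K_exact - K_approx) / np.linalg.norm(K_exact))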
Example 9
def main():

    # Load problem
    lasso_data_fname = "lasso_data.pickle"
    x_train, y_train, x_val, y_val, target_fn, coefs_true, featurize = load_problem(
        lasso_data_fname)

    # Generate features
    X_train = featurize(x_train)
    X_val = featurize(x_val)

    # Visualize training data
    # fig, ax = plt.subplots()
    # ax.imshow(X_train)
    # ax.set_title("Design Matrix: Color is Feature Value")
    # ax.set_xlabel("Feature Index")
    # ax.set_ylabel("Example Number")
    # plt.show(block=False)

    # Do hyperparameter tuning with our ridge regression
    # this is done on the training and validation set
    grid, results = do_grid_search_ridge(X_train, y_train, X_val, y_val)
    print(results)

    # Plot validation performance vs regularization parameter
    fig, ax = plt.subplots()

    # ax.loglog(results["param_l2reg"], results["mean_test_score"])
    ax.semilogx(results["param_l2reg"], results["mean_test_score"])
    ax.grid()
    ax.set_title("Validation Performance vs L2 Regularization")
    ax.set_xlabel("L2-Penalty Regularization Parameter")
    ax.set_ylabel("Mean Squared Error")
    plt.show()

    # Let's plot prediction functions and compare coefficients for several fits
    # and the target function.
    pred_fns = []
    x = np.sort(np.concatenate([np.arange(0, 1, .001), x_train]))
    name = "Target Parameter Values (i.e. Bayes Optimal)"
    pred_fns.append({"name": name, "coefs": coefs_true, "preds": target_fn(x)})

    l2regs = [0, grid.best_params_['l2reg'], 1]
    X = featurize(x)
    for l2reg in l2regs:  # for each chosen regularization constant
        ridge_regression_estimator = RidgeRegression(l2reg=l2reg)
        ridge_regression_estimator.fit(X_train, y_train)
        name = "Ridge with L2Reg=" + str(l2reg)
        pred_fns.append({
            "name": name,
            "coefs": ridge_regression_estimator.w_,
            "preds": ridge_regression_estimator.predict(X)
        })

    # f = plot_prediction_functions(x, pred_fns, x_train, y_train, legend_loc="best")
    # plt.show()

    # f = compare_parameter_vectors(pred_fns)
    # plt.show()

    # Hard-threshold the best model's coefficients at several cutoff values
    # and compare the resulting prediction functions.
    cutoffs = [10**(-3), 10**(-2), 10**(-1)]
    best = pred_fns[1]
    ctf_fns = []
    for cutoff in cutoffs:
        ridge_regression_estimator = RidgeRegression()
        W = [w * (abs(w) > cutoff) for w in best["coefs"]]
        ridge_regression_estimator.w_ = W
        name = "Ridge with cutoff=" + str(cutoff)
        ctf_fns.append({
            "name": name,
            "coefs": W,
            "preds": ridge_regression_estimator.predict(X)
        })

    f = plot_prediction_functions(x,
                                  ctf_fns,
                                  x_train,
                                  y_train,
                                  legend_loc="best")
    plt.show()
Example 10
def main():
    if len(argv) == 2:
        program, sol = argv
    else:
        raise RuntimeError("USAGE: python solution.py OPTION[2 or 3]")

    # load data and split data
    lasso_data_fname = "lasso_data.pickle"
    x_train, y_train, x_val, y_val, target_fn, coefs_true, featurize = load_problem(
        lasso_data_fname)

    # turn the 1D inputs into high-dimensional featurized data
    X_train = featurize(x_train)
    X_val = featurize(x_val)

    if sol == "2":
        #### 2.1
        # create array of possible L2Reg parameters
        l2reg_search = 10.**np.arange(-6, +1, 1)
        # search through l2reg_search
        l2reg_opt = run_2_1(X_train,
                            y_train,
                            X_val,
                            y_val,
                            l2reg_search,
                            print_table=False,
                            PLOT=False)

        #### 2.2
        # x is a dense grid on [0, 1) plus the x_train inputs, to make plots smooth
        x = np.sort(np.concatenate([np.arange(0, 1, .001), x_train]))
        X = featurize(x)

        # pred_fns is a list of dicts with "name", "coefs" and "preds"
        pred_fns = []
        coefs_opt = 0  #for question 2.3
        # first entry: Target function
        pred_fns.append({
            "name": "Target",
            "coefs": coefs_true,
            "preds": target_fn(x)
        })

        l2reg_values = [0, l2reg_opt]
        # next entries: prediction functions for L2Reg parameters in l2reg_values
        for l2reg in l2reg_values:
            ridge = RidgeRegression(l2reg=l2reg)
            ridge.fit(X_train, y_train)
            pred_fns.append({
                "name": "Ridge with L2Reg=" + str(l2reg),
                "coefs": ridge.w_,
                "preds": ridge.predict(X)
            })
            # for question 2.3
            if l2reg == l2reg_opt:
                coefs_opt = ridge.w_
        # with pred_fns populated, plot
        # "PRED": prediction functions
        # "COEF": coefficients
        plots = ["PRED", "COEF"]
        #plots=[]
        run_2_2(x, x_train, y_train, pred_fns, plot=plots)

        #### 2.3
        epsilon = []
        #epsilon = [1e-6, 1e-3, 1e-2, 5e-2, 1e-1, 5e-1]
        for e in epsilon:
            run_2_3(coefs_true, coefs_opt, epsilon=e)

    if sol == "3":
        #### 3.2 - experiment with Lasso
        # Found that start="RR", order="cyclic", epsilon=1e-8 works MARGINALLY better
        #run_3_2(X_train, y_train, X_val, y_val, l1reg=1, epsilons=[1e-8, 1e-3])

        #### 3.3
        #### Part a: find optimal l1reg
        # create array of possible L1Reg parameters
        #l1reg_search = 10.**np.arange(-6, 2, 1)
        # search through l1reg_search
        #l1reg_opt = run_3_3_a(X_train, y_train, X_val, y_val, l1reg_search)
        l1reg_opt = 1.0  # found from above

        #### 3.3
        #### Part b: plot corresponding prediction function
        # x is a dense grid on [0, 1) plus the x_train inputs, to make plots smooth
        x = np.sort(np.concatenate([np.arange(0, 1, .001), x_train]))
        X = featurize(x)

        # pred_fns is a list of dicts with "name", "coefs" and "preds"
        pred_fns = []
        # first entry: Target function
        pred_fns.append({
            "name": "Target",
            "coefs": coefs_true,
            "preds": target_fn(x)
        })

        lasso = LassoRegression(l1reg=l1reg_opt)
        lasso.shooting_alg(X_train, y_train)

        pred_fns.append({
            "name": "Ridge with L1Reg=" + str(l1reg_opt),
            "coefs": lasso.w,
            "preds": lasso.predict(X)
        })

        # with pred_fns populated, plot
        # "PRED": prediction functions
        # "COEF": coefficients
        run_3_3_b(x, x_train, y_train, pred_fns, plot=[])

        run_3_4(X_train, y_train, X_val, y_val, p=0.8)
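lasso.shooting_alg above refers to the shooting (coordinate descent) algorithm; a minimal sketch of one standard formulation, minimizing ||Xw - y||^2 + l1reg * ||w||_1 with a soft-threshold update per coordinate. shooting_alg here is a free function for illustration, not the class method used above:

import numpy as np

def shooting_alg(X, y, l1reg=1.0, n_iters=100):
    # Coordinate descent ("shooting", Fu 1998) for
    #   min_w ||X w - y||_2^2 + l1reg * ||w||_1,
    # warm-started at the ridge solution.
    n_features = X.shape[1]
    w = np.linalg.solve(X.T @ X + l1reg * np.eye(n_features), X.T @ y)
    for _ in range(n_iters):
        for j in range(n_features):
            a_j = 2.0 * X[:, j] @ X[:, j]
            # correlation of feature j with the residual excluding w_j
            c_j = 2.0 * X[:, j] @ (y - X @ w + w[j] * X[:, j])
            if c_j > l1reg:
                w[j] = (c_j - l1reg) / a_j
            elif c_j < -l1reg:
                w[j] = (c_j + l1reg) / a_j
            else:
                w[j] = 0.0
    return w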
Example 11
                           bounds=[(0.0, None)],
                           options={
                               'xtol': 1e-6,
                               'disp': True
                           })
            loss = f(res.x)
            print("fixed design opt reg and loss", res.x, loss)
            args.l2_reg = max(res.x[0], EPS)
    else:
        # construct model
        if args.model == "logistic_regression":
            model = LogisticRegression(input_dim=kernel_approx.n_feat,
                                       n_class=n_class,
                                       reg_lambda=args.l2_reg)
        elif args.model == "ridge_regression":
            model = RidgeRegression(input_dim=kernel_approx.n_feat,
                                    reg_lambda=args.l2_reg)
        if use_cuda:
            model.cuda()
        model.double()

        # set up optimizer
        if args.opt == "sgd":
            print("using sgd optimizer")
            optimizer = optim.SGD(model.parameters(),
                                  lr=args.learning_rate,
                                  weight_decay=args.l2_reg)
        elif args.opt == "lpsgd":
            print("using lp sgd optimizer")
            optimizer = halp.optim.LPSGD(model.parameters(),
                                         lr=args.learning_rate,
                                         scale_factor=args.scale_model,
Example 12
        mlp = MLP(epoch_num=400, batch_size=batch_size, learning_rate=0.1)
        np.random.seed(2020)
        mlp.train(x_train, y_train)
        y_predict = mlp.predict(x_train)
        self.assertTrue(numerical_accuracy(y_predict, y_train) > 0.95)

    def test_xor(self):
        X = np.array([0, 0, 1, 1, 0, 1, 1, 0], dtype=np.float32).reshape(4, 2)
        Y = np.array([0, 0, 1, 1], dtype=np.float32)
        mlp = MLP(hidden_layer_sizes=(2, ), epoch_num=1600, learning_rate=0.22)
        np.random.seed(2020)
        mlp.train(X, Y)
        self.assertAlmostEqual(numerical_accuracy(mlp.predict(X), Y), 1.0)


@unittest.skipIf(RidgeRegression().skip, 'skip bonus question')
class TestRidgeModel(unittest.TestCase):
    def test_ridge(self):
        # Ridge regression convergence test:
        # compare to the scikit-learn implementation
        rng = np.random.RandomState(0)
        alpha = 1.0

        # With more samples than features
        n_samples, n_features = 6, 5
        y = rng.randn(n_samples)
        X = rng.randn(n_samples, n_features)

        ridge = Ridge(alpha=alpha, fit_intercept=True, solver='sag')
        custom_implemented_ridge = RidgeRegression(alpha=alpha)
        ridge.fit(X, y)
Example 13
def expr(A, b, lmbd, num_cols_list, num_iters=50):
    """
    Run the iterative ridge regression algorithm, comparing sampling
    methods (uniform, leverage scores, and ridge leverage scores) by
    evaluating the relative errors and objective errors of the solutions
    obtained with each method, for varying sketch sizes, at each iteration.

    Args:
        A: array, design matrix.
        b: array, response vector.
        lmbd: float, ridge regularization parameter.
        num_cols_list: array, list of sketch sizes used in the experiments.
        num_iters: int, maximum number of iterations to run the algorithm.

    Returns:
        rel_errs_unif, rel_errs_levr, rel_errs_rdge: each of shape
            (len(num_cols_list), num_iters), relative errors.
        obj_errs_unif, obj_errs_levr, obj_errs_rdge: each of shape
            (len(num_cols_list), num_iters), objective errors.
    """
    model = RidgeRegression(A, b, lmbd=lmbd)
    x_opt = model.direct_solver()
    obj_opt = model.obj_vals(x_opt)

    num_cols_list = list(map(int, num_cols_list))

    rel_errs_unif = np.zeros((len(num_cols_list), num_iters))
    rel_errs_levr = np.zeros((len(num_cols_list), num_iters))
    rel_errs_rdge = np.zeros((len(num_cols_list), num_iters))

    obj_errs_unif = np.zeros((len(num_cols_list), num_iters))
    obj_errs_levr = np.zeros((len(num_cols_list), num_iters))
    obj_errs_rdge = np.zeros((len(num_cols_list), num_iters))

    for k, num_cols in enumerate(num_cols_list):
        print "k = %d; number of sampled columns = %d\n" % (k, num_cols)

        d = A.shape[1]  # total number of columns to sample from
        probs_unif = np.ones(d) / d
        probs_levr = model.leverage_scores()
        probs_rdge = model.ridge_leverage_scores()

        _, x_unif = model.iterative_solver(num_cols, num_iters, probs=probs_unif)
        _, x_levr = model.iterative_solver(num_cols, num_iters, probs=probs_levr)
        _, x_rdge = model.iterative_solver(num_cols, num_iters, probs=probs_rdge)

        rel_errs_unif[k] = model.rel_err(x_unif, x_opt)
        rel_errs_levr[k] = model.rel_err(x_levr, x_opt)
        rel_errs_rdge[k] = model.rel_err(x_rdge, x_opt)

        obj_errs_unif[k] = model.obj_vals(x_unif) / obj_opt - 1.
        obj_errs_levr[k] = model.obj_vals(x_levr) / obj_opt - 1.
        obj_errs_rdge[k] = model.obj_vals(x_rdge) / obj_opt - 1.

    return rel_errs_unif, rel_errs_levr, rel_errs_rdge, \
        obj_errs_unif, obj_errs_levr, obj_errs_rdge
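model.ridge_leverage_scores above is the repository's method; one standard definition of column ridge leverage scores is tau_j = a_j^T (A A^T + lmbd I)^{-1} a_j, normalized into sampling probabilities. A hedged sketch under that assumption (the repo's exact normalization may differ):

import numpy as np

def ridge_leverage_scores(A, lmbd):
    # Column ridge leverage scores:
    #   tau_j = a_j^T (A A^T + lmbd * I)^{-1} a_j,
    # normalized to a probability distribution over columns.
    n_rows = A.shape[0]
    G = np.linalg.solve(A @ A.T + lmbd * np.eye(n_rows), A)  # (A A^T + lmbd I)^{-1} A
    taus = np.einsum('ij,ij->j', A, G)                       # diag(A^T G)
    return taus / taus.sum()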