예제 #1
0
def sjlt_error_vs_iterations():
    """Record per-iteration IHS error for several column sparsities.

    One design ``(X, y, x_star)`` with ``n = 6000`` rows and ``d = 200``
    columns is drawn from ``gaussian_design_unconstrained`` and solved exactly
    with ``np.linalg.lstsq``.  For each column sparsity and each entry of the
    module-level ``sketches``, an ``ihs`` solver is run ``NTRIALS`` times via
    ``ols_fit_new_sketch_track_errors``; the prediction error of every
    returned iterate is accumulated and then averaged over the trials, once
    against the least-squares solution and once against ``x_star``.

    The two result dictionaries, keyed by column sparsity, are pretty-printed
    and written with ``np.save`` under ``../../output/ihs_baselines/``.
    Nothing is returned.
    """
    n = 6_000
    d = 200
    gamma = 5  # sketch size expressed as a multiple of d
    sketch_size = int(gamma * d)
    col_sparsities = [1, 4, 16]
    number_iterations = 20

    # Output dictionaries, keyed by column sparsity.
    error_to_lsq = {s: [] for s in col_sparsities}
    error_to_truth = {s: [] for s in col_sparsities}
    print(error_to_lsq)
    print(error_to_truth)

    X, y, x_star = gaussian_design_unconstrained(n, d, variance=1.0)

    # Least squares estimator.  rcond=None matches the other lstsq calls in
    # this file and silences NumPy's FutureWarning about the default.
    x_opt = np.linalg.lstsq(X, y, rcond=None)[0]

    for col_sparsity in col_sparsities:
        print("Testing col sparsity: {}, num_iterations: {}".format(
            col_sparsity, number_iterations))
        for sketch_method in sketches:
            # Running sums over trials, one slot per iteration.
            lsq_error = np.zeros((number_iterations, ))
            truth_error = np.zeros_like(lsq_error)

            my_ihs = ihs(X, y, sketch_method, sketch_size, col_sparsity)
            for trial in range(NTRIALS):
                print('*' * 80)
                print("{}, trial: {}".format(sketch_method, trial))
                _, x_iters = my_ihs.ols_fit_new_sketch_track_errors(
                    number_iterations)
                # Each column of x_iters is scored as one iterate.
                for it in range(x_iters.shape[1]):
                    iterate = x_iters[:, it]
                    lsq_error[it] += prediction_error(X, iterate, x_opt)
                    truth_error[it] += prediction_error(X, iterate, x_star)
                print(lsq_error)
            mean_lsq_error = lsq_error / NTRIALS
            mean_truth_error = truth_error / NTRIALS
            print(mean_lsq_error)
            # NOTE(review): results are keyed by sparsity only, so when
            # `sketches` holds more than one method the last one overwrites
            # the others -- presumably this is meant to run with SJLT alone.
            error_to_lsq[col_sparsity] = mean_lsq_error
            error_to_truth[col_sparsity] = mean_truth_error
    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_lsq)
    pretty.pprint(error_to_truth)

    # Save the dictionaries
    save_dir = '../../output/ihs_baselines//'
    np.save(save_dir + 'sjlt_error_sparsity_opt', error_to_lsq)
    np.save(save_dir + 'sjlt_error_sparsity_truth', error_to_truth)
예제 #2
0
def test_ihs_initialises(all_sketch_methods):
    '''Check that constructing ``ihs`` stores its arguments unchanged.

    For every entry of ``all_sketch_methods`` a solver is built on a small
    ``make_regression`` problem, and its ``A``, ``b``, ``sketch_method`` and
    ``sketch_dimension`` attributes are compared with the values passed in.
    '''
    design, targets = make_regression(1000, 2)
    m = 100
    for method in all_sketch_methods:
        solver = ihs(design, targets, method, m)
        # Data must be stored as given ...
        assert np.array_equal(solver.A, design)
        assert np.array_equal(solver.b, targets)
        # ... and so must the sketch configuration.
        assert solver.sketch_method == method
        assert solver.sketch_dimension == m
def single_exp(_trial, n, d, X, y, sketch_size, sketch_method, run_time,
               sklearn_lasso_bound):
    """Run one time-budgeted IHS lasso fit on a row-shuffled copy of the data.

    The rows of ``X`` and entries of ``y`` are permuted with a fresh
    ``np.random.permutation(n)``, an ``ihs`` solver is built on the shuffled
    data, and ``lasso_fit_new_sketch_timing`` is called with
    ``sklearn_lasso_bound`` and ``run_time``.  ``d`` is accepted but not used.

    Returns the fitted solution together with the size of the second axis of
    the tracked-error array, which is reported as the number of iterations
    used.
    """
    print("Trial {}".format(_trial))
    order = np.random.permutation(n)
    solver = ihs(X[order, :], y[order], sketch_method, sketch_size)
    solution, history = solver.lasso_fit_new_sketch_timing(
        sklearn_lasso_bound, run_time)
    return solution, history.shape[1]
def error_vs_dimensionality():
    """Compare estimation error against the column dimension ``d``.

    For each ``d`` in ``16, 32, ..., 256`` (with ``n = 100 * d`` rows) and
    each method -- every entry of the module-level ``sketches`` plus
    ``'Exact'`` and ``'Sketch & Solve'`` -- a fresh design is drawn from
    ``gaussian_design_unconstrained`` in each of ``NTRIALS`` trials and an
    estimate ``x_hat`` is computed:

    * ``'Exact'``: ordinary least squares on the full data;
    * ``'Sketch & Solve'``: least squares on a single ``'countSketch'``
      sketch of size ``sampling_rate * num_iterations * d``;
    * otherwise: ``ihs(...).ols_fit_new_sketch(num_iterations)`` with sketch
      size ``sampling_rate * d``.

    The square root of ``prediction_error(X, x_star, x_hat)`` is averaged
    over the trials.  The resulting dictionary, with the list of dimensions
    added under the ``'Dimensions'`` key, is pretty-printed and saved with
    ``np.save``.  Nothing is returned.
    """
    dimension = [2**i for i in range(4, 9)]
    METHODS = sketches + ['Exact', 'Sketch & Solve']

    # Output dictionary: method --> dimension --> accumulated error.
    error_to_truth = {
        method: {d: 0 for d in dimension}
        for method in METHODS
    }
    print(error_to_truth)

    sampling_rate = 10
    num_iterations = 5
    for d in dimension:
        n = 100 * d
        print(f'TESTING {n},{d}')
        for method in METHODS:
            col_sparsity = 4 if method == 'sjlt' else 1
            for _ in range(NTRIALS):
                # Generate the data
                X, y, x_star = gaussian_design_unconstrained(n, d, 1.0)
                # Strings are compared with ==; `is` tests object identity
                # and only works by accident of constant sharing.
                if method == "Exact":
                    print('Exact method.')
                    x_hat = np.linalg.lstsq(X, y, rcond=None)[0]

                elif method == "Sketch & Solve":
                    # The one-shot sketch is num_iterations times larger
                    # than the sketch used per IHS iteration.
                    sketch_size = sampling_rate * num_iterations * d
                    print(f"S&S with {sketch_size} sketch size")
                    _sketch = rp(X, sketch_size, 'countSketch', col_sparsity)
                    SA, Sb = _sketch.sketch_data_targets(y)
                    x_hat = np.linalg.lstsq(SA, Sb, rcond=None)[0]
                else:
                    sketch_size = sampling_rate * d
                    print(
                        f"Using {num_iterations} iterations, sketch_size {sketch_size} and {method}"
                    )
                    my_ihs = ihs(X, y, method, sketch_size, col_sparsity)
                    x_hat = my_ihs.ols_fit_new_sketch(num_iterations)

                error = (prediction_error(X, x_star, x_hat))**(0.5)
                error_to_truth[method][d] += error

    # Turn the accumulated sums into means over the trials.
    for method in METHODS:
        for d in dimension:
            error_to_truth[method][d] /= NTRIALS
    error_to_truth['Dimensions'] = dimension
    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_truth)
    save_dir = '../../output/ihs_baselines/'
    np.save(save_dir + 'error_vs_dims', error_to_truth)
예제 #5
0
def test_ols_new_sketch_per_iteration(all_sketch_methods):
    '''
    IHS with a fresh sketch at every iteration should match least squares.

    Both the plain fit and the error-tracking fit are run for 20 iterations
    with a sketch size of 500, and each result is compared with the
    ``np.linalg.lstsq`` solution using ``np.allclose``.
    '''
    X, y, _ = gaussian_design_unconstrained(2**13, 50, variance=2.5)
    # rcond=None only suppresses the NumPy warning, as per the docs.
    exact = np.linalg.lstsq(X, y, rcond=None)[0]
    for sketch_method in all_sketch_methods:
        solver = ihs(X, y, sketch_method, 500)
        plain_fit = solver.ols_fit_new_sketch(iterations=20)
        tracked_fit, _history = solver.ols_fit_new_sketch_track_errors(
            iterations=20)

        plain_gap = np.linalg.norm(plain_fit - exact)
        print(sketch_method, plain_gap)
        tracked_gap = np.linalg.norm(tracked_fit - exact)
        print(f'Tracking {sketch_method}, error {tracked_gap}')

        assert np.allclose(exact, plain_fit)
        assert np.allclose(exact, tracked_fit)
예제 #6
0
def test_ols_one_sketch_per_iteration(all_sketch_methods):
    '''
    IHS re-using *A SINGLE* sketch should still match least squares.

    A larger sketch (1000 rows) is used here than in the test that draws a
    new sketch for every iteration.  The plain fit runs for 50 iterations and
    the error-tracking fit for 20; both are compared with the
    ``np.linalg.lstsq`` solution.
    '''
    X, y, _ = gaussian_design_unconstrained(2**13, 50, variance=2.5)
    # rcond=None only suppresses the NumPy warning, as per the docs.
    exact = np.linalg.lstsq(X, y, rcond=None)[0]
    for sketch_method in all_sketch_methods:
        solver = ihs(X, y, sketch_method, 1000)
        plain_fit = solver.ols_fit_one_sketch(iterations=50)
        tracked_fit, _history = solver.ols_fit_one_sketch_track_errors(
            iterations=20)

        plain_gap = np.linalg.norm(plain_fit - exact)
        print(sketch_method, plain_gap)
        tracked_gap = np.linalg.norm(tracked_fit - exact)
        print(f'Tracking {sketch_method}, error {tracked_gap}')

        np.testing.assert_array_almost_equal(plain_fit, exact)
        assert np.allclose(exact, tracked_fit)
def test_lasso_solver_time(all_sketch_methods):
    '''
    Check that the time-budgeted IHS lasso agrees with the global lasso QP.

    The reference solution is ``lasso_solver(X, y, ell_1_bound)`` on the full
    problem.  For every sketch method, ``lasso_fit_new_sketch_timing`` is run
    with the same bound and a fixed time budget, and its solution must match
    the reference to a relative tolerance of 0.1.

    nb. sklearn's Lasso is deliberately *not* used as the reference: there is
    no clean matching between the regularising parameters, so only the
    global and iterative QPs are checked against each other.
    '''
    X, y, _ = gaussian_design_unconstrained(2000, 10, 1.0)
    n = X.shape[0]
    ell_1_bound = 100.0
    # Second argument of lasso_fit_new_sketch_timing; presumably seconds --
    # TODO confirm against the ihs implementation.
    time_budget = 1.5

    x_opt = lasso_solver(X, y, ell_1_bound)

    for sketch_method in all_sketch_methods:
        my_ihs = ihs(X, y, sketch_method, 500)
        x_ihs_track, error_track = my_ihs.lasso_fit_new_sketch_timing(
            ell_1_bound, time_budget)
        # Squared prediction-norm gap to the reference, scaled by 1/n.
        final_sol_error = (1 / n) * np.linalg.norm(
            X @ (x_ihs_track - x_opt))**2
        print(
            f'Tracking {sketch_method}, error {np.linalg.norm(x_ihs_track - x_opt)}'
        )
        print("log Error to opt: {}".format(np.log(final_sol_error)))
        print(f"{error_track.shape[1]} iterations completed")
        print(np.c_[x_opt, x_ihs_track])
        # Third positional argument of np.allclose is rtol; spell it out.
        assert np.allclose(x_opt, x_ihs_track, rtol=1E-1)
def error_vs_iterations():
    """Plot single-sketch IHS error per iteration for several step sizes.

    One design with ``n = 6000`` rows and ``d = 200`` columns is drawn from
    ``gaussian_design_unconstrained`` and solved exactly.  For every sketch
    method in the module-level ``sketches`` and every step size in
    ``STEPSIZE``, ``ols_fit_one_sketch_track_errors`` is run ``NTRIALS``
    times; the prediction error of each returned iterate against the
    least-squares solution is averaged over the trials.

    The averaged errors are pretty-printed and drawn on a log-scale
    matplotlib figure, one curve per step size.  Nothing is returned or
    saved.
    """
    n = 6_000
    d = 200
    gamma_vals = [5]
    number_iterations = 30

    # Output dictionary indexed by:
    # sketch method (sketches) --> sketch size (gamma_vals) --> STEPSIZE
    error_to_lsq = {
        sketch_name: {
            gamma: {step: [] for step in STEPSIZE}
            for gamma in gamma_vals
        }
        for sketch_name in sketches
    }

    X, y, x_star = gaussian_design_unconstrained(n, d, variance=1.0)

    # Least squares estimator.  rcond=None matches the other lstsq calls in
    # this file and silences NumPy's FutureWarning about the default.
    x_opt = np.linalg.lstsq(X, y, rcond=None)[0]
    print('-' * 80)
    print("Beginning test")
    lsq_vs_truth_errors = np.log(np.sqrt(prediction_error(X, x_opt, x_star)))
    print(lsq_vs_truth_errors)

    for gamma in gamma_vals:
        sketch_size = int(gamma * d)
        print("Testing gamma: {}, num_iterations: {}".format(
            gamma, number_iterations))
        for sketch_method in sketches:
            col_sparsity = 4 if sketch_method == 'sjlt' else 1

            my_ihs = ihs(X, y, sketch_method, sketch_size, col_sparsity)
            for step in STEPSIZE:
                # Running sum over trials, one slot per iteration.
                lsq_error = np.zeros((number_iterations, ))
                for trial in range(NTRIALS):
                    print('*' * 80)
                    print("{}, trial: {}".format(sketch_method, trial))
                    print('Step size: ', step)
                    _, x_iters = my_ihs.ols_fit_one_sketch_track_errors(
                        number_iterations, step)
                    # Each column of x_iters is scored as one iterate.
                    for it in range(x_iters.shape[1]):
                        residual = prediction_error(X, x_iters[:, it], x_opt)
                        # The index is the iteration, not the trial.
                        print('Iteration {}, residual {}'.format(
                            it, residual))
                        lsq_error[it] += residual

                    # Sketching error reported by the solver for this run.
                    print('Frobenius error: ', my_ihs.frob_error)
                    print('Spectral error: ', my_ihs.spectral_error)
                error_to_lsq[sketch_method][gamma][step] = lsq_error / NTRIALS
    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_lsq)

    ### PLOTTING ###
    my_markers = ['.', 's', '^', 'D', '*', 'h']
    my_colours = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5']
    fig, ax = plt.subplots()
    x_vals = range(1, number_iterations + 1)
    for gamma in gamma_vals:
        for sketch_method in sketches:
            for i, step in enumerate(STEPSIZE):
                # Cycle through the styles so that more than six step sizes
                # do not raise an IndexError.
                ax.plot(x_vals,
                        error_to_lsq[sketch_method][gamma][step],
                        label=step,
                        marker=my_markers[i % len(my_markers)],
                        color=my_colours[i % len(my_colours)])
    ax.set_yscale('log')
    ax.set_xticks(x_vals[1::2])
    ax.set_xlabel("Iterations")
    # Raw string: in a normal literal "\t" is a TAB and "\|" an invalid
    # escape.  \mathrm is the mathtext command for an upright subscript.
    ax.set_ylabel(r'$\| x^t - x_{\mathrm{opt}}\|_A^2$')
    ax.legend(title='Step sizes'
              )  # nb this only makes sense for one sketch dimension
    ax.set_title('{}, m={}d, step size varied'.format(sketches[0], gamma))
    plt.show()
def error_vs_time_real_data(data_name,X,y,penalty,sampling_factors,trials,times,x_opt):
    '''Measure how well time-budgeted IHS lasso approximates ``x_opt``.

    For every sketch method in the module-level ``sketches``, every sampling
    factor ``gamma`` (sketch size ``int(gamma * d)``) and every time budget
    in ``times``, the lasso IHS solver is run ``trials`` times on a
    row-shuffled copy of ``(X, y)``.  The median over the trials of the
    prediction error to ``x_opt``, the scaled squared Euclidean error and the
    iteration count is stored.

    Once the log10 of the median prediction error drops to -16 or below for a
    given (method, gamma), later time budgets re-use that summary instead of
    running the solver again.

    Parameters
    ----------
    data_name : label printed in the progress output.
    X, y : data matrix (indexed as ``X[rows, :]``, shape ``(n, d)``) and
        targets (indexed as ``y[rows]``).
    penalty : first argument passed to ``lasso_fit_new_sketch_timing``.
    sampling_factors : iterable of sketch-size multipliers ``gamma``.
    trials : number of repetitions per setting.
    times : iterable of time budgets; each is passed as the second argument
        of ``lasso_fit_new_sketch_timing``.
    x_opt : reference solution the IHS output is compared against.

    Returns
    -------
    dict
        ``time_results[sketch_method][gamma][time_]`` is a dict with keys
        ``"error to opt"``, ``"solution error"`` and ``"num iterations"``.
    '''

    # Experimental setup
    print(80*"-")
    print("Testing dataset: {}".format(data_name))
    print("TESTING LASSO ITERATIVE HESSIAN SKETCH ALGORITHM")
    n, d = X.shape
    # Report a single boolean rather than an elementwise comparison array.
    print("Is x_OPT all zeros? {}".format(not np.any(x_opt)))

    time_results = {
        sketch: {gamma: {} for gamma in sampling_factors}
        for sketch in sketches
    }

    for sketch_method in sketches:
        for gamma in sampling_factors:
            sketch_size = int(gamma * d)  # builtin int: np.int was removed

            # log10 of the latest median error; starts above the threshold
            # so the first time budget is always run.
            log_error = 1.0
            summary = None

            for time_ in times:
                print("-"*80)
                print("Testing time: {}".format(time_))
                # int() cannot convert +/-inf or nan (e.g. log10(0)), so only
                # truncate finite values for display.
                shown = int(log_error) if np.isfinite(log_error) else log_error
                print("int-log-error: {}".format(shown))

                # Same threshold as truncating to int first, but safe for
                # -inf when the error is exactly zero.
                if log_error <= -16:
                    # continuing for longer doesn't gain anything so just use
                    # previous results.
                    time_results[sketch_method][gamma][time_] = dict(summary)
                    print("Already converged before time {} seconds so continuing.".format(time_))
                    continue

                errors_to_opt = []
                solution_errors = []
                iterations_used = []

                print("IHS-LASSO ALGORITHM on ({},{}) WITH {}, gamma {}".format(n,d,sketch_method, gamma))

                for _trial in range(trials):
                    print("Trial {}".format(_trial))
                    # Fit on the shuffled rows, as single_exp does; the
                    # original computed them but passed the unshuffled data.
                    shuffled_ids = np.random.permutation(n)
                    X_train, y_train = X[shuffled_ids, :], y[shuffled_ids]

                    my_ihs = ihs(X_train, y_train, sketch_method, sketch_size)
                    x_ihs, fit_info = my_ihs.lasso_fit_new_sketch_timing(penalty, time_)
                    # NOTE(review): elsewhere in this file the second return
                    # value is an error-track array whose second axis counts
                    # the iterations; accept that or a plain count -- confirm
                    # against the ihs implementation.
                    iters_used = fit_info.shape[1] if np.ndim(fit_info) == 2 else fit_info

                    error2opt = prediction_error(X, x_opt, x_ihs)
                    print("Iterations completed: ", iters_used)
                    print("Prediction error: ", error2opt)

                    solution_error = (1/n)*np.linalg.norm(x_ihs - x_opt)**2
                    print("Trial: {}, Error: {}".format(_trial, error2opt))
                    print("-"*80)

                    errors_to_opt.append(error2opt)
                    solution_errors.append(solution_error)
                    iterations_used.append(iters_used)

                # Summarise the trials by their median.
                summary = {"error to opt" : np.median(errors_to_opt),
                           "solution error" : np.median(solution_errors),
                           "num iterations" : np.median(iterations_used)}
                # Bookkeeping - if the error is at 10E-16 don't do another iteration.
                log_error = np.log10(summary["error to opt"])
                print("Median log||x^* - x'||_A^2: {}".format(log_error))
                print("Median ||x^* - x'||^2: {}".format(summary["solution error"]))
                print("Median number of {} iterations used".format(summary["num iterations"]))
                time_results[sketch_method][gamma][time_] = dict(summary)
                print("New sol_error_iters: {}".format(log_error))

    pretty = PrettyPrinter(indent=4)
    pretty.pprint(time_results)
    return time_results
예제 #10
0
def solution_error_vs_row_dim():
    '''
    Increase `n` the input dimension of the problem and
    measure the solution error in both:
    (i) Euclidean norm (`mean_square_error`)
    (ii) Prediction norm (`prediction_error`).

    Error measurements are taken with respect to:
    (i) the optimal solution x_opt
    (ii) the ground truth

    For each entry of the module-level ``ROWDIMS`` (paired by position with
    ``ROUNDS`` and ``CLASSICAL_SKETCH_SIZE``) a fresh design with ``D``
    columns is drawn in each of ``NTRIALS`` trials.  Every entry of
    ``METHODS`` is then evaluated: ``'Sketch & Solve'`` via a single
    ``'countSketch'`` sketch, entries of ``sketches`` via IHS with sketch
    size ``SKETCH_SIZE``, and anything else as the exact least-squares
    solution.  Errors are averaged over the trials, pretty-printed and
    saved with ``np.save`` under ``../../output/baselines/``.
    '''
    print('Experimental setup:')
    print(f'IHS sketch size {SKETCH_SIZE}')
    print(f'Sketch and solve sketch size {CLASSICAL_SKETCH_SIZE}')
    print(f'Number of rounds {ROUNDS}')

    def _zeros_per_method(methods):
        # One accumulator slot per entry of ROWDIMS for every method.
        return {m: np.zeros(len(ROWDIMS), ) for m in methods}

    # Output dictionaries
    approx_methods = list(sketches) + ['Sketch & Solve']
    MSE_OPT = _zeros_per_method(approx_methods)
    PRED_ERROR_OPT = _zeros_per_method(approx_methods)
    # Only the "truth" dictionaries carry an entry for the exact solver.
    MSE_TRUTH = _zeros_per_method(approx_methods + ['Exact'])
    PRED_ERROR_TRUTH = _zeros_per_method(approx_methods + ['Exact'])

    def _record(method, index, X, x_opt, x_true, estimate):
        # Accumulate all four error measures for one approximate estimate.
        MSE_OPT[method][index] += mean_square_error(x_opt, estimate)
        PRED_ERROR_OPT[method][index] += prediction_error(X, x_opt, estimate)
        MSE_TRUTH[method][index] += mean_square_error(x_true, estimate)
        PRED_ERROR_TRUTH[method][index] += prediction_error(
            X, x_true, estimate)

    ## Experiment
    # enumerate gives the slot directly; list.index() is a linear search and
    # returns the first match when ROWDIMS holds repeated values.
    for experiment_index, n in enumerate(ROWDIMS):
        print(f'Testing {n} rows')
        _iters = ROUNDS[experiment_index]
        ihs_sketch_size = SKETCH_SIZE
        classic_sketch_size = CLASSICAL_SKETCH_SIZE[experiment_index]

        for trial in range(NTRIALS):
            print("TRIAL {}".format(trial))
            X, y, x_true = gaussian_design_unconstrained(n, D, variance=1.0)
            # rcond=None matches the other lstsq calls in this file and
            # silences NumPy's FutureWarning about the default.
            x_opt = np.linalg.lstsq(X, y, rcond=None)[0]

            for sketch_method in METHODS:
                print('*' * 80)
                col_sparsity = 4 if sketch_method == 'sjlt' else 1

                if sketch_method == 'Sketch & Solve':
                    _sketch = rp(X, classic_sketch_size, 'countSketch',
                                 col_sparsity)
                    SA, Sb = _sketch.sketch_data_targets(y)
                    x_ss = np.linalg.lstsq(SA, Sb, rcond=None)[0]
                    _record(sketch_method, experiment_index, X, x_opt,
                            x_true, x_ss)
                elif sketch_method in sketches:
                    print(f'{sketch_method} IHS')
                    my_ihs = ihs(X, y, sketch_method, ihs_sketch_size,
                                 col_sparsity)
                    x_ihs, x_iters = my_ihs.ols_fit_new_sketch_track_errors(
                        _iters)
                    # Diagnostic only: shape of the per-iterate error matrix.
                    x_errors = x_opt[:, None] - x_iters
                    print(x_errors.shape)
                    _record(sketch_method, experiment_index, X, x_opt,
                            x_true, x_ihs)
                else:
                    # solve exactly: x_opt is already the exact solution, so
                    # only its distance to the ground truth is recorded.
                    MSE_TRUTH["Exact"][experiment_index] += mean_square_error(
                        x_opt, x_true)
                    PRED_ERROR_TRUTH["Exact"][
                        experiment_index] += prediction_error(
                            X, x_opt, x_true)

    # Turn the accumulated sums into means over the trials.
    for _dict in [MSE_OPT, PRED_ERROR_OPT, MSE_TRUTH, PRED_ERROR_TRUTH]:
        for _key in _dict:
            _dict[_key] /= NTRIALS

    pretty = PrettyPrinter(indent=4)
    pretty.pprint(MSE_OPT)
    pretty.pprint(PRED_ERROR_OPT)
    pretty.pprint(MSE_TRUTH)
    pretty.pprint(PRED_ERROR_TRUTH)

    save_dir = '../../output/baselines/'
    np.save(save_dir + 'ihs_ols_mse_OPT', MSE_OPT)
    np.save(save_dir + 'ihs_ols_pred_error_OPT', PRED_ERROR_OPT)
    np.save(save_dir + 'ihs_ols_mse_TRUTH', MSE_TRUTH)
    np.save(save_dir + 'ihs_ols_pred_error_TRUTH', PRED_ERROR_TRUTH)