コード例 #1
0
def sjlt_error_vs_iterations():
    """Measure per-iteration IHS error as the sketch column sparsity varies.

    One regression instance is drawn with ``gaussian_design_unconstrained``.
    For every column sparsity in ``col_sparsities`` and every method in the
    module-level ``sketches``, ``ols_fit_new_sketch_track_errors`` is run
    ``NTRIALS`` times and the ``prediction_error`` of each returned iterate
    is averaged over the trials, measured against

    * the least-squares solution ``x_opt``, and
    * the ground-truth vector ``x_star``.

    The two result dictionaries (keyed by column sparsity) are
    pretty-printed and then written with ``np.save``.
    """
    n = 6_000
    d = 200
    gamma_vals = [5]
    sketch_size = int(gamma_vals[0] * d)
    col_sparsities = [1, 4, 16]
    number_iterations = 20

    # Output dictionaries: column sparsity -> mean error per iteration.
    error_to_lsq = {s: [] for s in col_sparsities}
    error_to_truth = {s: [] for s in col_sparsities}
    print(error_to_lsq)
    print(error_to_truth)

    X, y, x_star = gaussian_design_unconstrained(n, d, variance=1.0)

    # Least squares estimator. rcond=None selects numpy's current default
    # cut-off explicitly (and silences the FutureWarning on older numpy).
    x_opt = np.linalg.lstsq(X, y, rcond=None)[0]

    for col_sparsity in col_sparsities:
        print("Testing col sparsity: {}, num_iterations: {}".format(
            col_sparsity, number_iterations))
        for sketch_method in sketches:
            # Running sums over trials, one slot per iteration.
            lsq_error = np.zeros((number_iterations, ))
            truth_error = np.zeros_like(lsq_error)

            my_ihs = ihs(X, y, sketch_method, sketch_size, col_sparsity)
            for trial in range(NTRIALS):
                print('*' * 80)
                print("{}, trial: {}".format(sketch_method, trial))
                _, x_iters = my_ihs.ols_fit_new_sketch_track_errors(
                    number_iterations)
                # Each column of x_iters is scored as one iterate.
                for it in range(x_iters.shape[1]):
                    iterate = x_iters[:, it]
                    lsq_error[it] += prediction_error(X, iterate, x_opt)
                    truth_error[it] += prediction_error(X, iterate, x_star)
                print(lsq_error)
            mean_lsq_error = lsq_error / NTRIALS
            mean_truth_error = truth_error / NTRIALS
            print(mean_lsq_error)
            # NOTE(review): results are keyed by sparsity only, so when
            # `sketches` holds more than one method each method overwrites
            # the previous one here -- confirm this is intended.
            error_to_lsq[col_sparsity] = mean_lsq_error
            error_to_truth[col_sparsity] = mean_truth_error
    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_lsq)
    pretty.pprint(error_to_truth)

    # Save the dictionaries
    save_dir = '../../output/ihs_baselines//'
    np.save(save_dir + 'sjlt_error_sparsity_opt', error_to_lsq)
    np.save(save_dir + 'sjlt_error_sparsity_truth', error_to_truth)
コード例 #2
0
def error_vs_dimensionality():
    """Compare estimation error against the truth as the dimension grows.

    For each ``d`` in ``[16, 32, 64, 128, 256]`` (with ``n = 100 * d`` rows)
    and each method in ``sketches + ['Exact', 'Sketch & Solve']``, a fresh
    problem is drawn ``NTRIALS`` times and the square root of
    ``prediction_error(X, x_star, x_hat)`` is averaged over the trials.

    * ``'Exact'`` solves the full least-squares problem.
    * ``'Sketch & Solve'`` solves a single ``'countSketch'``-sketched
      problem whose sketch size is ``sampling_rate * num_iterations * d``.
    * every other method runs ``num_iterations`` rounds of
      ``ols_fit_new_sketch`` with sketch size ``sampling_rate * d``.

    The averaged errors (plus a ``'Dimensions'`` entry holding the list of
    dimensions) are pretty-printed and written with ``np.save``.
    """
    dimension = [2**i for i in range(4, 9)]
    METHODS = sketches + ['Exact', 'Sketch & Solve']
    sampling_rate = 10
    num_iterations = 5

    # Output dictionary: method -> dimension -> accumulated error.
    error_to_truth = {name: {d: 0 for d in dimension} for name in METHODS}
    print(error_to_truth)

    for d in dimension:
        n = 100 * d
        print(f'TESTING {n},{d}')
        for method in METHODS:
            col_sparsity = 4 if method == 'sjlt' else 1
            for trial in range(NTRIALS):
                # Generate the data
                X, y, x_star = gaussian_design_unconstrained(n, d, 1.0)
                # Strings are compared with `==`: identity (`is`) on str
                # literals depends on interning and is not reliable.
                if method == "Exact":
                    print('Exact method.')
                    x_hat = np.linalg.lstsq(X, y, rcond=None)[0]

                elif method == "Sketch & Solve":
                    sketch_size = sampling_rate * num_iterations * d
                    print(f"S&S with {sketch_size} sketch size")
                    _sketch = rp(X, sketch_size, 'countSketch', col_sparsity)
                    SA, Sb = _sketch.sketch_data_targets(y)
                    x_hat = np.linalg.lstsq(SA, Sb, rcond=None)[0]
                else:
                    sketch_size = sampling_rate * d
                    print(
                        f"Using {num_iterations} iterations, sketch_size {sketch_size} and {method}"
                    )
                    my_ihs = ihs(X, y, method, sketch_size, col_sparsity)
                    x_hat = my_ihs.ols_fit_new_sketch(num_iterations)

                error = (prediction_error(X, x_star, x_hat))**(0.5)
                error_to_truth[method][d] += error

    # Turn the accumulated sums into per-trial means.
    for name in METHODS:
        for d in dimension:
            error_to_truth[name][d] /= NTRIALS
    error_to_truth['Dimensions'] = dimension
    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_truth)
    save_dir = '../../output/ihs_baselines/'
    np.save(save_dir + 'error_vs_dims', error_to_truth)
コード例 #3
0
def test_ols_new_sketch_per_iteration(all_sketch_methods):
    '''
    IHS with a fresh sketch drawn at every iteration should land close to
    the exact least-squares estimator, both through the plain fit and
    through the error-tracking fit.'''
    design, targets, _ = gaussian_design_unconstrained(2**13,
                                                       50,
                                                       variance=2.5)
    # rcond=None is passed only to suppress the numpy warning, as per docs.
    lstsq_result = np.linalg.lstsq(design, targets, rcond=None)
    x_opt = lstsq_result[0]
    for method in all_sketch_methods:
        solver = ihs(design, targets, method, 500)
        # Plain fit first, then the tracking fit, on the same solver object.
        x_plain = solver.ols_fit_new_sketch(iterations=20)
        tracked = solver.ols_fit_new_sketch_track_errors(iterations=20)
        x_tracked, _unused_track = tracked
        plain_gap = np.linalg.norm(x_plain - x_opt)
        print(method, plain_gap)
        tracked_gap = np.linalg.norm(x_tracked - x_opt)
        print(f'Tracking {method}, error {tracked_gap}')
        assert np.allclose(x_opt, x_plain)
        assert np.allclose(x_opt, x_tracked)
コード例 #4
0
def test_ols_one_sketch_per_iteration(all_sketch_methods):
    '''
    IHS that reuses *A SINGLE* sketch across all iterations should still
    land close to the exact least-squares estimator.

    A larger sketch is used here than in the test that draws a new sketch
    at every iteration.'''
    design, targets, _ = gaussian_design_unconstrained(2**13,
                                                       50,
                                                       variance=2.5)
    # rcond=None is passed only to suppress the numpy warning, as per docs.
    lstsq_result = np.linalg.lstsq(design, targets, rcond=None)
    x_opt = lstsq_result[0]
    for method in all_sketch_methods:
        solver = ihs(design, targets, method, 1000)
        # Plain fit runs 50 iterations; the tracking fit runs 20.
        x_plain = solver.ols_fit_one_sketch(iterations=50)
        tracked = solver.ols_fit_one_sketch_track_errors(iterations=20)
        x_tracked, _unused_track = tracked
        plain_gap = np.linalg.norm(x_plain - x_opt)
        print(method, plain_gap)
        tracked_gap = np.linalg.norm(x_tracked - x_opt)
        print(f'Tracking {method}, error {tracked_gap}')
        np.testing.assert_array_almost_equal(x_plain, x_opt)
        assert np.allclose(x_opt, x_tracked)
コード例 #5
0
def test_lasso_solver_time(all_sketch_methods):
    '''
    Check that the iterative lasso solution returned by
    `lasso_fit_new_sketch_timing` agrees with the global solution from
    `lasso_solver` for the same ell_1 bound.

    nb. We don't compare to sklearn as there is not a
    clean matching between the regularising parameters,
    so only the global and iterative QPs are checked against each other.
    '''
    X, y, x_star = gaussian_design_unconstrained(2000, 10, 1.0)
    n = X.shape[0]
    ell_1_bound = 100.0

    # Reference solution of the full (unsketched) constrained problem.
    x_opt = lasso_solver(X, y, ell_1_bound)

    for sketch_method in all_sketch_methods:
        my_ihs = ihs(X, y, sketch_method, 500)
        # NOTE(review): the second argument (1.5) is presumably a time
        # budget, judging by the method name -- confirm against `ihs`.
        x_ihs_track, error_track = my_ihs.lasso_fit_new_sketch_timing(
            ell_1_bound, 1.5)
        final_sol_error = (1 / n) * np.linalg.norm(
            X @ (x_ihs_track - x_opt))**2
        print(
            f'Tracking {sketch_method}, error {np.linalg.norm(x_ihs_track - x_opt)}'
        )
        print("log Error to opt: {}".format(np.log(final_sol_error)))
        print(f"{error_track.shape[1]} iterations completed")
        print(np.c_[x_opt, x_ihs_track])
        # The tolerance is the *relative* one; pass it by keyword so it is
        # not mistaken for atol.
        assert np.allclose(x_opt, x_ihs_track, rtol=1E-1)
コード例 #6
0
def error_vs_iterations():
    """Plot single-sketch IHS error per iteration for several step sizes.

    One regression instance is drawn; for every ``gamma`` in ``gamma_vals``
    (sketch size ``gamma * d``), every method in the module-level
    ``sketches`` and every step size in ``STEPSIZE``,
    ``ols_fit_one_sketch_track_errors`` is run ``NTRIALS`` times.  The
    ``prediction_error`` of each iterate against the least-squares solution
    is averaged over trials, pretty-printed, and plotted on a log scale
    with one line per step size.
    """
    n = 6_000
    d = 200
    gamma_vals = [5]
    number_iterations = 30

    # Output dictionary indexed by:
    # sketch method (sketches) --> sketch size (gamma_vals) --> STEPSIZE
    error_to_lsq = {
        sketch_name: {gamma: {step: [] for step in STEPSIZE}
                      for gamma in gamma_vals}
        for sketch_name in sketches
    }

    X, y, x_star = gaussian_design_unconstrained(n, d, variance=1.0)

    # Least squares estimator. rcond=None selects numpy's current default
    # cut-off explicitly (and silences the FutureWarning on older numpy).
    x_opt = np.linalg.lstsq(X, y, rcond=None)[0]
    print('-' * 80)
    print("Beginning test")
    lsq_vs_truth_errors = np.log(np.sqrt(prediction_error(X, x_opt, x_star)))
    print(lsq_vs_truth_errors)

    for gamma in gamma_vals:
        sketch_size = int(gamma * d)
        print("Testing gamma: {}, num_iterations: {}".format(
            gamma, number_iterations))
        for sketch_method in sketches:
            col_sparsity = 4 if sketch_method == 'sjlt' else 1

            my_ihs = ihs(X, y, sketch_method, sketch_size, col_sparsity)
            for step in STEPSIZE:
                # Running sum over trials, one slot per iteration.
                lsq_error = np.zeros((number_iterations, ))
                for trial in range(NTRIALS):
                    print('*' * 80)
                    print("{}, trial: {}".format(sketch_method, trial))
                    print('Step size: ', step)
                    _, x_iters = my_ihs.ols_fit_one_sketch_track_errors(
                        number_iterations, step)
                    # Each column of x_iters is scored as one iterate; the
                    # index printed below is the iteration, not the trial.
                    for it in range(x_iters.shape[1]):
                        residual = prediction_error(X, x_iters[:, it], x_opt)
                        print('Trial {}, residual {}'.format(it, residual))
                        lsq_error[it] += residual

                    # Sketching Error for this step size.
                    frob_error = my_ihs.frob_error
                    spec_error = my_ihs.spectral_error
                    print('Frobenius error: ', frob_error)
                    print('Spectral error: ', spec_error)
                mean_lsq_error = lsq_error / NTRIALS
                error_to_lsq[sketch_method][gamma][step] = mean_lsq_error
    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_lsq)

    ### PLOTTING ###
    my_markers = ['.', 's', '^', 'D', '*', 'h']
    my_colours = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5']
    fig, ax = plt.subplots()
    x_vals = range(1, number_iterations + 1)
    for gamma in gamma_vals:
        for sketch_method in sketches:
            for i, step in enumerate(STEPSIZE):
                # Cycle through the styles so that more than six step
                # sizes do not raise IndexError.
                _marker = my_markers[i % len(my_markers)]
                _colour = my_colours[i % len(my_colours)]
                residual = error_to_lsq[sketch_method][gamma][step]
                ax.plot(x_vals,
                        residual,
                        label=step,
                        marker=_marker,
                        color=_colour)
    ax.set_yscale('log')
    ax.set_xticks(x_vals[1::2])
    ax.set_xlabel("Iterations")
    # Raw string: in a normal literal '\t' is a TAB character and '\|' is an
    # invalid escape sequence, which corrupted the label.
    ax.set_ylabel(r'$\| x^t - x_{\mathrm{opt}}\|_A^2$')
    ax.legend(title='Step sizes'
              )  # nb this only makes sense for one sketch dimension
    # The title names only the first sketch and the last gamma, matching
    # the single-sketch / single-gamma configuration above.
    ax.set_title('{}, m={}d, step size varied'.format(sketches[0],
                                                      gamma_vals[-1]))
    plt.show()
コード例 #7
0
def solution_error_vs_row_dim():
    '''
    Increase `n` the input dimension of the problem and
    measure the solution error in both:
    (i) Euclidean norm (`mean_square_error`)
    (ii) Prediction norm (`prediction_error`).

    Error measurements are taken with respect to:
    (i) the optimal solution x_opt
    (ii) the ground truth

    For each `n` in `ROWDIMS`, `NTRIALS` fresh problems are drawn and every
    entry of `METHODS` is evaluated: 'Sketch & Solve' solves one
    'countSketch'-sketched problem, names found in `sketches` run IHS via
    `ols_fit_new_sketch_track_errors`, and anything else is treated as the
    exact solver (scored against the ground truth only).  The per-trial
    means are pretty-printed and written with `np.save`.
    '''
    print('Experimental setup:')
    print(f'IHS sketch size {SKETCH_SIZE}')
    print(f'Sketch and solve sketch size {CLASSICAL_SKETCH_SIZE}')
    print(f'Number of rounds {ROUNDS}')

    num_experiments = len(ROWDIMS)
    sketched_methods = list(sketches) + ['Sketch & Solve']

    # Output dictionaries: method name -> one accumulator per row dimension.
    MSE_OPT = {name: np.zeros(num_experiments) for name in sketched_methods}
    PRED_ERROR_OPT = {
        name: np.zeros(num_experiments)
        for name in sketched_methods
    }
    MSE_TRUTH = {name: np.zeros(num_experiments) for name in sketched_methods}
    PRED_ERROR_TRUTH = {
        name: np.zeros(num_experiments)
        for name in sketched_methods
    }
    # The exact solver is only compared against the ground truth.
    MSE_TRUTH['Exact'] = np.zeros(num_experiments)
    PRED_ERROR_TRUTH['Exact'] = np.zeros(num_experiments)

    def _accumulate(name, idx, X, x_opt, x_true, x_hat):
        """Add the four error measures of ``x_hat`` to slot ``idx``."""
        MSE_OPT[name][idx] += mean_square_error(x_opt, x_hat)
        PRED_ERROR_OPT[name][idx] += prediction_error(X, x_opt, x_hat)
        MSE_TRUTH[name][idx] += mean_square_error(x_true, x_hat)
        PRED_ERROR_TRUTH[name][idx] += prediction_error(X, x_true, x_hat)

    ## Experiment
    # enumerate keeps the slot correct even if ROWDIMS repeats a value.
    for experiment_index, n in enumerate(ROWDIMS):
        print(f'Testing {n} rows')
        _iters = ROUNDS[experiment_index]
        ihs_sketch_size = SKETCH_SIZE
        classic_sketch_size = CLASSICAL_SKETCH_SIZE[experiment_index]

        for trial in range(NTRIALS):
            print("TRIAL {}".format(trial))
            X, y, x_true = gaussian_design_unconstrained(n, D, variance=1.0)
            # rcond=None selects numpy's current default cut-off explicitly
            # (and silences the FutureWarning on older numpy).
            x_opt = np.linalg.lstsq(X, y, rcond=None)[0]

            for sketch_method in METHODS:
                print('*' * 80)
                if sketch_method == 'Sketch & Solve':
                    # Classical sketching always uses column sparsity 1.
                    _sketch = rp(X, classic_sketch_size, 'countSketch', 1)
                    SA, Sb = _sketch.sketch_data_targets(y)
                    x_ss = np.linalg.lstsq(SA, Sb, rcond=None)[0]
                    _accumulate(sketch_method, experiment_index, X, x_opt,
                                x_true, x_ss)
                elif sketch_method in sketches:
                    col_sparsity = 4 if sketch_method == 'sjlt' else 1
                    print(f'{sketch_method} IHS')
                    my_ihs = ihs(X, y, sketch_method, ihs_sketch_size,
                                 col_sparsity)
                    x_ihs, x_iters = my_ihs.ols_fit_new_sketch_track_errors(
                        _iters)
                    # Diagnostic only: shape of the per-iterate error array.
                    x_errors = x_opt[:, None] - x_iters
                    print(x_errors.shape)
                    _accumulate(sketch_method, experiment_index, X, x_opt,
                                x_true, x_ihs)
                else:
                    # solve exactly: x_opt itself is the estimate
                    MSE_TRUTH["Exact"][experiment_index] += mean_square_error(
                        x_opt, x_true)
                    PRED_ERROR_TRUTH["Exact"][
                        experiment_index] += prediction_error(
                            X, x_opt, x_true)

    # Turn the accumulated sums into per-trial means (in place).
    for _dict in [MSE_OPT, PRED_ERROR_OPT, MSE_TRUTH, PRED_ERROR_TRUTH]:
        for _key in _dict:
            _dict[_key] /= NTRIALS

    pretty = PrettyPrinter(indent=4)
    pretty.pprint(MSE_OPT)
    pretty.pprint(PRED_ERROR_OPT)
    pretty.pprint(MSE_TRUTH)
    pretty.pprint(PRED_ERROR_TRUTH)

    save_dir = '../../output/baselines/'
    np.save(save_dir + 'ihs_ols_mse_OPT', MSE_OPT)
    np.save(save_dir + 'ihs_ols_pred_error_OPT', PRED_ERROR_OPT)
    np.save(save_dir + 'ihs_ols_mse_TRUTH', MSE_TRUTH)
    np.save(save_dir + 'ihs_ols_pred_error_TRUTH', PRED_ERROR_TRUTH)