def sjlt_error_vs_iterations():
    '''
    Track the per-iteration IHS error while the column sparsity is varied.

    A single Gaussian design (n = 6000, d = 200) is drawn and solved exactly
    with least squares.  For every column sparsity in [1, 4, 16] and every
    sketch in the module-level `sketches`, the IHS solver is run `NTRIALS`
    times with a fresh sketch per iteration, and the prediction error of each
    iterate is accumulated against
        (i) the least-squares solution x_opt
        (ii) the ground truth x_star.
    The trial-averaged error curves are pretty-printed and saved with
    `np.save` under '../../output/ihs_baselines//'.

    NOTE(review): results are keyed by column sparsity only, so when
    `sketches` holds more than one method the last method's curve is the one
    that is kept -- confirm this is intended.
    '''
    n = 6_000
    d = 200
    gamma_vals = [5]  # alternatives tried previously: [4, 6, 8]
    sketch_size = int(gamma_vals[0] * d)
    col_sparsities = [1, 4, 16]
    number_iterations = 20

    # Output dictionaries, keyed by column sparsity.
    error_to_lsq = {sparsity: [] for sparsity in col_sparsities}
    error_to_truth = {sparsity: [] for sparsity in col_sparsities}
    print(error_to_lsq)
    print(error_to_truth)

    X, y, x_star = gaussian_design_unconstrained(n, d, variance=1.0)

    # Least squares estimator
    x_opt = np.linalg.lstsq(X, y)[0]
    lsq_vs_truth_errors = np.log(np.sqrt(prediction_error(X, x_opt, x_star)))

    for col_sparsity in col_sparsities:
        print("Testing col sparsity: {}, num_iterations: {}".format(
            col_sparsity, number_iterations))
        for sketch_method in sketches:
            # Running sums over trials, one slot per IHS iteration.
            lsq_totals = np.zeros((number_iterations, ))
            truth_totals = np.zeros_like(lsq_totals)
            my_ihs = ihs(X, y, sketch_method, sketch_size, col_sparsity)

            for trial in range(NTRIALS):
                print('*' * 80)
                print("{}, trial: {}".format(sketch_method, trial))
                _, x_iters = my_ihs.ols_fit_new_sketch_track_errors(
                    number_iterations)
                # Column `it` of x_iters is used as the iterate at step `it`.
                for it in range(x_iters.shape[1]):
                    iterate = x_iters[:, it]
                    lsq_totals[it] += prediction_error(X, iterate, x_opt)
                    truth_totals[it] += prediction_error(X, iterate, x_star)
                print(lsq_totals)

            mean_lsq_error = lsq_totals / NTRIALS
            mean_truth_error = truth_totals / NTRIALS
            print(mean_lsq_error)
            error_to_lsq[col_sparsity] = mean_lsq_error
            error_to_truth[col_sparsity] = mean_truth_error

    printer = PrettyPrinter(indent=4)
    printer.pprint(error_to_lsq)
    printer.pprint(error_to_truth)

    # Save the dictionaries
    save_dir = '../../output/ihs_baselines//'
    np.save(save_dir + 'sjlt_error_sparsity_opt', error_to_lsq)
    np.save(save_dir + 'sjlt_error_sparsity_truth', error_to_truth)
def error_vs_dimensionality():
    '''
    Measure the error to the ground truth as the problem dimension grows.

    For every d in [2**4, ..., 2**8] a problem with n = 100 * d rows is drawn
    from `gaussian_design_unconstrained` for each (method, trial) pair and
    solved with one of:
        (i) 'Exact'          -- `np.linalg.lstsq` on the full data
        (ii) 'Sketch & Solve' -- least squares on a single countSketch of
             size sampling_rate * num_iterations * d
        (iii) any entry of the module-level `sketches` -- IHS with sketch
             size sampling_rate * d run for num_iterations iterations.
    The square root of `prediction_error(X, x_star, x_hat)` is averaged over
    `NTRIALS` trials.  The resulting dictionary (plus a 'Dimensions' entry)
    is pretty-printed and saved to '../../output/ihs_baselines/error_vs_dims'.
    '''
    dimension = [2**i for i in range(4, 9)]
    methods = sketches + ['Exact', 'Sketch & Solve']
    sampling_rate = 10
    num_iterations = 5

    # Output dictionary: method --> dimension --> accumulated error.
    error_to_truth = {name: {d: 0 for d in dimension} for name in methods}
    print(error_to_truth)

    for d in dimension:
        n = 100 * d
        print(f'TESTING {n},{d}')
        for method in methods:
            col_sparsity = 4 if method == 'sjlt' else 1
            for trial in range(NTRIALS):
                # Generate the data
                X, y, x_star = gaussian_design_unconstrained(n, d, 1.0)

                # Strings are compared by value: `is` tests object identity
                # and only matched here by an interning accident.
                if method == "Exact":
                    print('Exact method.')
                    x_hat = np.linalg.lstsq(X, y)[0]
                elif method == "Sketch & Solve":
                    sketch_size = sampling_rate * num_iterations * d
                    print(f"S&S with {sketch_size} sketch size")
                    _sketch = rp(X, sketch_size, 'countSketch', col_sparsity)
                    SA, Sb = _sketch.sketch_data_targets(y)
                    x_hat = np.linalg.lstsq(SA, Sb)[0]
                else:
                    sketch_size = sampling_rate * d
                    print(
                        f"Using {num_iterations} iterations, sketch_size {sketch_size} and {method}"
                    )
                    my_ihs = ihs(X, y, method, sketch_size, col_sparsity)
                    x_hat = my_ihs.ols_fit_new_sketch(num_iterations)

                error = (prediction_error(X, x_star, x_hat))**(0.5)
                error_to_truth[method][d] += error

    # Turn the accumulated sums into means over the trials.
    for name in methods:
        for d in dimension:
            error_to_truth[name][d] /= NTRIALS
    error_to_truth['Dimensions'] = dimension

    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_truth)
    save_dir = '../../output/ihs_baselines/'
    np.save(save_dir + 'error_vs_dims', error_to_truth)
def error_vs_time(n, d, sampling_factors, trials, times2test,
                  sklearn_lasso_bound):
    '''
    Show that a random lasso instance is approximated by the hessian
    sketching scheme.

    A synthetic instance from `my_lasso_data(n, d)` is first solved through
    `sklearn_wrapper`; then, for every sketch in the module-level `sketches`,
    every sampling factor and every time budget, `single_exp` is run `trials`
    times in parallel (joblib, thread backend) and the results are averaged.

    Parameters
    ----------
    n, d : problem size passed to `my_lasso_data`; `d` also scales the sketch
        size, `int(gamma * d)`.
    sampling_factors : iterable of gamma values.
    trials : number of repetitions per configuration.
    times2test : iterable of time budgets handed to `single_exp`
        (the log messages describe them as seconds).
    sklearn_lasso_bound : forwarded unchanged to `sklearn_wrapper` and
        `single_exp`.

    Returns
    -------
    None.  The nested results dictionary is pretty-printed and saved to
    '../../output/ihs_timings/ihs_time_synthetic<n>_<d>.npy'.
    '''
    print(80 * "-")
    print("TESTING LASSO ITERATIVE HESSIAN SKETCH ALGORITHM")
    print("Generating data")
    X, y, x_star = my_lasso_data(n, d)

    ### Test Sklearn implementation
    print("Beginning test")
    x_opt, f_opt, sklearn_time = sklearn_wrapper(X, y, n, d,
                                                 sklearn_lasso_bound, trials)
    print("LASSO-skl time: {}".format(sklearn_time))

    # ground Truths
    sklearn_error2truth = prediction_error(X, x_opt, x_star)

    time_results = {
        "Sklearn": {
            "error to truth": sklearn_error2truth,
            "objective": f_opt,
            "solve time": sklearn_time
        },
    }
    for sketch in sketches:
        time_results[sketch] = {gamma: {} for gamma in sampling_factors}

    for sketch_method in sketches:
        for gamma in sampling_factors:
            # `np.int` was removed from NumPy; the builtin is what it aliased.
            sketch_size = int(gamma * d)
            # log10 of the last mean error; used to decide whether the error
            # is already small enough to skip the remaining time budgets.
            euclidean_error_for_iter_check = 1.0

            for time_budget in times2test:
                print("-" * 80)
                print("Testing time: {}".format(time_budget))
                # int() cannot convert inf/nan (an exactly-zero error gives
                # log10 == -inf), so non-finite values are left untouched.
                if np.isfinite(euclidean_error_for_iter_check):
                    int_log_error = int(euclidean_error_for_iter_check)
                else:
                    int_log_error = euclidean_error_for_iter_check
                print("int-log-error: {}".format(int_log_error))

                if int_log_error <= -16:
                    # continuing for longer doesn't gain anything so just use
                    # previous results.
                    time_results[sketch_method][gamma][time_budget] = {
                        "error to opt": total_error2opt,
                        "solution error": total_sol_error,
                        "num iterations": total_iters_used
                    }
                    print(
                        "Already converged before time {} seconds so continuing."
                        .format(time_budget))
                    continue

                total_error2opt = 0
                total_sol_error = 0
                total_iters_used = 0
                print("IHS-LASSO ALGORITHM on ({},{}) WITH {}, gamma {}".
                      format(n, d, sketch_method, gamma))
                results = Parallel(n_jobs=-1, prefer="threads")(
                    delayed(single_exp)(_trial, n, d, X, y, sketch_size,
                                        sketch_method, time_budget,
                                        sklearn_lasso_bound)
                    for _trial in range(trials))

                # Each result is indexed as (solution, iterations used, ...).
                for outcome in results:
                    x_ihs = outcome[0]
                    total_iters_used += outcome[1]
                    total_error2opt += prediction_error(X, x_opt, x_ihs)**2
                    total_sol_error += (
                        1 / n) * np.linalg.norm(x_ihs - x_opt)**2

                total_error2opt /= trials
                total_sol_error /= trials
                total_iters_used /= trials
                print("Mean log||x^* - x'||_A^2: {}".format(
                    np.log10(total_error2opt)))
                print("Mean log||x^* - x'||^2: {}".format(total_sol_error))
                print("Mean number of {} iterations used".format(
                    total_iters_used))
                time_results[sketch_method][gamma][time_budget] = {
                    "error to opt": total_error2opt,
                    "solution error": total_sol_error,
                    "num iterations": total_iters_used
                }

                # Bookkeeping - if the error is at 10E-16 don't do another
                # iteration.
                euclidean_error_for_iter_check = np.log10(total_error2opt)
                print("New sol_error_iters: {}".format(
                    euclidean_error_for_iter_check))

    pretty = PrettyPrinter(indent=4)
    pretty.pprint(time_results)
    file_name = '../../output/ihs_timings/ihs_time_synthetic' + str(
        n) + '_' + str(d) + '.npy'
    np.save(file_name, time_results)
def error_vs_iterations():
    '''
    Plot the IHS error to the least-squares solution against the iteration
    count for every step size in the module-level `STEPSIZE`.

    A single Gaussian design (n = 6000, d = 200) is drawn and solved exactly.
    For each gamma in `gamma_vals`, each sketch in `sketches` and each step
    size, `ols_fit_one_sketch_track_errors` is run `NTRIALS` times and
    `prediction_error(X, iterate, x_opt)` is averaged per iteration.  The
    averaged curves are pretty-printed and drawn on a log-scaled matplotlib
    axis (one line per step size), then shown with `plt.show()`.
    '''
    n = 6_000
    d = 200
    gamma_vals = [5]
    number_iterations = 30

    # Output dictionary indexed by:
    # sketch method (sketches) --> sketch size (gamma_vals) --> STEPSIZE
    error_to_lsq = {
        sketch_name: {
            gamma: {step: [] for step in STEPSIZE} for gamma in gamma_vals
        }
        for sketch_name in sketches
    }

    X, y, x_star = gaussian_design_unconstrained(n, d, variance=1.0)

    # Least squares estimator
    x_opt = np.linalg.lstsq(X, y)[0]
    print('-' * 80)
    print("Beginning test")
    lsq_vs_truth_errors = np.log(np.sqrt(prediction_error(X, x_opt, x_star)))
    print(lsq_vs_truth_errors)

    for gamma in gamma_vals:
        sketch_size = int(gamma * d)
        print("Testing gamma: {}, num_iterations: {}".format(
            gamma, number_iterations))
        for sketch_method in sketches:
            col_sparsity = 4 if sketch_method == 'sjlt' else 1
            my_ihs = ihs(X, y, sketch_method, sketch_size, col_sparsity)

            for step in STEPSIZE:
                # Running sum over trials, one slot per iteration.
                lsq_error = np.zeros((number_iterations, ))
                for trial in range(NTRIALS):
                    print('*' * 80)
                    print("{}, trial: {}".format(sketch_method, trial))
                    print('Step size: ', step)
                    _, x_iters = my_ihs.ols_fit_one_sketch_track_errors(
                        number_iterations, step)
                    # Column `it` of x_iters is used as the iterate at step
                    # `it`.
                    for it in range(x_iters.shape[1]):
                        residual = prediction_error(X, x_iters[:, it], x_opt)
                        print('Trial {}, residual {}'.format(it, residual))
                        lsq_error[it] += residual

                # Sketching Error for this step size.
                frob_error = my_ihs.frob_error
                spec_error = my_ihs.spectral_error
                print('Frobenius error: ', frob_error)
                print('Spectral error: ', spec_error)

                error_to_lsq[sketch_method][gamma][step] = (lsq_error /
                                                            NTRIALS)

    pretty = PrettyPrinter(indent=4)
    pretty.pprint(error_to_lsq)

    ### PLOTTING ###
    my_markers = ['.', 's', '^', 'D', '*', 'h']
    my_colours = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5']
    fig, ax = plt.subplots()
    x_vals = range(1, number_iterations + 1)
    for gamma in gamma_vals:
        for sketch_method in sketches:
            for i, step in enumerate(STEPSIZE):
                # Cycle through the styles so more than six step sizes do not
                # run off the end of the lists.
                _marker = my_markers[i % len(my_markers)]
                _colour = my_colours[i % len(my_colours)]
                residual = error_to_lsq[sketch_method][gamma][step]
                ax.plot(x_vals,
                        residual,
                        label=step,
                        marker=_marker,
                        color=_colour)
    ax.set_yscale('log')
    ax.set_xticks(x_vals[1::2])
    ax.set_xlabel("Iterations")
    # Raw string: in a normal literal `\|` is an invalid escape and `\t` is a
    # TAB character, which corrupted the mathtext label.
    ax.set_ylabel(r'$\| x^t - x_{\mathrm{opt}}\|_A^2$')
    # nb this only makes sense for one sketch dimension
    ax.legend(title='Step sizes')
    ax.set_title('{}, m={}d, step size varied'.format(sketches[0],
                                                      gamma_vals[-1]))
    plt.show()
def error_vs_time_real_data(data_name,X,y,penalty,sampling_factors,trials,times,x_opt):
    '''
    Show that a lasso instance on real data is approximated by the hessian
    sketching scheme.

    For every sketch in the module-level `sketches`, every sampling factor
    and every time budget, `ihs(...).lasso_fit_new_sketch_timing` is run
    `trials` times and the median error / iteration count is recorded
    (the log messages still label these values "Mean").

    Parameters
    ----------
    data_name : label used only in the log output.
    X, y : data and targets; `X.shape` provides (n, d).
    penalty : forwarded to `lasso_fit_new_sketch_timing`.
    sampling_factors : iterable of gamma values; sketch size is
        `int(gamma * d)`.
    trials : number of repetitions per configuration.
    times : iterable of time budgets forwarded to the solver.
    x_opt : reference solution the errors are measured against.

    Returns
    -------
    dict : sketch --> gamma --> time budget --> {"error to opt",
        "solution error", "num iterations"}.
    '''
    # Experimental setup
    print(80*"-")
    print("Testing dataset: {}".format(data_name))
    print("TESTING LASSO ITERATIVE HESSIAN SKETCH ALGORITHM")
    times2test = times
    n,d = X.shape
    print("Is x_OPT all zeros? {}".format(x_opt == np.zeros_like(x_opt)))

    time_results = {}
    for sketch in sketches:
        time_results[sketch] = {gamma: {} for gamma in sampling_factors}

    for sketch_method in sketches:
        for gamma in sampling_factors:
            # log10 of the last median error; used to decide whether the
            # error is already small enough to skip further time budgets.
            solution_error_for_iter_check = 1.0

            for time_ in times2test:
                print("-"*80)
                print("Testing time: {}".format(time_))
                # int() cannot convert inf/nan (an exactly-zero error gives
                # log10 == -inf), so non-finite values are left untouched.
                if np.isfinite(solution_error_for_iter_check):
                    int_log_error = int(solution_error_for_iter_check)
                else:
                    int_log_error = solution_error_for_iter_check
                print("int-log-error: {}".format(int_log_error))

                if int_log_error <= -16:
                    # continuing for longer doesn't gain anything so just use
                    # previous results.
                    time_results[sketch_method][gamma][time_] = {"error to opt" : total_error2opt,
                                                                 "solution error" : total_sol_error,
                                                                 "num iterations" : total_iters_used}
                    print("Already converged before time {} seconds so continuing.".format(time_))
                    continue

                total_error2opt = []
                total_sol_error = []
                total_iters_used = []
                print("IHS-LASSO ALGORITHM on ({},{}) WITH {}, gamma {}".format(n,d,sketch_method, gamma))

                for _trial in range(trials):
                    print("Trial {}".format(_trial))
                    # NOTE(review): the original built shuffled copies of the
                    # data here but still fitted on the unshuffled X, y, so
                    # the copies were dead code and have been dropped.  The
                    # draw is kept so the global RNG stream is unchanged.
                    np.random.permutation(n)

                    # `np.int` was removed from NumPy; the builtin is what it
                    # aliased.
                    my_ihs = ihs(X,y,sketch_method,int(gamma*d))
                    x_ihs, iters_used = my_ihs.lasso_fit_new_sketch_timing(penalty,time_)
                    my_prediction_error = prediction_error(X,x_opt,x_ihs)
                    print("Iterations completed: ", iters_used)
                    print("Prediction error: ",my_prediction_error)

                    # Update dict output values
                    error2opt = my_prediction_error
                    solution_error = (1/n)*np.linalg.norm(x_ihs - x_opt)**2
                    print("Trial: {}, Error: {}".format(_trial, error2opt))
                    print("-"*80)

                    # Update counts
                    total_error2opt.append(error2opt)
                    total_sol_error.append(solution_error)
                    total_iters_used.append(iters_used)

                # Collapse the per-trial lists to their medians.
                total_error2opt = np.median(total_error2opt)
                total_sol_error = np.median(total_sol_error)
                total_iters_used = np.median(total_iters_used)
                print("Mean log||x^* - x'||_A^2: {}".format(np.log10(total_error2opt)))
                print("Mean log||x^* - x'||^2: {}".format(total_sol_error))
                print("Mean number of {} iterations used".format(total_iters_used))
                time_results[sketch_method][gamma][time_] = {"error to opt" : total_error2opt,
                                                             "solution error" : total_sol_error,
                                                             "num iterations" : total_iters_used}

                # Bookkeeping - if the error is at 10E-16 don't do another
                # iteration.
                solution_error_for_iter_check = np.log10(total_error2opt)
                print("New sol_error_iters: {}".format(solution_error_for_iter_check))

    # Built locally so the final print does not depend on a module-level
    # `pretty` existing.
    pretty = PrettyPrinter(indent=4)
    pretty.pprint(time_results)
    return time_results
def solution_error_vs_row_dim():
    '''
    Increase `n` the input dimension of the problem and measure the
    solution error in both:
    (i) Euclidean norm (`mean_square_error`)
    (ii) Prediction norm (`prediction_error`).

    Error measurements are taken with respect to:
    (i) the optimal solution x_opt
    (ii) the ground truth

    Row counts come from the module-level `ROWDIMS`; the matching entries of
    `ROUNDS` and `CLASSICAL_SKETCH_SIZE` give the number of IHS iterations
    and the sketch-and-solve sketch size.  Every quantity is averaged over
    `NTRIALS` trials, pretty-printed and saved with `np.save` under
    '../../output/baselines/'.
    '''
    print('Experimental setup:')
    print(f'IHS sketch size {SKETCH_SIZE}')
    print(f'Sketch and solve sketch size {CLASSICAL_SKETCH_SIZE}')
    print(f'Number of rounds {ROUNDS}')

    def _empty_table(names):
        # One zero-initialised slot per entry of ROWDIMS for every name.
        return {name: np.zeros(len(ROWDIMS), ) for name in names}

    # Output dictionaries
    sketch_names = [sketches[i] for i in range(len(sketches))]
    approx_names = sketch_names + ['Sketch & Solve']
    MSE_OPT = _empty_table(approx_names)
    PRED_ERROR_OPT = _empty_table(approx_names)
    MSE_TRUTH = _empty_table(approx_names + ['Exact'])
    PRED_ERROR_TRUTH = _empty_table(approx_names + ['Exact'])

    def _accumulate(name, slot, X, x_opt, x_true, estimate):
        # Add the four error measurements of `estimate` to the running sums.
        MSE_OPT[name][slot] += mean_square_error(x_opt, estimate)
        PRED_ERROR_OPT[name][slot] += prediction_error(X, x_opt, estimate)
        MSE_TRUTH[name][slot] += mean_square_error(x_true, estimate)
        PRED_ERROR_TRUTH[name][slot] += prediction_error(X, x_true, estimate)

    ## Experiment
    for n in ROWDIMS:
        print(f'Testing {n} rows')
        experiment_index = ROWDIMS.index(n)
        _iters = ROUNDS[experiment_index]
        ihs_sketch_size = SKETCH_SIZE
        classic_sketch_size = CLASSICAL_SKETCH_SIZE[experiment_index]

        for trial in range(NTRIALS):
            print("TRIAL {}".format(trial))
            X, y, x_true = gaussian_design_unconstrained(n, D, variance=1.0)
            x_opt = np.linalg.lstsq(X, y)[0]

            for sketch_method in METHODS:
                print('*' * 80)
                col_sparsity = 4 if sketch_method == 'sjlt' else 1

                if sketch_method == 'Sketch & Solve':
                    # Classical sketch: solve once on the sketched data.
                    _sketch = rp(X, classic_sketch_size, 'countSketch',
                                 col_sparsity)
                    SA, Sb = _sketch.sketch_data_targets(y)
                    x_ss = np.linalg.lstsq(SA, Sb)[0]
                    _accumulate(sketch_method, experiment_index, X, x_opt,
                                x_true, x_ss)
                elif sketch_method in sketches:
                    print(f'{sketch_method} IHS')
                    my_ihs = ihs(X, y, sketch_method, ihs_sketch_size,
                                 col_sparsity)
                    x_ihs, x_iters = my_ihs.ols_fit_new_sketch_track_errors(
                        _iters)
                    x_errors = x_opt[:, None] - x_iters
                    print(x_errors.shape)
                    _accumulate(sketch_method, experiment_index, X, x_opt,
                                x_true, x_ihs)
                else:
                    # solve exactly
                    MSE_TRUTH["Exact"][experiment_index] += mean_square_error(
                        x_opt, x_true)
                    PRED_ERROR_TRUTH["Exact"][
                        experiment_index] += prediction_error(
                            X, x_opt, x_true)

    # Turn the accumulated sums into means over the trials.
    all_tables = [MSE_OPT, PRED_ERROR_OPT, MSE_TRUTH, PRED_ERROR_TRUTH]
    for table in all_tables:
        for key in table.keys():
            table[key] /= NTRIALS

    printer = PrettyPrinter(indent=4)
    for table in all_tables:
        printer.pprint(table)

    save_dir = '../../output/baselines/'
    np.save(save_dir + 'ihs_ols_mse_OPT', MSE_OPT)
    np.save(save_dir + 'ihs_ols_pred_error_OPT', PRED_ERROR_OPT)
    np.save(save_dir + 'ihs_ols_mse_TRUTH', MSE_TRUTH)
    np.save(save_dir + 'ihs_ols_pred_error_TRUTH', PRED_ERROR_TRUTH)