def simulations():
    que = {}
    progress_iterator = ProgressIterator(4)

    progress_iterator.next()
    avg_v1, avg_vrand, avg_vmin = test_one()
    que[1] = ("v min :", avg_vmin)

    progress_iterator.next()
    in_error, out_error = experiment(test_two, [100, 1000], 1000)
    que[5] = ("in sample error :", in_error)
    que[6] = ("out sample error :", out_error)

    progress_iterator.next()
    iterations = experiment(test_three, [10], 1000)
    que[7] = ("iterations :", iterations)

    progress_iterator.next()
    results = np.array([test_four(100, 1000) for _ in range(1000)],
                       dtype=object)
    in_error_no_transform = np.mean(results[:, 0])
    weight = np.mean(results[:, 1], axis=0)
    out_error_transform = np.mean(results[:, 2])
    que[8] = ("in sample error -- without higher dimension transformation :",
              in_error_no_transform)
    que[9] = ("higher dimensional weights :", weight)
    que[10] = ("out of sample error -- with higher dimension transformation :",
               out_error_transform)
    return que
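# Hedged sketch (not part of the source): test_one above is assumed to run the
# classic coin-flip experiment -- flip 1000 fair coins 10 times each and record
# the fraction of heads for the first coin, a randomly chosen coin, and the
# coin with the fewest heads; averaging v_min over many runs gives avg_vmin.
# The name coin_flip_trial is hypothetical.
import numpy as np

def coin_flip_trial(n_coins=1000, n_flips=10):
    flips = np.random.randint(0, 2, size=(n_coins, n_flips))  # 1 = heads
    fractions = flips.mean(axis=1)  # fraction of heads per coin
    v_1 = fractions[0]
    v_rand = fractions[np.random.randint(n_coins)]
    v_min = fractions.min()
    return v_1, v_rand, v_min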
def simulations():
    que = {}
    progress_iterator = ProgressIterator(4)

    progress_iterator.next()
    que[1] = ("sample points needed :", datapoints_needed(0.008, 0.1, 8))

    progress_iterator.next()
    gradient = [in_error_derivative_u, in_error_derivative_v]
    value, point, iterations = find_threshold(in_error, in_error_gradient,
                                              [1, 1], 0.1, mpf(10)**mpf(-14), 0)
    que[5] = ("gradient descent results",
              "\n\tvalue : " + str(value)
              + "\n\tpoint : " + str(point) + " # ans to question 6"
              + "\n\titerations : " + str(iterations) + " # ans to question 5")

    progress_iterator.next()
    gradient = [in_error_derivative_u, in_error_derivative_v]
    value, point, iterations = coordinate_descent_max_iterations(
        in_error, gradient, [1, 1], mpf('0.1'), 30)
    que[7] = ("coordinate gradient descent results",
              "\n\tvalue : " + str(value)
              + "\n\tpoint : " + str(point)
              + "\n\titerations : " + str(iterations))

    def trial_no_weights(*args):
        weight, iterations, out_sample_error = trial(*args)
        return iterations, out_sample_error

    progress_iterator.next()
    iterations, out_sample_error = experiment(trial_no_weights, [100, 1000],
                                              100)
    que[8] = ("out of sample cross entropy error :", out_sample_error)
    que[9] = ("iterations :", iterations)
    return que
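# Hedged sketch (not part of the source): find_threshold above is assumed to
# run plain gradient descent until the error drops below the threshold. For
# the usual homework-5 surface E(u, v) = (u*e**v - 2*v*e**-u)**2, a minimal
# version could look like this; descend_until, hw5_error, and hw5_gradient are
# hypothetical names.
import math

def descend_until(error, gradient, point, eta, threshold):
    u, v = point
    iterations = 0
    while error(u, v) > threshold:
        du, dv = gradient(u, v)
        u, v = u - eta * du, v - eta * dv  # step against the gradient
        iterations += 1
    return error(u, v), (u, v), iterations

def hw5_error(u, v):
    return (u * math.exp(v) - 2 * v * math.exp(-u)) ** 2

def hw5_gradient(u, v):
    common = 2 * (u * math.exp(v) - 2 * v * math.exp(-u))
    return (common * (math.exp(v) + 2 * v * math.exp(-u)),
            common * (u * math.exp(v) - 2 * math.exp(-u)))

# value, point, iterations = descend_until(hw5_error, hw5_gradient,
#                                          (1.0, 1.0), 0.1, 1e-14)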
def simulations():
    que = {}
    training_data = np.genfromtxt(os.path.join(file_dir, "in.dta"))
    testing_data = np.genfromtxt(os.path.join(file_dir, "out.dta"))
    progress_iterator = ProgressIterator(4)

    progress_iterator.next()
    in_sample_error, out_of_sample_error = test1(training_data, testing_data)
    que[2] = ("linear regression",
              "\n\tin sample error : " + str(in_sample_error)
              + "\n\tout of sample error : " + str(out_of_sample_error))

    progress_iterator.next()
    in_sample_error, out_of_sample_error = trial(training_data, testing_data,
                                                 pow_10(-3))
    que[3] = ("linear regression with weight decay, k=-3",
              "\n\tin sample error : " + str(in_sample_error)
              + "\n\tout of sample error : " + str(out_of_sample_error))

    progress_iterator.next()
    in_sample_error, out_of_sample_error = trial(training_data, testing_data,
                                                 pow_10(3))
    que[4] = ("linear regression with weight decay, k=3",
              "\n\tin sample error : " + str(in_sample_error)
              + "\n\tout of sample error : " + str(out_of_sample_error))

    progress_iterator.next()
    out_of_sample_errors = [
        str(trial(training_data, testing_data, pow_10(k))[1])
        for k in range(-2, 3)
    ]
    pretty_table = tabulate([[k, out_of_sample_errors[k + 2]]
                             for k in range(-2, 3)],
                            headers=['k', "EOUT"])
    que[5] = ("Also includes answer to question 6\n\n"
              "linear regression with weight decay, k=-2..2",
              "\nout of sample errors\n" + pretty_table)
    return que
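# Hedged sketch (not part of the source): trial above is assumed to fit linear
# regression with weight decay, i.e. the one-shot ridge solution
# w = (Z^T Z + lambda*I)^(-1) Z^T y with lambda = 10**k, and to score by
# classification error. Minimal versions with hypothetical names:
import numpy as np

def ridge_weights(Z, y, lam):
    # closed-form regularized least squares
    d = Z.shape[1]
    return np.linalg.solve(Z.T @ Z + lam * np.eye(d), Z.T @ y)

def classification_error(Z, y, w):
    # fraction of points where sign(Z w) disagrees with the labels
    return np.mean(np.sign(Z @ w) != y)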
def simulations():
    que = {}
    progress_iterator = ProgressIterator(2)

    progress_iterator.next()
    out_error, iterations = experiment(trial, [10, 100], 1000)
    que[7] = ("iterations :", iterations)
    que[8] = ("out of sample error :", out_error)

    progress_iterator.next()
    out_error, iterations = experiment(trial, [100, 100], 1000)
    que[9] = ("iterations :", iterations)
    que[10] = ("out of sample error :", out_error)
    return que
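# Hedged sketch (not part of the source): experiment, used here and above, is
# assumed to call the given trial repeatedly with fixed arguments and average
# each returned quantity over all runs, assuming every trial returns a
# fixed-length tuple of numbers. experiment_sketch is a hypothetical name.
import numpy as np

def experiment_sketch(trial_fn, args, runs):
    results = np.array([trial_fn(*args) for _ in range(runs)], dtype=float)
    return tuple(results.mean(axis=0))  # per-output averages across runs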
def simulations():
    que = {}
    training_data = np.genfromtxt(os.path.join(file_dir, "features.train"))
    testing_data = np.genfromtxt(os.path.join(file_dir, "features.test"))

    def convert_raw(t_data):
        return DataML((t_data[:, 1:], np.array(t_data[:, 0], dtype="int")))

    initial_training_set = convert_raw(training_data)
    initial_testing_set = convert_raw(testing_data)

    def transform_help(transform, *col_data_sets):
        return [
            DataML((transform(data_set.z), data_set.y))
            for data_set in col_data_sets
        ]

    progress_iterator = ProgressIterator(4)

    progress_iterator.next()
    constant_training_set, constant_testing_set = transform_help(
        add_constant, initial_training_set, initial_testing_set)
    allexcept_constant_train_test_li = [
        allexcept(digit, constant_training_set, constant_testing_set)
        for digit in range(10)
    ]
    no_transform_errors = [
        train_test(*train_test_sets, minimize_error_aug, [1])
        for train_test_sets in allexcept_constant_train_test_li
    ]
    in_sample_error_5_9 = [
        error_list[0] for error_list in no_transform_errors[5:10]
    ]
    min_arg = np.argmin(in_sample_error_5_9) + 5
    min_error = min(in_sample_error_5_9)
    que[7] = ("digit with lowest in sample error : ",
              str(min_arg) + ", " + str(min_error))

    progress_iterator.next()
    second_order_training_set, second_order_testing_set = transform_help(
        second_order_nic, initial_training_set, initial_testing_set)
    allexcept_second_order_train_test_li = [
        allexcept(digit, second_order_training_set, second_order_testing_set)
        for digit in range(10)
    ]
    transform_errors = [
        train_test(*train_test_sets, minimize_error_aug, [1])
        for train_test_sets in allexcept_second_order_train_test_li
    ]
    out_of_sample_error_0_4 = [
        error_list[1] for error_list in transform_errors[:5]
    ]
    min_arg = np.argmin(out_of_sample_error_0_4)
    min_error = min(out_of_sample_error_0_4)
    que[8] = ("digit with lowest out of sample error : ",
              str(min_arg) + ", " + str(min_error))

    tables = [[["no transform"] + no_transform_errors[i],
               ["transform"] + transform_errors[i]] for i in range(10)]
    pretty_tables = [
        tabulate(table, headers=["", "in sample", "out of sample"])
        for table in tables
    ]
    tables_string = "\n".join([
        "\ndigit {}\n".format(i) + str(pretty_tables[i])
        for i in range(len(pretty_tables))
    ])
    que[9] = ("effectiveness of feature transform on 0 and 9", tables_string)

    progress_iterator.next()
    one_v_five_second_order_sets = a_vs_b(1, 5, second_order_training_set,
                                          second_order_testing_set)
    errors_lambda = [
        train_test(*one_v_five_second_order_sets, minimize_error_aug, [alpha])
        for alpha in [0.01, 1]
    ]
    pretty_table = tabulate(
        [["lambda 0.01"] + errors_lambda[0], ["lambda 1"] + errors_lambda[1]],
        headers=["", "in sample", "out of sample"])
    que[10] = ("errors from changing lambda for 1 vs 5\n",
               "\n" + str(pretty_table)
               + "\n\nevidence of overfitting as increased constraint"
               " improves performance")

    total_support_vectors, in_sample_error = train_svc(
        transform_help(add_constant, svm_que_helper())[0],
        SVC(kernel="poly", degree=2, C=float("infinity")))
    que[12] = ("total support vectors :", total_support_vectors)

    total_trials = 30

    class SVC_REGULAR:
        def __init__(self, total_trials, k, gammas):
            (self.total_hard_margin_svc_failure,
             self.svc_eout_li,
             self.reg_ein_li,
             self.reg_eout_li) = trial(total_trials, k, gammas)

    progress_iterator.next()
    k9_g1x5 = SVC_REGULAR(total_trials, 9, 1.5 * np.ones(9))
    que[13] = ("total hard margin svc failure percentage :",
               k9_g1x5.total_hard_margin_svc_failure / total_trials)
    que[14] = ("svc rbf better than regular rbf percentage (k=9):",
               sum(k9_g1x5.svc_eout_li < k9_g1x5.reg_eout_li)
               / len(k9_g1x5.svc_eout_li))

    k12_g1x5 = SVC_REGULAR(total_trials, 12, 1.5 * np.ones(12))
    que[15] = ("svc rbf better than regular rbf percentage (k=12):",
               sum(k12_g1x5.svc_eout_li < k12_g1x5.reg_eout_li)
               / len(k12_g1x5.svc_eout_li))

    k9_better_k12_ein_percentage = sum(
        k9_g1x5.reg_ein_li < k12_g1x5.reg_ein_li) / len(k9_g1x5.reg_ein_li)
    k9_better_k12_eout_percentage = sum(
        k9_g1x5.reg_eout_li < k12_g1x5.reg_eout_li) / len(k9_g1x5.reg_eout_li)
    pretty_table = tabulate(
        [[k9_better_k12_ein_percentage, k9_better_k12_eout_percentage]],
        headers=[
            "k=9 ein < k=12 ein percentage",
            "k=9 eout < k=12 eout percentage"
        ])
    table = [[
        np.mean(error_li)
        for error_li in [svc_regular.reg_ein_li, svc_regular.reg_eout_li]
    ] for svc_regular in [k9_g1x5, k12_g1x5]]
    pretty_table2 = tabulate(
        [["k=9"] + table[0], ["k=12"] + table[1]],
        headers=["", "in sample error", "out of sample error"])
    que[16] = ("regular rbf changing k",
               "\n" + str(pretty_table) + "\n" + str(pretty_table2))

    # gamma raised to 2 with k held at 9, so only the gammas differ
    k9_g2 = SVC_REGULAR(total_trials, 9, 2 * np.ones(9))
    g1x5_better_g2_ein_percentage = sum(
        k9_g1x5.reg_ein_li < k9_g2.reg_ein_li) / len(k9_g1x5.reg_ein_li)
    g1x5_better_g2_eout_percentage = sum(
        k9_g1x5.reg_eout_li < k9_g2.reg_eout_li) / len(k9_g1x5.reg_eout_li)
    pretty_table = tabulate(
        [[g1x5_better_g2_ein_percentage, g1x5_better_g2_eout_percentage]],
        headers=[
            "g=1.5 ein < g=2 ein percentage",
            "g=1.5 eout < g=2 eout percentage"
        ])
    table = [[
        np.mean(error_li)
        for error_li in [svc_regular.reg_ein_li, svc_regular.reg_eout_li]
    ] for svc_regular in [k9_g1x5, k9_g2]]
    pretty_table2 = tabulate(
        [["g=1.5"] + table[0], ["g=2"] + table[1]],
        headers=["", "in sample error", "out of sample error"])
    que[17] = ("regular rbf changing gammas",
               "\n" + str(pretty_table) + "\n" + str(pretty_table2))

    zero_ein = k9_g1x5.reg_ein_li < 1 / (10 * total_trials)
    que[18] = (
        "regular rbf (k=9, gamma=1.5) zero in sample error percentage : ",
        sum(zero_ein) / len(zero_ein))
    return que
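# Hedged sketch (not part of the source): the "regular rbf" model behind trial
# above is assumed to be the usual Lloyd + pseudo-inverse construction: pick k
# centers with k-means, build gaussian features around each center, then solve
# for the weights by linear regression. This sketch uses one shared gamma for
# simplicity (the driver passes a gamma per center); names are hypothetical,
# and scipy's kmeans may occasionally return fewer than k centers.
import numpy as np
from scipy.cluster.vq import kmeans  # Lloyd's algorithm

def rbf_features(X, centers, gamma):
    # pairwise squared distances -> gaussian bump around each center
    d2 = ((X[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
    return np.exp(-gamma * d2)

def fit_regular_rbf(X, y, k, gamma):
    centers, _ = kmeans(X.astype(float), k)
    Phi = rbf_features(X, centers, gamma)
    weights = np.linalg.pinv(Phi) @ y  # least-squares weights
    return centers, weights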
def simulations():
    que = {}
    training_data = np.genfromtxt(os.path.join(hw6_dir_path, "in.dta"))
    testing_data = np.genfromtxt(os.path.join(hw6_dir_path, "out.dta"))
    progress_iterator = ProgressIterator(6)

    progress_iterator.next()
    initial_total = 25  # initial points used for training
    initial_model_weights = restricted_training(training_data, initial_total)
    validation_set = DataML(training_data[initial_total:], transform)
    best_k, out_of_sample_errors = best_model(initial_model_weights,
                                              validation_set)
    pretty_table = tabulate([[k, out_of_sample_errors[k - 3]]
                             for k in range(3, 8)],
                            headers=["k", "EOUT"])
    que[1] = ("validation set out of sample errors, last 10 points",
              "\n" + str(pretty_table))

    progress_iterator.next()
    testing_set = DataML(testing_data, transform)
    best_k, out_of_sample_errors = best_model(initial_model_weights,
                                              testing_set)
    pretty_table = tabulate([[k, out_of_sample_errors[k - 3]]
                             for k in range(3, 8)],
                            headers=["k", "EOUT"])
    que[2] = ("test set out of sample errors", "\n" + str(pretty_table))

    progress_iterator.next()
    first_error = min(out_of_sample_errors)
    reverse_total = 10
    training_set = DataML(training_data[-reverse_total:], transform)
    reverse_model_weights = gen_models(training_set)
    best_k, out_of_sample_errors = best_model(
        reverse_model_weights,
        DataML(training_data[:-reverse_total], transform))
    pretty_table = tabulate([[k, out_of_sample_errors[k - 3]]
                             for k in range(3, 8)],
                            headers=["k", "EOUT"])
    que[3] = ("validation set out of sample errors, first 25 points",
              "\n" + str(pretty_table))

    progress_iterator.next()
    testing_set = DataML(testing_data, transform)
    best_k, out_of_sample_errors = best_model(reverse_model_weights,
                                              testing_set)
    pretty_table = tabulate([[k, out_of_sample_errors[k - 3]]
                             for k in range(3, 8)],
                            headers=["k", "EOUT"])
    que[4] = ("test set out of sample errors", "\n" + str(pretty_table))
    second_error = min(out_of_sample_errors)
    que[5] = ("smallest out of sample errors :",
              str(first_error) + ", " + str(second_error))

    progress_iterator.next()
    svm_better, total_support_vectors = experiment(trial, [10, 100], 1000)
    que[8] = ("svm better than pla : ", svm_better)

    progress_iterator.next()
    svm_better, total_support_vectors = experiment(trial, [100, 100], 1000)
    que[9] = ("svm better than pla : ", svm_better)
    que[10] = ("total support vectors : ", total_support_vectors)
    return que
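# Hedged sketch (not part of the source): best_model above is assumed to score
# each candidate model (k = 3..7 transform dimensions) on the given set and
# return the winning k plus all the errors. The column slicing below is an
# assumption about how k maps to features; best_model_sketch and error_fn are
# hypothetical names.
import numpy as np

def best_model_sketch(model_weights, data_set, error_fn):
    # model_weights is assumed ordered by k, each fit on the first k+1 features
    errors = [error_fn(data_set.z[:, :k + 1], data_set.y, w)
              for k, w in zip(range(3, 8), model_weights)]
    return int(np.argmin(errors)) + 3, errors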
def simulations():
    que = {}
    progress_iterator = ProgressIterator(5)

    progress_iterator.next()
    sample_size = ceil(solved_vc_inequality(1 - 0.95, 0.05, 400000))
    que[1] = ("sample size needed :", sample_size)

    def error_bound_format(n):
        (original_vc_bound, rademacher_penalty_bound,
         parrondo_van_den_broek_bound, devroye_bound) = error_bound(n)
        output = ("Bounds for N=" + str(n),
                  "\noriginal vc : " + str(original_vc_bound) + "\n"
                  + "rademacher penalty : " + str(rademacher_penalty_bound)
                  + "\n"
                  + "parrondo and van den broek : "
                  + str(parrondo_van_den_broek_bound) + "\n"
                  + "devroye : " + str(devroye_bound) + "\n")
        return output

    progress_iterator.next()
    que[2] = error_bound_format(10000)

    progress_iterator.next()
    que[3] = error_bound_format(5)

    progress_iterator.next()
    analysis = bias_variance_out_sample_error(1000)

    def bias_variance_format(analysis):
        names = [
            "constant : a",
            "\n\nline through origin : ax",
            "\n\nline : ax + b",
            "\n\nquadratic through origin : ax**2",
            "\n\nquadratic : ax**2 + b"
        ]
        output = ""
        for i in range(len(analysis)):
            if i == 1:
                output += (names[i]
                           + "\nmean parameters : "
                           + str(analysis[i]["mean parameters"])
                           + " # ans to question 4, differs from the given"
                           " solution"
                           + "\nbias : " + str(analysis[i]["bias"])
                           + " # ans to question 5"
                           + "\nvariance : " + str(analysis[i]["variance"])
                           + " # ans to question 6"
                           + "\nexpected out of sample error : "
                           + str(analysis[i]["expected out of sample error"]))
            else:
                output += (names[i]
                           + "\nmean parameters : "
                           + str(analysis[i]["mean parameters"])
                           + "\nbias : " + str(analysis[i]["bias"])
                           + "\nvariance : " + str(analysis[i]["variance"])
                           + "\nexpected out of sample error : "
                           + str(analysis[i]["expected out of sample error"]))
        output += ("\n\nbest hypothesis is 'line through origin' with an "
                   "expected out of sample error of "
                   + str(round(analysis[1]["expected out of sample error"],
                               3)))
        return output

    progress_iterator.next()
    que[4] = ("Also includes answers to questions 5, 6, 7\n\n"
              "Analysis of various hypotheses",
              "\n" + str(bias_variance_format(analysis)))
    return que
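# Hedged sketch (not part of the source): error_bound above is assumed to
# evaluate the four generalization bounds from the course with the growth
# function approximated as m_H(N) = N**d_vc (d_vc = 50, delta = 0.05 in the
# homework). The Parrondo/Van den Broek and Devroye bounds have epsilon on
# both sides, so they are iterated to a fixed point. vc_bounds is a
# hypothetical name.
import math

def vc_bounds(n, d_vc=50, delta=0.05):
    def log_growth(m):  # ln m_H(m) with m_H(m) ~ m**d_vc
        return d_vc * math.log(m)

    original = math.sqrt(
        8 / n * (math.log(4) + log_growth(2 * n) - math.log(delta)))
    rademacher = (math.sqrt(2 * (math.log(2 * n) + log_growth(n)) / n)
                  + math.sqrt(2 / n * math.log(1 / delta)) + 1 / n)
    eps = 1.0
    for _ in range(100):  # implicit bound: iterate to a fixed point
        eps = math.sqrt(
            (2 * eps + math.log(6) + log_growth(2 * n) - math.log(delta)) / n)
    parrondo = eps
    eps = 1.0
    for _ in range(100):  # ln m_H(n^2) = 2 * d_vc * ln(n)
        eps = math.sqrt(
            (4 * eps * (1 + eps) + math.log(4) + 2 * d_vc * math.log(n)
             - math.log(delta)) / (2 * n))
    devroye = eps
    return original, rademacher, parrondo, devroye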
def simulations():
    que = {}
    training_data = np.genfromtxt(os.path.join(file_dir, "features.train"))
    testing_data = np.genfromtxt(os.path.join(file_dir, "features.test"))

    def convert_raw(t_data):
        return DataML((t_data[:, 1:], np.array(t_data[:, 0], dtype="int")))

    training_set = convert_raw(training_data)
    testing_set = convert_raw(testing_data)
    progress_iterator = ProgressIterator(5)

    progress_iterator.next()
    results_even = [
        trial_all_except(training_set, testing_set, digit, 'poly', 0.1, 2)
        for digit in range(0, 9, 2)
    ]
    in_sample_error_list_even = [result[1][0] for result in results_even]
    que[2] = ("digit with highest in sample error :",
              (np.argmax(in_sample_error_list_even) * 2,
               np.max(in_sample_error_list_even)))

    results_odd = [
        trial_all_except(training_set, testing_set, digit, 'poly', 0.1, 2)
        for digit in range(1, 10, 2)
    ]
    in_sample_error_list_odd = [result[1][0] for result in results_odd]
    que[3] = ("digit with lowest in sample error :",
              (np.argmin(in_sample_error_list_odd) * 2 + 1,
               np.min(in_sample_error_list_odd)))

    support_vector_difference = abs(
        sum(results_even[np.argmax(in_sample_error_list_even)][0])
        - sum(results_odd[np.argmin(in_sample_error_list_odd)][0]))
    que[4] = ("support vector difference :", support_vector_difference)

    progress_iterator.next()
    results = [
        trial_a_vs_b(training_set, testing_set, 1, 5, 'poly', c, 2)
        for c in [0.001, 0.01, 0.1, 1]
    ]
    support_vectors = [sum(result[0]) for result in results]
    out_of_sample_errors = [result[1][1] for result in results]
    in_sample_errors = [result[1][0] for result in results]
    que[5] = ("various stats",
              "\n\tsupport vectors\n\t" + str(support_vectors)
              + "\n\tout of sample errors\n\t" + str(out_of_sample_errors)
              + "\n\tin sample errors\n\t" + str(in_sample_errors))

    progress_iterator.next()
    results = [[
        trial_a_vs_b(training_set, testing_set, 1, 5, 'poly', c, degree)
        for c in [0.0001, 0.001, 0.01, 0.1, 1]
    ] for degree in range(2, 6)]
    results_transpose = [[results[i][j] for i in range(len(results))]
                         for j in range(len(results[0]))]
    c_lowest_ein = [result[1][0] for result in results_transpose[0]]
    support_vectors = [sum(result[0]) for result in results_transpose[1]]
    # c = 0.01 is the third entry of the c list, hence index 2
    c_third_lowest_ein = [result[1][0] for result in results_transpose[2]]
    c_highest_eout = [result[1][1] for result in results_transpose[-1]]
    que[6] = ("various stats",
              "\n\tin sample errors when c = 0.0001\n\t" + str(c_lowest_ein)
              + "\n\tsupport vectors when c = 0.001\n\t" + str(support_vectors)
              + "\n\tin sample errors when c = 0.01\n\t"
              + str(c_third_lowest_ein)
              + "\n\tout of sample errors when c = 1\n\t"
              + str(c_highest_eout))

    progress_iterator.next()
    results = [best_c(training_set) for _ in range(50)]
    frequency = np.bincount([result[0] for result in results])
    que[7] = ("frequency :", frequency)
    best = np.argmax(frequency)
    average_score_of_best = np.mean([result[1][best] for result in results])
    que[8] = ("average_score_of_best :", average_score_of_best)

    progress_iterator.next()
    results = [
        trial_a_vs_b(training_set, testing_set, 1, 5, 'rbf', c)
        for c in [0.01, 1, 100, 10**4, 10**6]
    ]
    in_sample_errors = [result[1][0] for result in results]
    que[9] = ("in sample errors :", in_sample_errors)
    out_of_sample_errors = [result[1][1] for result in results]
    que[10] = ("out of sample errors :", out_of_sample_errors)
    return que
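# Hedged sketch (not part of the source): trial_a_vs_b above is assumed to fit
# an sklearn SVC on the a-vs-b subset of the digit data and report per-class
# support vector counts plus (ein, eout). gamma=1.0 and coef0=1.0 make the
# poly kernel (1 + x.x')**degree, matching the homework's kernel; for 'rbf'
# the degree is ignored. a_vs_b_trial and subset are hypothetical names.
import numpy as np
from sklearn.svm import SVC

def a_vs_b_trial(train, test, a, b, kernel, c, degree=3):
    def subset(data_set):
        mask = (data_set.y == a) | (data_set.y == b)
        return data_set.z[mask], np.where(data_set.y[mask] == a, 1, -1)

    z_train, y_train = subset(train)
    z_test, y_test = subset(test)
    clf = SVC(C=c, kernel=kernel, degree=degree, gamma=1.0, coef0=1.0)
    clf.fit(z_train, y_train)
    errors = (np.mean(clf.predict(z_train) != y_train),
              np.mean(clf.predict(z_test) != y_test))
    return clf.n_support_, errors  # counts per class, (ein, eout)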