def q3():
    """Cross-validate MyLogisticReg2 and LogisticRegression on Boston50/Boston25.

    Loads both Boston datasets, builds one model per (method, dataset)
    pair, runs ``my_cross_val`` with 5 folds on each pair in a fixed order,
    and prints the scores through ``my_pretty_print`` followed by a
    separator line.

    Each ``MyLogisticReg2`` is constructed with ``shape[1]`` of the feature
    array it will actually be trained on (presumably the feature count --
    confirm against ``MyLogisticReg2.__init__``).
    """
    # Initialize the values from the datasets
    boston25_x, boston25_y = prepare_boston25()
    boston50_x, boston50_y = prepare_boston50()

    # Number of folds: assignment specifies the value of 5
    k_fold = 5

    # Order in which the (method, dataset) pairs are evaluated and printed.
    default_order = [
        ('MyLogisticReg2', 'Boston50'),
        ('MyLogisticReg2', 'Boston25'),
        ('LogisticRegression', 'Boston50'),
        ('LogisticRegression', 'Boston25'),
    ]

    # Each entry is (estimator, features, targets). The custom model is
    # sized from its own dataset rather than from the other one, so the two
    # can never drift apart if the datasets ever differ in width.
    methods = {
        ('MyLogisticReg2', 'Boston50'):
            (MyLogisticReg2(boston50_x.shape[1]), boston50_x, boston50_y),
        ('MyLogisticReg2', 'Boston25'):
            (MyLogisticReg2(boston25_x.shape[1]), boston25_x, boston25_y),
        ('LogisticRegression', 'Boston50'):
            (LogisticRegression(), boston50_x, boston50_y),
        ('LogisticRegression', 'Boston25'):
            (LogisticRegression(), boston25_x, boston25_y),
    }

    for key in default_order:
        name, dataset = key
        method, x, y = methods[key]
        # Using my implementation of cross validation instead of the built-in one
        scores = my_cross_val(method, x, y, k_fold)
        my_pretty_print(name, dataset, scores)
        print('==============')
def q3i(argv=None):
    """Cross-validate LinearSVC, SVC and LogisticRegression on three datasets.

    ``wrapper_args`` turns ``argv`` into the requested dataset name, method
    name, fold count ``k`` and the ``latex`` flag. When the dataset is
    ``'all'`` every (method, dataset) pair is evaluated in a fixed order;
    otherwise only the requested pair is run. For each pair a header is
    printed, ``my_cross_val`` is run with ``k`` folds, and the scores are
    handed to ``report``.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3i', ['Boston50', 'Boston75', 'Digits'])

    b50_features, b50_targets = prepare_boston50()
    b75_features, b75_targets = prepare_boston75()
    digit_features, digit_targets = prepare_digits()

    # (dataset label, features, targets) in evaluation order.
    loaded = (
        ('Boston50', b50_features, b50_targets),
        ('Boston75', b75_features, b75_targets),
        ('Digits', digit_features, digit_targets),
    )
    # (method label, estimator class) in evaluation order.
    estimators = (
        ('LinearSVC', LinearSVC),
        ('SVC', SVC),
        ('LogisticRegression', LogisticRegression),
    )

    default_order = []
    methods = {}
    for method_label, factory in estimators:
        for data_label, features, targets in loaded:
            pair = (method_label, data_label)
            default_order.append(pair)
            # A fresh estimator instance per (method, dataset) pair.
            methods[pair] = (factory(), features, targets)

    selected = default_order if dataset == 'all' else [(method_name, dataset)]

    for chosen_method, chosen_data in selected:
        estimator, X, y = methods[(chosen_method, chosen_data)]
        print('==============')
        print('method: {}, dataset: {}'.format(chosen_method, chosen_data))
        fold_scores = my_cross_val(estimator, X, y, k)
        report(chosen_method, chosen_data, fold_scores, latex=latex)
def q3(argv=None):
    """Cross-validate MultiGaussClassify and LogisticRegression.

    ``wrapper_args`` turns ``argv`` into the requested dataset name, method
    name, fold count ``k`` and the ``latex`` flag. When the dataset is
    ``'all'`` every (method, dataset) pair is evaluated in a fixed order;
    otherwise only the requested pair is run. For each pair a header is
    printed, ``my_cross_val`` is run with ``k`` folds, and the scores are
    handed to ``report``.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75', 'Digits'])

    b50_features, b50_targets = prepare_boston50()
    b75_features, b75_targets = prepare_boston75()
    # Note that prepare_digits adds gaussian noise to the data to
    # avoid singular covariance matrices. For details, see
    # datasets.prepare_digits
    digit_features, digit_targets = prepare_digits()

    # (dataset label, features, targets) in evaluation order.
    loaded = (
        ('Boston50', b50_features, b50_targets),
        ('Boston75', b75_features, b75_targets),
        ('Digits', digit_features, digit_targets),
    )

    default_order = [
        (method_label, data_label)
        for method_label in ('MultiGaussClassify', 'LogisticRegression')
        for data_label, _, _ in loaded
    ]

    methods = {}
    for data_label, features, targets in loaded:
        # Digits is the only dataset whose classifier gets linear=False;
        # the Boston variants use the constructor defaults.
        if data_label == 'Digits':
            gauss = MultiGaussClassify(linear=False)
        else:
            gauss = MultiGaussClassify()
        methods[('MultiGaussClassify', data_label)] = (gauss, features, targets)
    for data_label, features, targets in loaded:
        methods[('LogisticRegression', data_label)] = (
            LogisticRegression(), features, targets)

    selected = default_order if dataset == 'all' else [(method_name, dataset)]

    for chosen_method, chosen_data in selected:
        estimator, X, y = methods[(chosen_method, chosen_data)]
        print('==============')
        print('method: {}, dataset: {}'.format(chosen_method, chosen_data))
        fold_scores = my_cross_val(estimator, X, y, k)
        report(chosen_method, chosen_data, fold_scores, latex=latex)
def q3():
    """Compare MySVM2 at three batch sizes against LogisticRegression.

    For Boston50 and then Boston25, evaluates ``MySVM2`` with a third
    constructor argument of 40, 200 and the dataset's row count, followed
    by ``LogisticRegression``. Each configuration is scored with
    ``my_cross_val`` using 5 folds and printed with ``my_pretty_print``,
    followed by a separator line.
    """
    # Load the datasets (Boston25 first, then Boston50).
    boston25_x, boston25_y = prepare_boston25()
    boston50_x, boston50_y = prepare_boston50()

    # The assignment fixes the number of folds at 5.
    n_folds = 5

    default_order = []
    methods = {}
    corpora = (
        ('Boston50', boston50_x, boston50_y),
        ('Boston25', boston25_x, boston25_y),
    )
    for data_label, features, targets in corpora:
        n_rows, n_cols = features.shape[0], features.shape[1]
        configurations = (
            ('MySVM2 with m = 40', MySVM2(n_cols, 500, 40)),
            ('MySVM2 with m = 200', MySVM2(n_cols, 500, 200)),
            # The full row count is passed on purpose: it triggers the
            # special case that makes the batch size m equal to n.
            ('MySVM2 with m = n', MySVM2(n_cols, 500, n_rows)),
            ('LogisticRegression', LogisticRegression()),
        )
        for method_label, estimator in configurations:
            pair = (method_label, data_label)
            default_order.append(pair)
            methods[pair] = (estimator, features, targets)

    for method_label, data_label in default_order:
        estimator, x, y = methods[(method_label, data_label)]
        # Custom cross-validation routine rather than a library one.
        fold_scores = my_cross_val(estimator, x, y, n_folds)
        my_pretty_print(method_label, data_label, fold_scores)
        print('==============')
def q3():
    """Compare full/diagonal MultiGaussClassify with LogisticRegression.

    Loads Boston50, Boston25 and Digits; each ``prepare_*`` call yields four
    values, the last two of which are forwarded to ``MultiGaussClassify``.
    Every (method, dataset) pair is scored with ``my_cross_val`` using
    5 folds and printed with ``my_pretty_print``.
    """
    # Load the datasets; each loader returns (X, y, k, d).
    b50_X, b50_y, b50_k, b50_d = prepare_boston50()
    b25_X, b25_y, b25_k, b25_d = prepare_boston25()
    dig_X, dig_y, dig_k, dig_d = prepare_digits()

    corpora = (
        ('Boston50', b50_X, b50_y, b50_k, b50_d),
        ('Boston25', b25_X, b25_y, b25_k, b25_d),
        ('Digits', dig_X, dig_y, dig_k, dig_d),
    )

    # (label, factory) pairs; every factory receives the dataset's k and d.
    # The "diagonal" variant differs only by the extra positional True.
    factories = (
        ('MultiGaussClassify with full covariance matrix',
         lambda k, d: MultiGaussClassify(k, d)),
        ('MultiGaussClassify with diagonal covariance matrix',
         lambda k, d: MultiGaussClassify(k, d, True)),
        ('LogisticRegression',
         lambda k, d: LogisticRegression()),
    )

    default_order = []
    methods = {}
    for method_label, build in factories:
        for data_label, features, targets, k, d in corpora:
            pair = (method_label, data_label)
            default_order.append(pair)
            methods[pair] = (build(k, d), features, targets)

    for method_label, data_label in default_order:
        estimator, X, y = methods[(method_label, data_label)]
        # Custom cross-validation routine rather than a library one.
        fold_scores = my_cross_val(estimator, X, y, 5)
        my_pretty_print(method_label, data_label, fold_scores)
def q3(argv=None):
    """Cross-validate a tuned MyFLDA2 and LogisticRegression on Boston50/75.

    ``wrapper_args`` turns ``argv`` into the requested dataset name, method
    name, fold count ``k`` and the ``latex`` flag. Before any evaluation,
    ``find_best_myflda2`` is run on both datasets (with progress messages)
    and its results are used as the MyFLDA2 estimators. When the dataset is
    ``'all'`` every (method, dataset) pair is evaluated in a fixed order;
    otherwise only the requested pair is run. For each pair a header is
    printed, ``my_cross_val`` is run with ``k`` folds, and the scores are
    handed to ``report``.
    """
    dataset, method_name, k, latex = wrapper_args(
        argv, 'q3', ['Boston50', 'Boston75'],
        ['MyFLDA2', 'LogisticRegression'])

    b50_features, b50_targets = prepare_boston50()
    b75_features, b75_targets = prepare_boston75()

    # (dataset label, progress message, features, targets)
    tuning_plan = (
        ('Boston50', 'Finding optimal threshold for MyFLDA2 on Boston50 ...',
         b50_features, b50_targets),
        ('Boston75', 'Finding optimal threshold for MyFLDA2 on Boston75 ...',
         b75_features, b75_targets),
    )

    # Find the optimal separation for the training set
    tuned = {}
    for data_label, message, features, targets in tuning_plan:
        print(message)
        tuned[data_label] = find_best_myflda2(MyFLDA2, features, targets, k)
    print('Done.')

    default_order = [
        (method_label, data_label)
        for method_label in ('MyFLDA2', 'LogisticRegression')
        for data_label, _, _, _ in tuning_plan
    ]

    methods = {}
    for data_label, _, features, targets in tuning_plan:
        methods[('MyFLDA2', data_label)] = (
            tuned[data_label], features, targets)
    for data_label, _, features, targets in tuning_plan:
        methods[('LogisticRegression', data_label)] = (
            LogisticRegression(), features, targets)

    selected = default_order if dataset == 'all' else [(method_name, dataset)]

    for chosen_method, chosen_data in selected:
        estimator, X, y = methods[(chosen_method, chosen_data)]
        print('==============')
        print('method: {}, dataset: {}'.format(chosen_method, chosen_data))
        fold_scores = my_cross_val(estimator, X, y, k)
        report(chosen_method, chosen_data, fold_scores, latex=latex)