def logistic(InputFileName):
    """Run the unregularised logistic pipeline on the data in *InputFileName*.

    The data is loaded, given classification labels, normalised and augmented
    with derived features.  The features are then expanded according to a
    fixed inclusion list, the expanded set is written to ``fp_out``, and the
    result is split into two folds.  ``regression`` is run on each fold
    separately and both parameter sets are printed.

    Returns whatever ``evaluate`` returns for the two folds and their two
    parameter sets.
    """
    # Pre-processing chain: load -> classify -> normalise -> derive features.
    labelled = append_classifications(load_data(InputFileName))
    features = append_features(normalise(labelled))

    # One entry per derived feature, in the order FeatureExpander expects.
    # NOTE(review): presumably 0 means "leave this feature out" -- confirm
    # against FeatureExpander.expand_features.
    inclusion_list = [
        0,  # last sv
        0,  # last change in sv
        0,  # mean of prev 10 rows sv
        0,  # std dev of prev 10 rows sv
        0,  # last sp
        0,  # last change in sp
        0,  # mean of prev 10 rows sp
        0,  # std dev of prev 10 rows sp
    ]

    expanded = FeatureExpander(features).expand_features(inclusion_list)
    write_to_file(expanded, fp_out)

    fold_a, fold_b = split_data(expanded)
    theta_a = regression(fold_a)
    theta_b = regression(fold_b)

    print(theta_a)
    print(theta_b)
    return evaluate(fold_a, fold_b, theta_a, theta_b)
def linear(InputFileName):
    """Run the unregularised linear pipeline on the data in *InputFileName*.

    The raw data is augmented with derived features (no classification or
    normalisation step is applied here), expanded according to a fixed
    inclusion list, written to ``fp_out`` and split into two folds.
    ``regression`` is run on each fold, the folds are evaluated, and both
    parameter sets are printed.

    Returns whatever ``evaluate`` returns for the two folds and their two
    parameter sets.
    """
    features = append_features(load_data(InputFileName))

    # One entry per derived feature, in the order FeatureExpander expects.
    # NOTE(review): the value presumably selects how far each feature is
    # expanded (0 = excluded) -- confirm against FeatureExpander.
    inclusion_list = [
        2,  # last sv
        0,  # last change in sv
        0,  # mean of prev 10 rows sv
        0,  # std dev of prev 10 rows sv
        0,  # last sp
        0,  # last change in sp
        0,  # mean of prev 10 rows sp
        0,  # std dev of prev 10 rows sp
    ]

    expanded = FeatureExpander(features).expand_features(inclusion_list)
    write_to_file(expanded, fp_out)

    fold_a, fold_b = split_data(expanded)
    theta_a = regression(fold_a)
    theta_b = regression(fold_b)

    # Evaluate before printing, matching the established output order.
    outcome = evaluate(fold_a, fold_b, theta_a, theta_b)
    print(theta_a)
    print(theta_b)
    return outcome
def reglogistic(InputFileName, inclusion_list=None):
    """Sweep the regularisation strength for the logistic pipeline.

    The data in *InputFileName* is loaded, labelled, normalised, augmented
    with derived features and expanded according to *inclusion_list*.  The
    expanded set is written to ``fp_out`` and split into two cross-validation
    folds plus a held-out test fold.  For every lambda in the sweep a model is
    fitted on each CV fold and scored on the other one with
    ``percentage_correct_classifications``; the highest-scoring candidate is
    finally scored on the test fold.

    Args:
        InputFileName: passed straight to ``load_data``.
        inclusion_list: per-feature expansion settings handed to
            ``FeatureExpander.expand_features``.  When omitted (or empty) a
            built-in default is used.

    Returns:
        A tuple ``(best_result, final_score)`` where ``best_result`` is
        ``(theta, lamb, cross_validation_score)`` and ``final_score`` is the
        score of that theta on the test fold.
    """
    raw_data = load_data(InputFileName)
    all_normalised_data = normalise(append_classifications(raw_data))
    training_data = append_features(all_normalised_data)
    expander = FeatureExpander(training_data)

    if not inclusion_list:
        # NOTE(review): presumably 0 excludes a feature and a positive value
        # includes it -- confirm against FeatureExpander.expand_features.
        inclusion_list = [
            0,  # last sv
            0,  # last change in sv
            0,  # mean of prev 10 rows sv
            0,  # std dev of prev 10 rows sv
            0,  # last sp
            1,  # last change in sp
            0,  # mean of prev 10 rows sp
            1,  # std dev of prev 10 rows sp
        ]

    expanded = expander.expand_features(inclusion_list)
    write_to_file(expanded, fp_out)
    expanded_CV1, expanded_CV2, expanded_test = split_data_3_folds(expanded)

    results = []
    lamb_resolution = 5
    for step in range(1, lamb_resolution):
        # Force true division: with two ints Python 2 floors the quotient,
        # which would make every lambda in the sweep equal to 0.
        lamb = float(step) / lamb_resolution
        print(lamb)
        THETA_CV1 = unflatten_theta(regression(expanded_CV1, lamb))
        THETA_CV2 = unflatten_theta(regression(expanded_CV2, lamb))
        # Each model is scored on the fold it was NOT trained on.
        results.append(
            (THETA_CV1, lamb,
             percentage_correct_classifications(THETA_CV1, expanded_CV2)))
        results.append(
            (THETA_CV2, lamb,
             percentage_correct_classifications(THETA_CV2, expanded_CV1)))

    # Higher is better; max() keeps the first candidate on ties.
    best_result = max(results, key=lambda candidate: candidate[2])

    final_score = percentage_correct_classifications(best_result[0],
                                                     expanded_test)
    return (best_result, final_score)
def reglinear(InputFileName, inclusion_list=None):
    """Sweep the regularisation strength for the linear pipeline.

    The data in *InputFileName* is loaded, augmented with derived features
    and expanded according to *inclusion_list*.  The expanded set is written
    to ``fp_out`` and split into two cross-validation folds plus a held-out
    test fold.  For every lambda in the sweep a model is fitted on each CV
    fold and scored on the other one with ``calc_mse``; the candidate with
    the lowest error is finally scored on the test fold.

    Args:
        InputFileName: passed straight to ``load_data``.
        inclusion_list: per-feature expansion settings handed to
            ``FeatureExpander.expand_features``.  When omitted (or empty) a
            built-in default is used.

    Returns:
        A tuple ``(best_result, final_score)`` where ``best_result`` is
        ``(theta, lamb, cross_validation_mse)`` and ``final_score`` is the
        ``calc_mse`` value of that theta on the test fold.
    """
    raw_data = load_data(InputFileName)
    training_data = append_features(raw_data)
    expander = FeatureExpander(training_data)

    if not inclusion_list:
        # NOTE(review): the value presumably selects how far each feature is
        # expanded (0 = excluded) -- confirm against FeatureExpander.
        inclusion_list = [
            1,  # last sv
            1,  # last change in sv
            0,  # mean of prev 10 rows sv
            1,  # std dev of prev 10 rows sv
            0,  # last sp
            0,  # last change in sp
            2,  # mean of prev 10 rows sp
            0,  # std dev of prev 10 rows sp
        ]

    expanded = expander.expand_features(inclusion_list)
    write_to_file(expanded, fp_out)
    expanded_CV1, expanded_CV2, expanded_test = split_data_3_folds(expanded)

    results = []
    lamb_resolution = 100
    for step in range(1, lamb_resolution):
        # Force true division: with two ints Python 2 floors the quotient,
        # which would make every lambda in the sweep equal to 0.
        lamb = float(step) / lamb_resolution
        print(lamb)
        THETA_CV1 = regression(expanded_CV1, lamb)
        THETA_CV2 = regression(expanded_CV2, lamb)
        # Each model is scored on the fold it was NOT trained on.
        results.append((THETA_CV1, lamb, calc_mse(expanded_CV2, THETA_CV1)))
        results.append((THETA_CV2, lamb, calc_mse(expanded_CV1, THETA_CV2)))

    # Lower is better; min() keeps the first candidate on ties.
    best_result = min(results, key=lambda candidate: candidate[2])

    final_score = calc_mse(expanded_test, best_result[0])
    return (best_result, final_score)