def average_error(design_columns, target_columns, N):
    """Return the mean test-set RMS error over N split/fit/evaluate rounds.

    Each round calls ``splitter.create_training_and_test_sets`` on
    'data/bodyfatdata.txt' with a training ratio of 0.8, fits
    ``maximum_likelihood_model`` on 'data/bodyfat_training.txt', and scores
    that model with ``root_mean_square_error`` on 'data/bodyfat_test.txt'.

    design_columns, target_columns -- passed through unchanged to both the
        model fit and the error computation.
    N -- number of rounds. The accumulated error is divided by float(N), so
        N == 0 raises ZeroDivisionError.
    """
    def run_round():
        # NOTE(review): presumably the splitter rewrites the training/test
        # files read below on every call -- confirm in the splitter module.
        splitter.create_training_and_test_sets('data/bodyfatdata.txt',
                                               training_ratio=0.8)
        fitted = maximum_likelihood_model('data/bodyfat_training.txt',
                                          design_columns, target_columns)
        return root_mean_square_error('data/bodyfat_test.txt',
                                      design_columns, target_columns, fitted)

    total = 0
    for _ in range(N):
        total += run_round()

    # float() keeps this a true division under Python 2 as well.
    return total / float(N)
#!/usr/bin/python import classification as cla import regression as reg import split_data as spl import numpy as np if (raw_input("II.1 Split the bodyfat dataset into training and test sets? (y/n):") == "y"): spl.create_training_and_test_sets('data/bodyfatdata.txt') print "... Saved datasets as data/bodyfat_training.txt and data/bodyfat_test.txt" print if (raw_input("II1.1 Compute the RMS for the maximum likelihood solution? (y/n):") == "y"): ml_model1 = reg.maximum_likelihood_model('data/bodyfat_training.txt', [3, 6, 7, 8], [1]) ml_model2 = reg.maximum_likelihood_model('data/bodyfat_training.txt', [7], [1]) ml_error1 = reg.root_mean_square_error('data/bodyfat_test.txt', [3, 6, 7, 8], [1], ml_model1) ml_error2 = reg.root_mean_square_error('data/bodyfat_test.txt', [7], [1], ml_model2) print "The RMS error of the test set for selection 1:" print ml_error1 print "The RMS error of the test set for selection 2:" print ml_error2 print if (raw_input("II1.2 Compute the RMS for the maximum a posteriori solution? (y/n):") == "y"): reg.plot_alpha_error('data/bodyfat_training.txt','data/bodyfat_test.txt') print "Saved plots as images/rms_selection1.png and images/rms_selection2.png" print