def initialize(ratio):
    """Load the mushroom data set and encode its categorical values as numbers.

    Every input column holds single-character category codes; each code is
    replaced by its integer position in that column's symbol list below.
    The output column (column 0) is encoded as 'p' -> 1 and 'e' -> 0.

    Args:
        ratio: Forwarded to the reader as the proportion factor. The
            original documentation describes it as the ratio of testing
            to training data.

    Returns:
        Whatever ``datareader.readInputData`` returns; the original
        documentation describes it as the training data and testing data.
    """
    # Symbols accepted in each input column, listed in the order of the
    # integer code they are mapped to (first symbol -> 0, second -> 1, ...).
    symbols_by_column = {
        1: 'bcxfks',
        2: 'fgys',
        3: 'nbcgrpuewy',
        4: 'tf',
        5: 'alcyfmnps',
        6: 'adfn',
        7: 'cwd',
        8: 'bn',
        9: 'knbhgropuewy',
        10: 'et',
        11: 'bcuezr?',
        12: 'fyks',
        13: 'fyks',
        14: 'nbcgopewy',
        15: 'nbcgopewy',
        16: 'pu',
        17: 'nowy',
        18: 'not',
        19: 'ceflnpsz',
        20: 'knbhrouwy',
        21: 'acnsvy',
        22: 'glmpuwd',
    }
    feature_encoding = {}
    for column, symbols in symbols_by_column.items():
        feature_encoding[column] = dict(
            (symbol, code) for code, symbol in enumerate(symbols))

    class_encoding = {'p': 1, 'e': 0}

    data_path = 'mushrooms_bfgs.data'
    test_path = ''  # no separate test file is supplied
    separator = ','
    do_split = True
    feature_columns = range(1, 23)
    label_column = 0
    literal_flags = [1] * 23
    label_is_literal = True

    # Arguments are passed positionally, in the order the reader expects.
    # NOTE(review): the meaning of the literal True (third argument) is not
    # visible from here -- confirm against datareader.readInputData.
    return datareader.readInputData(
        data_path,
        test_path,
        True,
        separator,
        ratio,
        do_split,
        feature_columns,
        label_column,
        literal_flags,
        feature_encoding,
        label_is_literal,
        class_encoding)
}, 22: { 'g': 0, 'l': 1, 'm': 2, 'p': 3, 'u': 4, 'w': 5, 'd': 6 } } output_literal = True output_label_mapping = {'p': 1, 'e': 0} (train_X, train_y, test_X, test_y) = datareader.readInputData( input_file, input_test_file, True, custom_delimiter, proportion_factor, split, input_columns, output_column, input_literal_columns, input_label_mapping, output_literal, output_label_mapping) print "Parsing complete!\n" # Uncomment the following line to use PCA and to plot the training data set #plot_data(train_X, train_y) print "Optimizing...\n" initial_values = numpy.zeros((len(train_X[0]), 1)) myargs = (train_X, train_y) theta = scipy.optimize.fmin_bfgs(computeCost, x0=initial_values, args=myargs) print "Final theta: " print theta
# ---------------------------------------------------------------------------
# Iris run: wait for the user, read iris.data, fit theta with BFGS and then
# check the result against the test portion of the data.
# ---------------------------------------------------------------------------
raw_input("Press Enter to continue...")

# Single-argument print(...) behaves the same as the print statement under
# Python 2, which this script targets (see raw_input above).
print("Parsing input data...")

# Reader configuration.
input_file = 'iris.data'
input_test_file = ''  # no separate test file is supplied
custom_delimiter = ','
split = True
proportion_factor = 1.0 / 3

# Columns 0-3 are the inputs and are not literal-encoded, so no input label
# mapping is supplied.
input_columns = range(4)
input_literal_columns = [0, 0, 0, 0]
input_label_mapping = {}

# Column 4 is the class name; only 'Iris-versicolor' is mapped to 1, the
# other two classes are mapped to 0.
output_column = 4
output_literal = True
output_label_mapping = {
    'Iris-versicolor': 1,
    'Iris-setosa': 0,
    'Iris-virginica': 0,
}

# NOTE(review): the meaning of the literal True (third argument) is not
# visible from here -- confirm against datareader.readInputData.
train_X, train_y, test_X, test_y = datareader.readInputData(
    input_file,
    input_test_file,
    True,
    custom_delimiter,
    proportion_factor,
    split,
    input_columns,
    output_column,
    input_literal_columns,
    input_label_mapping,
    output_literal,
    output_label_mapping)

print("Parsing complete!\n")

# Uncomment the following line to use PCA and to plot the training data set
#plot_data(train_X, train_y)

print("Optimizing...\n")

# Start the optimizer from an all-zero vector with one entry per column of
# train_X.
myargs = (train_X, train_y)
initial_thetas = numpy.zeros((train_X.shape[1], 1))
theta = scipy.optimize.fmin_bfgs(computeCost, x0=initial_thetas, args=myargs)

print("Final theta: ")
print(theta)

check_test_data(test_X, test_y, theta)