# Assumed imports for these example scripts; only the function bodies appear in
# the source, so this header is a best guess.  `neur`, `ut`, `rbm` and `ac` are
# project-local modules (neural-net helpers, utilities, RBM and autoencoder
# implementations) whose import paths are not shown; the GPU example appears to
# use gnumpy-style garrays for `gpu`.
import numpy as np
import matplotlib.pyplot as plt
from itertools import izip
from sklearn import datasets
from sklearn import preprocessing as pre


def basic_iris():
    iris = datasets.load_iris()
    scaler = pre.Scaler()
    X = scaler.fit_transform(iris.data)
    y = ut.all_to_sparse(iris.target, max(iris.target) + 1)
    X, y, X_val, y_val, X_test, y_test = neur.cross_validation_sets(
        np.array(X), np.array(y), "iris")
    # fold the test split into the validation set
    X_val = np.vstack([X_val, X_test])
    y_val = np.vstack([y_val, y_test])
    thetas, costs, val_costs = neur.gradient_decent(
        np.array(X), np.array(y),
        #hidden_layer_sz = 11,
        hidden_layer_sz=20,
        iter=8000,
        wd_coef=0.0,
        learning_rate=0.07,
        momentum_multiplier=0.3,
        rand_init_epsilon=0.12,
        do_early_stopping=True,
        #do_dropout = True,
        dropout_percentage=0.9,
        do_learning_adapt=True,
        X_val=np.array(X_val),
        y_val=np.array(y_val))
    h_x, a = neur.forward_prop(X_test, thetas)
    print "percentage correct predictions: ", ut.percent_equal(ut.map_to_max_binary_result(h_x), y_test)
    print "training error:", costs[-1]
    print "validation error:", val_costs[-1]
    print "lowest validation error:", min(val_costs)
    plt.plot(costs, label='cost')
    plt.plot(val_costs, label='val cost')
    plt.legend()
    plt.ylabel('error rate')
    plt.show()
def basic_gradient_descent():
    # Kaggle "stack" data: the last column of each CSV row is the binary label.
    data = np.genfromtxt('./stack_data_wide_val.csv', delimiter=',')
    X = data[:, :-1]
    y = data[:, -1:]
    scaler = pre.Scaler()
    X_val = scaler.fit_transform(X)
    y_val = np.array(map(lambda x: [0, 1] if x == 0 else [1, 0], y.flatten()))
    #X, y, X_val, y_val, X_test, y_test = neur.cross_validation_sets(np.array(X), np.array(y), "basic_kaggle_data", True)
    #X_val = np.vstack([X_val, X_test])
    #y_val = np.vstack([y_val, y_test])
    hid_layer = 300
    # stream training mini-batches from file, scaling X and one-hot encoding y
    mg = neur.split_xy(
        neur.mini_batch_gen_from_file('stack_data_wide_train.csv', 40), -1,
        apply_x=lambda x: scaler.transform(x.astype(float)),
        apply_y=lambda y: np.array(map(lambda x: [0, 1] if x == 0 else [1, 0], y.flatten())))
    #bm = rbm.RBM(13408, hid_layer)
    #costs = bm.optimize(neur.just_x(mg), 1000, 0.0007, val_set = X_val)
    #first_layer_weights = np.hstack([np.zeros((hid_layer,1)), bm.weights])
    #thetas = neur.create_initial_thetas([64, hid_layer, 2], 0.12)
    #thetas[0] = first_layer_weights
    # best so far: mini-batch size 40, hidden layer 100, learning rate 0.01
    thetas, costs, val_costs = neur.gradient_decent_gen(
        mg,
        #hidden_layer_sz = 11,
        hidden_layer_sz=hid_layer,
        iter=20000,
        wd_coef=0.0,
        learning_rate=0.01,
        #thetas = thetas,
        momentum_multiplier=0.9,
        rand_init_epsilon=0.0012,
        do_early_stopping=True,
        #do_dropout = True,
        #dropout_percentage = 0.5,
        #do_learning_adapt = True,
        X_val=np.array(X_val),
        y_val=np.array(y_val))
    h_x, a = neur.forward_prop(X_val, thetas)
    binary_result = ut.map_to_max_binary_result(h_x)
    print "percentage correct predictions: ", ut.percent_equal(binary_result, y_val)
    print "training error:", costs[-1]
    print "validation error:", val_costs[-1]
    print "lowest validation error:", min(val_costs)
    plt.plot(costs, label='cost')
    plt.plot(val_costs, label='val cost')
    plt.legend()
    plt.ylabel('error rate')
    plt.show()
def autoencoder_example():
    # 1000-example MNIST subset with 256 (16x16) pixels, stored column-major
    mnist_train = np.fromfile('mnist_training.csv', sep=" ")
    mnist_train = np.array(mnist_train.reshape(256, 1000)).transpose()
    mnist_targets = np.fromfile('mnist_training_targets.csv', sep=" ")
    mnist_targets = np.array(mnist_targets.reshape(10, 1000)).transpose()
    X = mnist_train
    y = mnist_targets
    X, y, X_val, y_val, X_test, y_test = neur.cross_validation_sets(
        np.array(X), np.array(y), "digits_rbm_mnist_autoencode")
    X_val = np.vstack([X_val, X_test])
    y_val = np.vstack([y_val, y_test])
    hid_layer = 300
    # pretrain the first layer as a denoising autoencoder
    autoenc = ac.Autoencoder(X.shape[1], hid_layer, denoise=True, denoise_percent=0.5)
    costs, val_costs = autoenc.optimize(X, iters=1500, learning_rate=0.1, val_set=X_val)
    print "::: first encoding done :::"
    print "training error:", costs[-1]
    print "validation error:", val_costs[-1]
    print "lowest validation error:", min(val_costs)
    plt.plot(costs, label='cost')
    plt.plot(val_costs, label='val cost')
    plt.legend()
    plt.ylabel('error rate')
    plt.show()
    # initialize the network with the learned encoding weights, then fine-tune
    thetas = neur.create_initial_thetas([64, hid_layer, 10], 0.12)
    thetas[0] = autoenc.encode_weights
    thetas, costs, val_costs = neur.gradient_decent(
        X, y,
        learning_rate=0.01,
        hidden_layer_sz=hid_layer,
        iter=5000,
        thetas=thetas,
        X_val=X_val,
        y_val=y_val,
        do_dropout=True,
        dropout_percentage=0.9,
        do_early_stopping=True)
    h_x, a = neur.forward_prop(X_val, thetas)
    print "percentage correct predictions: ", ut.percent_equal(ut.map_to_max_binary_result(h_x), y_val)
    print "training error:", costs[-1]
    print "validation error:", val_costs[-1]
    print "lowest validation error:", min(val_costs)
    plt.plot(costs, label='cost')
    plt.plot(val_costs, label='val cost')
    plt.legend()
    plt.ylabel('error rate')
    plt.show()
def rbm_example():
    digits = datasets.load_digits()
    X = digits.images.reshape((digits.images.shape[0], -1))
    X = X / 16.0  # scale pixel values into [0, 1]
    y = ut.all_to_sparse(digits.target, max(digits.target) + 1)
    X, y, X_val, y_val, X_test, y_test = neur.cross_validation_sets(
        np.array(X), np.array(y), "digits_rbm", True)
    X_val = np.vstack([X_val, X_test])
    y_val = np.vstack([y_val, y_test])
    hid_layer = 300
    # pretrain the first layer with an RBM (64 visible units, 300 hidden)
    bm = rbm.RBM(64, hid_layer)
    #exit()
    costs = bm.optimize(neur.mini_batch_generator(X), 2000, 0.08)
    print "validate squared_error", bm.validate(X_val)
    #exit()
    filename = './random_set_cache/data_rbm_run.pkl'
    # prepend a zero bias column so the RBM weights fit the network's first layer
    first_layer_weights = np.hstack([np.zeros((hid_layer, 1)), bm.weights])
    #pickle.dump(first_layer_weights, open(filename, 'w'))
    #first_layer_weights = pickle.load(open(filename, 'r'))
    thetas = neur.create_initial_thetas([64, hid_layer, 10], 0.12)
    thetas[0] = first_layer_weights
    thetas, costs, val_costs = neur.gradient_decent_gen(
        izip(neur.mini_batch_generator(X, 10), neur.mini_batch_generator(y, 10)),
        learning_rate=0.05,
        hidden_layer_sz=hid_layer,
        iter=8000,
        thetas=thetas,
        X_val=X_val,
        y_val=y_val,
        do_early_stopping=True)
    h_x, a = neur.forward_prop(X_test, thetas)
    print "percentage correct predictions: ", ut.percent_equal(ut.map_to_max_binary_result(h_x), y_test)
    print "training error:", costs[-1]
    print "validation error:", val_costs[-1]
    print "lowest validation error:", min(val_costs)
    plt.plot(costs, label='cost')
    plt.plot(val_costs, label='val cost')
    plt.legend()
    plt.ylabel('error rate')
    plt.show()
def basic_gradient_descent():
    digits = datasets.load_digits()
    # iris = datasets.load_iris()
    X = digits.images.reshape((digits.images.shape[0], -1))
    scaler = pre.Scaler()
    X = scaler.fit_transform(X)
    y = ut.all_to_sparse(digits.target, max(digits.target) + 1)
    X, y, X_val, y_val, X_test, y_test = neur.cross_validation_sets(
        np.array(X), np.array(y), "basic_grad_descent_digits")
    X_val = np.vstack([X_val, X_test])
    y_val = np.vstack([y_val, y_test])
    # train on mini-batches of 10 examples
    thetas, costs, val_costs = neur.gradient_decent_gen(
        izip(neur.mini_batch_generator(X, 10), neur.mini_batch_generator(y, 10)),
        #hidden_layer_sz = 11,
        hidden_layer_sz=100,
        iter=1000,
        wd_coef=0.0,
        learning_rate=0.1,
        momentum_multiplier=0.9,
        rand_init_epsilon=0.012,
        do_early_stopping=True,
        #do_dropout = True,
        #dropout_percentage = 0.8,
        #do_learning_adapt = True,
        X_val=np.array(X_val),
        y_val=np.array(y_val))
    h_x, a = neur.forward_prop(X_test, thetas)
    binary_result = ut.map_to_max_binary_result(h_x)
    print "percentage correct predictions: ", ut.percent_equal(binary_result, y_test)
    print "training error:", costs[-1]
    print "validation error:", val_costs[-1]
    print "lowest validation error:", min(val_costs)
    plt.plot(costs, label='cost')
    plt.plot(val_costs, label='val cost')
    plt.legend()
    plt.ylabel('error rate')
    plt.show()
def basic_gradient_descent():
    digits = datasets.load_digits()
    # iris = datasets.load_iris()
    X = digits.images.reshape((digits.images.shape[0], -1))
    scaler = pre.Scaler()
    X = scaler.fit_transform(X)
    y = ut.all_to_sparse(digits.target, max(digits.target) + 1)
    # same digits experiment, but run on the GPU via garrays
    X, y, X_val, y_val, X_test, y_test = neur.cross_validation_sets(
        gpu.as_garray(X), gpu.as_garray(y), "digits")
    X_val = gpu.concatenate([X_val, X_test])
    y_val = gpu.concatenate([y_val, y_test])
    thetas, costs, val_costs = neur.gradient_decent(
        gpu.as_garray(X), gpu.as_garray(y),
        #hidden_layer_sz = 11,
        hidden_layer_sz=45,
        iter=500,
        wd_coef=0.0,
        learning_rate=0.25,
        momentum_multiplier=0.9,
        rand_init_epsilon=0.012,
        do_early_stopping=True,
        #do_dropout = True,
        dropout_percentage=0.7,
        #do_learning_adapt = True,
        X_val=gpu.as_garray(X_val),
        y_val=gpu.as_garray(y_val))
    h_x, a = neur.forward_prop(X_test, thetas)
    # pull the predictions back onto the CPU before scoring
    h_x = map(lambda x: x.as_numpy_array(), h_x)
    print "percentage correct predictions: ", ut.percent_equal(ut.map_to_max_binary_result(h_x), y_test.as_numpy_array())
    print "training error:", costs[-1]
    print "validation error:", val_costs[-1]
    print "lowest validation error:", min(val_costs)
    plt.plot(costs, label='cost')
    plt.plot(val_costs, label='val cost')
    plt.legend()
    plt.ylabel('error rate')
    plt.show()
def rbm_mnist_example():
    mnist_train = np.fromfile('mnist_training.csv', sep=" ")
    mnist_train = np.array(mnist_train.reshape(256, 1000)).transpose()
    mnist_targets = np.fromfile('mnist_training_targets.csv', sep=" ")
    mnist_targets = np.array(mnist_targets.reshape(10, 1000)).transpose()
    X = mnist_train
    y = mnist_targets
    X, y, X_val, y_val, X_test, y_test = neur.cross_validation_sets(
        np.array(X), np.array(y), "digits_rbm_mnist")
    X_val = np.vstack([X_val, X_test])
    y_val = np.vstack([y_val, y_test])
    hid_layer = 300
    # greedy layer-wise RBM pretraining: 256 -> 300 -> 350 -> 300
    bm = rbm.RBM(256, hid_layer)
    #exit()
    costs = bm.optimize(X, 1000, 0.2, val_set=X_val)
    X = bm.prop_up(X)
    X_val = bm.prop_up(X_val)
    bm2 = rbm.RBM(hid_layer, hid_layer + 50)
    costs = bm2.optimize(X, 600, 0.2, val_set=X_val)
    X = bm2.prop_up(X)
    X_val = bm2.prop_up(X_val)
    bm3 = rbm.RBM(hid_layer + 50, hid_layer)
    costs = bm3.optimize(X, 600, 0.2, val_set=X_val)
    # let's change X
    filename = './random_set_cache/data_rbm_run_without_bias.pkl'
    first_layer_weights = np.hstack([np.zeros((hid_layer, 1)), bm3.weights])  # without bias
    #first_layer_weights = np.hstack([bm.hidden_bias.reshape(hid_layer, 1), bm.weights])  # with bias
    #pickle.dump(first_layer_weights, open(filename, 'w'))
    #exit()
    #first_layer_weights = pickle.load(open(filename, 'r'))
    thetas = neur.create_initial_thetas([64, hid_layer, 10], 0.12)
    thetas[0] = first_layer_weights
    thetas, costs, val_costs = neur.gradient_decent(
        X, y,
        learning_rate=0.1,
        hidden_layer_sz=hid_layer,
        iter=3000,
        thetas=thetas,
        X_val=X_val,
        y_val=y_val,
        do_dropout=True,
        dropout_percentage=0.7,
        do_early_stopping=True)
    h_x, a = neur.forward_prop(X_val, thetas)
    print "percentage correct predictions: ", ut.percent_equal(ut.map_to_max_binary_result(h_x), y_val)
    print "training error:", costs[-1]
    print "validation error:", val_costs[-1]
    print "lowest validation error:", min(val_costs)
    plt.plot(costs, label='cost')
    plt.plot(val_costs, label='val cost')
    plt.legend()
    plt.ylabel('error rate')
    plt.show()
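# Minimal entry-point sketch (an assumption, not present in the source) so one
# of the experiments above can be run directly from the command line; swap in
# whichever example function you want to try.
if __name__ == '__main__':
    basic_iris()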