def grid_search():
    """Exhaustively try every (hidden layers, learning rate, L2) combination
    and report the configuration with the best validation accuracy.
    """
    # Load and shuffle the spiral data; hold out 30% for validation.
    X, Y = get_spiral()
    X, Y = shuffle(X, Y)
    n_train = int(0.7 * len(X))
    Xtrain, Ytrain = X[:n_train], Y[:n_train]
    Xtest, Ytest = X[n_train:], Y[n_train:]

    # Candidate hyperparameter values to sweep.
    layer_options = [[300], [100, 100], [50, 50, 50]]
    lr_options = [1e-4, 1e-3, 1e-2]
    l2_options = [0., 0.1, 1.0]

    # Track the best configuration seen so far as a single dict.
    best = {'acc': 0, 'hls': None, 'lr': None, 'l2': None}
    for hls in layer_options:
        for lr in lr_options:
            for l2 in l2_options:
                model = ANN(hls)
                model.fit(Xtrain, Ytrain, learning_rate=lr, reg=l2,
                          mu=0.99, epochs=3000, show_fig=False)
                validation_accuracy = model.score(Xtest, Ytest)
                train_accuracy = model.score(Xtrain, Ytrain)
                print("validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s" %
                      (validation_accuracy, train_accuracy, hls, lr, l2))
                if validation_accuracy > best['acc']:
                    best = {'acc': validation_accuracy, 'hls': hls, 'lr': lr, 'l2': l2}

    print("Best validation_accuracy:", best['acc'])
    print("Best settings:")
    print("hidden_layer_sizes:", best['hls'])
    print("learning_rate:", best['lr'])
    print("l2:", best['l2'])
def random_search():
    """Random-walk hyperparameter search for the ANN on the spiral data.

    Starts from a seed configuration and, each try, perturbs the best
    configuration found so far (additive steps on a log10 scale for the
    learning rate and L2 penalty).
    """
    # get the data and split into train/test
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7 * len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # starting hyperparameters
    M = 20
    nHidden = 2
    log_lr = -4
    log_l2 = -2  # log scale, since we always want the penalty to be positive
    max_tries = 30

    # Seed the "best" trackers with the starting values: the perturbation
    # step below reads them every iteration, and the original code left
    # best_M/best_nHidden undefined (NameError) until the first improvement.
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    for _ in range(max_tries):
        model = ANN([M] * nHidden)
        model.fit(Xtrain, Ytrain, learning_rate=10**log_lr, reg=10**log_l2,
                  mu=0.99, epochs=3000, show_fig=False)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
            % (validation_accuracy, train_accuracy, [M] * nHidden, log_lr, log_l2))
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # select new hyperparams by perturbing the best found so far
        nHidden = best_nHidden + np.random.randint(-1, 2)  # -1, 0, or 1
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2) * 10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)

    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("best_M:", best_M)
    print("best_nHidden:", best_nHidden)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def random_search():
    """Random-walk hyperparameter search for the ANN on the clouds data.

    Each iteration trains a model, then perturbs the best configuration
    found so far (hidden-layer count/width, log-scale lr and L2).
    """
    X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7 * len(X))
    Xtrain, Xtest = X[:Ntrain], X[Ntrain:]
    Ytrain, Ytest = Y[:Ntrain], Y[Ntrain:]

    # starting hyperparameters
    M = 20
    nHidden = 2
    log_lr = -4
    log_l2 = -2
    max_tries = 30

    # Seed the "best" trackers with the starting values instead of None:
    # the perturbation step reads them every iteration, so None would
    # raise a TypeError if no try ever beats best_validation_rate.
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    for _ in range(max_tries):
        model = ANN([M] * nHidden)
        model.fit(Xtrain, Ytrain, learning_rate=10**log_lr, reg=10**log_l2,
                  mu=0.99, epochs=3000, show_fig=False)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            f'validation_accuracy: {validation_accuracy}, train_accuracy: {train_accuracy}, setting M: {M}, nHidden: {nHidden}, lr: {log_lr}, l2: {log_l2}'
        )
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_l2 = log_l2
            best_lr = log_lr

        # select new hyperparams
        nHidden = best_nHidden + np.random.randint(-1, 2)
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2) * 10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)

    print(f'Best validation_accuracy: {best_validation_rate}')
    print('Best Setting:')
    print(f'Best hidden layer number: {best_nHidden}')
    print(f"Best hidden_layer_size, {best_M}")
    print(f'Best learning rate: {best_lr}')
    print(f'Best regularizations : {best_l2}')
def random_search():
    """Random-walk hyperparameter search on the spiral dataset.

    Perturbs the best-so-far (M, nHidden, log_lr, log_l2) each try and
    keeps whichever configuration scores highest on the held-out split.
    """
    # get the data and split into train/test
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7 * len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # starting hyperparameters
    M = 20
    nHidden = 2
    log_lr = -4
    log_l2 = -2  # log scale, since we always want it to be positive
    max_tries = 30

    # Initialize the "best" trackers from the starting configuration:
    # the update step below reads best_M/best_nHidden/best_lr/best_l2
    # unconditionally, and the original left best_M/best_nHidden
    # undefined (NameError) until the first improvement.
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    for _ in range(max_tries):
        model = ANN([M] * nHidden)
        model.fit(
            Xtrain, Ytrain,
            learning_rate=10**log_lr, reg=10**log_l2,
            mu=0.99, epochs=3000, show_fig=False
        )
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
            % (validation_accuracy, train_accuracy, [M] * nHidden, log_lr, log_l2)
        )
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # select new hyperparams
        nHidden = best_nHidden + np.random.randint(-1, 2)  # -1, 0, or 1
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2) * 10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)

    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("best_M:", best_M)
    print("best_nHidden:", best_nHidden)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def main():
    """Random-walk hyperparameter search over (M, num_hidden_layers,
    log learning rate, log L2) on the spiral dataset.

    Fixes over the original:
      * the floor clamps were swapped (layers were forced >= 10 and M
        allowed down to 1); now layers >= 1 and M >= 10
      * ``log_reg`` was clamped against ``log_lr`` (copy-paste typo)
      * the final print multiplied ints (``best_M*best_num_hidden_layers``)
        instead of building the hidden-layer list
    """
    X, Y = get_spiral()
    N, D = X.shape
    num_train = int(0.7 * N)
    train_X = X[:num_train, :]
    train_Y = Y[:num_train]
    test_X = X[num_train:, :]
    test_Y = Y[num_train:]

    # initial hyperparameters
    M = 20
    num_hidden_layers = 2
    log_lr = -4
    log_reg = -2
    max_iter = 30

    best_M = M
    best_num_hidden_layers = num_hidden_layers
    best_validation_rate = 0
    best_hidden_layer_size = None
    # Seed with the starting values so the update step below is always defined.
    best_learning_rate = log_lr
    best_reg_penalty = log_reg
    for i in range(max_iter):
        model = ANN([M] * num_hidden_layers)
        model.fit(train_X, train_Y, learning_rate=10**log_lr, reg=10**log_reg,
                  mu=0.99, epochs=3000, show_fig=False)
        validation_rate = model.score(test_X, test_Y)
        train_accuracy = model.score(train_X, train_Y)
        print("validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
              % (validation_rate, train_accuracy, [M] * num_hidden_layers, log_lr, log_reg))
        if validation_rate > best_validation_rate:
            best_M = M
            best_num_hidden_layers = num_hidden_layers
            best_validation_rate = validation_rate
            best_hidden_layer_size = [M] * num_hidden_layers
            best_learning_rate = log_lr
            best_reg_penalty = log_reg

        # update: perturb the best configuration found so far
        num_hidden_layers = best_num_hidden_layers + np.random.randint(-1, 2)
        M = best_M + np.random.randint(-1, 2)
        num_hidden_layers = max(1, num_hidden_layers)  # was max(10, ...): clamps were swapped
        M = max(10, M)
        log_lr = best_learning_rate + np.random.randint(-1, 2)
        log_reg = best_reg_penalty + np.random.randint(-1, 2)
        log_lr = min(0, log_lr)
        log_reg = min(0, log_reg)  # was min(0, log_lr): clamped the wrong variable

    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    # Build the layer list; the original printed best_M*best_num_hidden_layers (int product).
    print("hidden_layer_sizes:", [best_M] * best_num_hidden_layers)
    print("learning_rate:", 10**best_learning_rate)
    print("l2:", 10**best_reg_penalty)
def random_search():
    """Random-walk search over the ANN's hyperparameters on the spiral data."""
    X, Y = get_spiral()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7 * len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # starting parameters
    M = 20
    nHidden = 2
    log_lr = -4
    log_l2 = -2
    max_iters = 30

    # Seed the trackers with the starting configuration: the perturbation
    # step reads best_M/best_nHidden/best_lr/best_l2 on every iteration,
    # and the original left best_M/best_nHidden unassigned (NameError)
    # until the first improvement.
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    for _ in range(max_iters):
        model = ANN([M] * nHidden)
        model.fit(Xtrain, Ytrain, learning_rate=10**log_lr, reg=10**log_l2,
                  mu=0.99, epochs=3000, show_fig=False)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
            % (validation_accuracy, train_accuracy, [M] * nHidden, log_lr, log_l2))
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # perturb the best configuration for the next try
        nHidden = best_nHidden + np.random.randint(-1, 2)
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2) * 10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)

    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("best_M:", best_M)
    print("best_nHidden:", best_nHidden)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def grid_search():
    """Sweep every combination of hidden-layer configuration, learning
    rate and L2 penalty; print each result and the overall winner.
    """
    # Load the spiral dataset and shuffle before splitting.
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    n_train = int(len(X) * 0.7)
    Xtrain, Ytrain = X[:n_train], Y[:n_train]
    # The "test" split really acts as a validation set here: it is what
    # drives the hyperparameter choice.
    Xtest, Ytest = X[n_train:], Y[n_train:]

    # Candidate settings to sweep.
    hidden_layer_sizes = [[300], [100, 100], [50, 50, 50]]
    learning_rates = [1e-4, 1e-3, 1e-2]
    l2_penalties = [0., 0.1, 1.0]

    best_validation_rate = 0
    best_hls, best_lr, best_l2 = None, None, None
    for hls in hidden_layer_sizes:
        for lr in learning_rates:
            for l2 in l2_penalties:
                model = ANN(hls)
                model.fit(Xtrain, Ytrain, learning_rate=lr, reg=l2,
                          mu=0.99, epochs=3000, show_fig=False)
                validation_accuracy = model.score(Xtest, Ytest)
                train_accuracy = model.score(Xtrain, Ytrain)
                print(
                    "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
                    % (validation_accuracy, train_accuracy, hls, lr, l2))
                if validation_accuracy > best_validation_rate:
                    best_validation_rate = validation_accuracy
                    best_hls, best_lr, best_l2 = hls, lr, l2

    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("hidden_layer_sizes:", best_hls)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def grid_search():
    """Exhaustive search over architectures, learning rates and
    regularization strengths; keeps whichever configuration scores
    highest on the held-out split.
    """
    X, Y = get_spiral()
    X, Y = shuffle(X, Y)
    split = int(0.7 * len(X))
    Xtrain, Xtest = X[:split], X[split:]
    Ytrain, Ytest = Y[:split], Y[split:]

    # Grid of candidate hyperparameters.
    architectures = [[300], [100, 100], [200, 200], [50, 50, 50]]
    rates = [1e-4, 1e-3, 1e-2, 1e-5]
    penalties = [0., 0.1, 1.0]

    best_validation_rate = 0
    best_hls = best_lr = best_l2 = None
    for m in architectures:
        for lr in rates:
            for l2 in penalties:
                model = ANN(m)
                model.fit(Xtrain, Ytrain, learning_rate=lr, reg=l2,
                          mu=0.99, epochs=3000, show_fig=False)
                validation_accuracy = model.score(Xtest, Ytest)
                train_accuracy = model.score(Xtrain, Ytrain)
                print(
                    f'validation_accuracy: {validation_accuracy}, train_accuracy: {train_accuracy}, setting hls,lr,l2: {m}, {lr}, {l2}'
                )
                # Guard style: skip unless this beats the incumbent.
                if validation_accuracy <= best_validation_rate:
                    continue
                best_validation_rate = validation_accuracy
                best_hls, best_lr, best_l2 = m, lr, l2

    print(f'Best validation_accuracy: {best_validation_rate}')
    print('Best Setting:')
    print(f"Best hidden_layer_size, {best_hls}")
    print(f'Best learning rate: {best_lr}')
    print(f'Best regularizations : {best_l2}')
def main():
    """Grid-search the ANN hyperparameters on the spiral dataset and
    report the best-performing combination.
    """
    X, Y = get_spiral()
    N, D = X.shape
    cutoff = int(0.7 * N)
    train_X, train_Y = X[:cutoff, :], Y[:cutoff]
    test_X, test_Y = X[cutoff:, :], Y[cutoff:]

    # hyperparameter grid to search
    hidden_layer_size = [[300], [100, 100], [50, 50, 50]]
    learning_rate = [1e-4, 1e-3, 1e-2]
    reg_penalties = [0., 0.1, 1.0]

    # Best configuration seen so far, kept as a single tuple.
    best = (0, None, None, None)  # (validation_rate, hls, lr, reg)
    for hlz in hidden_layer_size:
        for lr in learning_rate:
            for rp in reg_penalties:
                model = ANN(hlz)
                model.fit(train_X, train_Y, learning_rate=lr, reg=rp,
                          mu=0.99, epochs=3000, show_fig=False)
                validation_rate = model.score(test_X, test_Y)
                train_accuracy = model.score(train_X, train_Y)
                print(
                    "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
                    % (validation_rate, train_accuracy, hlz, lr, rp))
                if validation_rate > best[0]:
                    best = (validation_rate, hlz, lr, rp)

    best_validation_rate, best_hidden_layer_size, best_learning_rate, best_reg_penalties = best
    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("hidden_layer_sizes:", best_hidden_layer_size)
    print("learning_rate:", best_learning_rate)
    print("l2:", best_reg_penalties)
def grid_search():
    """Try all 3x3x3 combinations of hidden layers, learning rate and L2
    penalty, then report the configuration with the best validation score.
    """
    # get the data and split into train/test
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    cut = int(0.7 * len(X))
    Xtrain, Ytrain = X[:cut], Y[:cut]
    Xtest, Ytest = X[cut:], Y[cut:]

    # hyperparameters to try
    layer_configs = [[300], [100, 100], [50, 50, 50]]
    lrs = [1e-4, 1e-3, 1e-2]
    l2s = [0., 0.1, 1.0]

    # best-so-far trackers
    best_validation_rate = 0
    best_hls = None
    best_lr = None
    best_l2 = None
    for hls in layer_configs:
        for lr in lrs:
            for l2 in l2s:
                model = ANN(hls)
                model.fit(Xtrain, Ytrain,
                          learning_rate=lr, reg=l2, mu=0.99,
                          epochs=3000, show_fig=False)
                validation_accuracy = model.score(Xtest, Ytest)
                train_accuracy = model.score(Xtrain, Ytrain)
                print(
                    "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
                    % (validation_accuracy, train_accuracy, hls, lr, l2)
                )
                if validation_accuracy > best_validation_rate:
                    best_validation_rate = validation_accuracy
                    best_hls = hls
                    best_lr = lr
                    best_l2 = l2

    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("hidden_layer_sizes:", best_hls)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def random_search():
    """Random-walk hyperparameter search on the spiral dataset.

    Fixes over the original:
      * stray ``]`` in the progress print made the function a SyntaxError
      * ``nHidden = best_nHidden * np.random.randint(-1, 2)`` multiplied
        instead of adding the -1/0/+1 step (the comment says "-1, 0, or 1")
      * best_* trackers are seeded so the perturbation step is always defined
      * a summary of the best settings is printed at the end, matching the
        sibling search functions in this file
    """
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7 * len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # starting parameters
    M = 20
    nHidden = 2
    log_lr = -4  # use log values, since typical adjustments are .1, .01...., .000001 etc
    log_l2 = -2
    max_tries = 30

    # Seed the trackers with the starting configuration so the update
    # step below never touches an unassigned name.
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    for _ in range(max_tries):
        model = ANN([M] * nHidden)
        # raise 10 to log_lr and log_l2 power since they're in log scale
        model.fit(Xtrain, Ytrain, learning_rate=10**log_lr, reg=10**log_l2,
                  mu=0.99, epochs=3000, show_fig=False)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print("validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
              % (validation_accuracy, train_accuracy, [M] * nHidden, log_lr, log_l2))
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # Select new hyperparameters by perturbing the best found so far.
        nHidden = best_nHidden + np.random.randint(-1, 2)  # -1, 0, or 1 (was '*')
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2) * 10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)

    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("best_M:", best_M)
    print("best_nHidden:", best_nHidden)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def random_search():
    """Random-walk hyperparameter search on data from get_data().

    The small training set is tripled by concatenation (more samples per
    epoch), every try's validation accuracy is recorded, and the best
    configuration found is printed at the end alongside the mean.
    """
    X, Y, data = get_data()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.75 * len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # Make copies of the small data (because variance matters?)
    Xtrain = np.concatenate((Xtrain, Xtrain, Xtrain), 0)
    Ytrain = np.concatenate((Ytrain, Ytrain, Ytrain), 0)
    # TODO: add some noise to the data ... add some noise to the weights
    # (noise injection, should reduce overfitting). Add gaussian noise,
    # small variance: (np.random.randn(N)*0.5) = Gaussian w/ 0.5 variance
    print('size Xtrain: ' + str(Xtrain.shape))
    print('size Ytrain: ' + str(Ytrain.shape))
    print('size Xtest: ' + str(Xtest.shape))
    print('size Ytest: ' + str(Ytest.shape))

    # starting hyperparameters
    M = 20       # hidden units
    nHidden = 2  # hidden layers
    log_lr = -4  # learning rate
    log_l2 = -2  # l2 regularization, since we always want it to be positive
    max_tries = 30

    # Seed the trackers with the starting configuration: the perturbation
    # step reads them every iteration, and the original left
    # best_M/best_nHidden undefined (NameError) until the first improvement.
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    validation_accuracies = []
    for i in range(max_tries):
        print('on try: ' + str(i + 1) + '/' + str(max_tries))
        model = ANN([M] * nHidden)
        # params are walked on a log base 10 scale
        model.fit(Xtrain, Ytrain, learning_rate=10**log_lr, reg=10**log_l2,
                  mu=0.99, epochs=4000, show_fig=False)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s (layers), %s (log_lr), %s (log_l2)"
            % (validation_accuracy, train_accuracy, [M] * nHidden, log_lr, log_l2))

        # keep track of all
        validation_accuracies.append(validation_accuracy)

        # keep the best parameters, then make modifications to them
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # select new hyperparams
        nHidden = best_nHidden + np.random.randint(-1, 2)  # -1, 0, or 1: add, remove or keep the layers
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2) * 10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)

    # TODO: save these in mongodb, then read them and see if we beat it,
    # in a new file run forward on best params
    print("Best validation_accuracy:", best_validation_rate)
    print("Mean validation_accuracy:", np.mean(validation_accuracies))
    print("Best settings:")
    print("Best M (hidden units):", best_M)
    print("Best nHidden (hidden layers):", best_nHidden)
    print("Best learning_rate:", best_lr)
    print("Best l2 regularization:", best_l2)
def random_search():
    """Random-walk hyperparameter search on the Spirals CSV dataset.

    Fixes over the original:
      * ``df.as_matrix()`` was removed in pandas 1.0 — use ``to_numpy()``
      * best_num_hl/best_log_lr/best_log_l2/best_hl_size are seeded with
        the starting values; the update step reads them every iteration
        and the original left them undefined until the first improvement
    """
    # get the data:
    df = pd.read_csv('Spirals_dataset.csv')
    data = df.to_numpy()  # as_matrix() was removed in pandas 1.0
    X = data[:, :-1]
    Y = data[:, -1]

    # visualize the data:
    plt.scatter(X[:, 0], X[:, 1], c=Y)
    plt.title('Spirals')
    plt.axis('equal')
    plt.show()

    # split the data (last 270 rows held out for validation):
    Xtrain, Ytrain = X[:-270, :], Y[:-270]
    Xtest, Ytest = X[-270:, :], Y[-270:]

    # initial hyperparameters:
    M = 20       # number of hidden layer neurons
    num_hl = 2
    log_lr = -4  # learning rate
    log_l2 = -2  # L2-regularization term
    max_tries = 1000

    best_validation_score = 0
    best_hl_size = M
    best_num_hl = num_hl
    best_log_lr = log_lr
    best_log_l2 = log_l2

    t0 = datetime.now()
    # Random Search loop:
    for _ in range(max_tries):
        model = ANN([M] * num_hl)
        model.fit(
            Xtrain, Ytrain,
            learning_rate=10**log_lr, reg=10**log_l2,
            mu=0.9, epochs=3000, show_fig=False
        )
        validation_score = model.score(Xtest, Ytest)
        train_score = model.score(Xtrain, Ytrain)
        print('\nvalidation set accuracy: %.3f, training set accuracy: %.3f'
              % (validation_score, train_score))
        print('hidden layer size: {}, learning rate: {}, l2: {}'.format(
            [M] * num_hl, 10**log_lr, 10**log_l2))
        if validation_score > best_validation_score:
            best_validation_score = validation_score
            best_hl_size = M
            best_num_hl = num_hl
            best_log_lr = log_lr
            best_log_l2 = log_l2

        # update the hyperparameters by perturbing the best found so far:
        log_lr = best_log_lr + np.random.randint(-1, 2)
        log_l2 = best_log_l2 + np.random.randint(-1, 2)
        M = best_hl_size + np.random.randint(-1, 2) * 10  # -10, 0, or 10
        M = max(10, M)  # in case M has been updated to 0
        num_hl = best_num_hl + np.random.randint(-1, 2)
        num_hl = max(1, num_hl)  # in case num_hl has been updated to 0

    dt = datetime.now() - t0
    print('\nElapsed time:', dt)
    print('\nBest validation accuracy:', best_validation_score)
    print('\nBest settings:')
    print('hidden layer size:', best_hl_size)
    print('number of hidden layers:', best_num_hl)
    print('learning rate:', 10**best_log_lr)
    print('l2:', 10**best_log_l2)
    print()
def random_search():
    """Random-walk hyperparameter search on the spiral dataset.

    Fixes over the original:
      * ``model.fit(..., reg=12, ...)`` hard-wired the regularization to 12
        instead of passing ``l2`` (the computed 10**log_l2)
      * Python-2 ``xrange`` and a ``print`` statement made the function a
        SyntaxError under Python 3 (the rest of the function already uses
        ``print()``) — replaced with ``range`` and ``print()``
      * ``np.random.randint(-1, 1)`` excludes the upper bound, so log_lr and
        log_l2 could only ever stay or decrease — now ``randint(-1, 2)``
    """
    max_iter = 30
    X, Y = get_spiral()
    # plt.scatter(X[:,0],X[:,1])
    # plt.show()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7 * len(X))
    Xtrain, Ytrain = X[:Ntrain, :], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:, :], Y[Ntrain:]

    # starting parameters
    M = 20
    nHidden = 2
    log_lr = -4
    log_l2 = -2

    # loop through hyperparameter settings, walking from the best found
    best_validation_rate = 0
    best_hls = None
    best_lr = None
    best_l2 = None
    # Seed so the perturbation step below is always defined.
    best_M = M
    best_nHidden = nHidden
    best_log_lr = log_lr
    best_log_l2 = log_l2
    for i in range(max_iter):
        # current point in parameter space
        hls = [M] * nHidden
        lr = 10**log_lr
        l2 = 10**log_l2

        model = ANN(hls)
        model.fit(Xtrain, Ytrain, learning_rate=lr, reg=l2,  # was reg=12 (typo for l2)
                  mu=0.99, epochs=3000, show_fig=False)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy %.3f, settings: %s, %s, %s"
            % (validation_accuracy, train_accuracy, hls, lr, l2)
        )
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_log_lr = log_lr
            best_log_l2 = log_l2
            best_hls = hls
            best_lr = lr
            best_l2 = l2

        # perturb the best configuration (randint's high bound is exclusive)
        M = max(best_M + np.random.randint(-10, 10) * 10, 20)
        nHidden = max(best_nHidden + np.random.randint(-2, 2), 1)
        log_lr = min(best_log_lr + np.random.randint(-1, 2), -1)
        log_l2 = min(best_log_l2 + np.random.randint(-1, 2), -1)
        print("M", M, "NHIDDEN", nHidden, "LOG-LR", log_lr, "LOG-L2", log_l2)

    print("Best validation_accuracy", best_validation_rate)
    print("Best settings:")
    print("hidden_layer_size:", best_hls)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def grid_search():
    """Grid-search the ANN hyperparameters on the Spirals CSV dataset.

    Fix over the original: ``df.as_matrix()`` was removed in pandas 1.0;
    ``to_numpy()`` is the supported replacement.
    """
    # get the data:
    df = pd.read_csv('Spirals_dataset.csv')
    data = df.to_numpy()  # as_matrix() was removed in pandas 1.0
    X = data[:, :-1]
    Y = data[:, -1]

    # visualize the data:
    plt.scatter(X[:, 0], X[:, 1], c=Y)
    plt.title('Spirals')
    plt.axis('equal')
    plt.show()

    # split the data (last 270 rows held out for validation):
    Xtrain, Ytrain = X[:-270, :], Y[:-270]
    Xtest, Ytest = X[-270:, :], Y[-270:]

    # hyperparameters to be tried:
    M = [[300], [100, 100], [50, 50, 50]]  # hidden-layer configurations
    learning_rates = [1e-4, 1e-3, 1e-2]    # learning rate
    reg = [0., 1e-1, 1.0]                  # L2-regularization term

    best_validation_score = 0
    best_hl_size = None
    best_lr = None
    best_l2 = None

    t0 = datetime.now()
    # Grid Search loops:
    for hl_size in M:
        for lr in learning_rates:
            for l2 in reg:
                model = ANN(hl_size)
                model.fit(Xtrain, Ytrain, learning_rate=lr, reg=l2,
                          mu=0.9, epochs=300, show_fig=False)
                validation_score = model.score(Xtest, Ytest)
                train_score = model.score(Xtrain, Ytrain)
                print(
                    '\nvalidation set accuracy: %.3f, training set accuracy: %.3f'
                    % (validation_score, train_score))
                print(
                    'hidden layer size: {}, learning rate: {}, l2: {}'.format(
                        hl_size, lr, l2))
                if validation_score > best_validation_score:
                    best_validation_score = validation_score
                    best_hl_size = hl_size
                    best_lr = lr
                    best_l2 = l2

    dt = datetime.now() - t0
    print('\nElapsed time:', dt)
    print('\nBest validation accuracy:', best_validation_score)
    print('\nBest settings:')
    print('Best hidden layer size:', best_hl_size)
    print('Best learning rate:', best_lr)
    print('Best regularization term:', best_l2)
    print()