def grid_search():
    """Exhaustive grid search over ANN hyperparameters on the spiral dataset.

    Trains one model per (hidden-layer layout, learning rate, L2 penalty)
    combination and reports the setting with the best held-out accuracy.
    """
    X, Y = get_spiral()
    X, Y = shuffle(X, Y)
    n_train = int(0.7 * len(X))
    Xtrain, Ytrain = X[:n_train], Y[:n_train]
    Xtest, Ytest = X[n_train:], Y[n_train:]

    # candidate settings to sweep
    layer_options = [[300], [100, 100], [50, 50, 50]]
    lr_options = [1e-4, 1e-3, 1e-2]
    l2_options = [0., 0.1, 1.0]

    best_validation_rate = 0
    best_hls = None
    best_lr = None
    best_l2 = None
    for layers in layer_options:
        for rate in lr_options:
            for penalty in l2_options:
                net = ANN(layers)
                net.fit(Xtrain, Ytrain,
                        learning_rate=rate, reg=penalty,
                        mu=0.99, epochs=3000, show_fig=False)
                validation_accuracy = net.score(Xtest, Ytest)
                train_accuracy = net.score(Xtrain, Ytrain)
                print("validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
                      % (validation_accuracy, train_accuracy, layers, rate, penalty))
                # keep the setting with the highest held-out accuracy
                if validation_accuracy > best_validation_rate:
                    best_validation_rate = validation_accuracy
                    best_hls = layers
                    best_lr = rate
                    best_l2 = penalty
    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("hidden_layer_sizes:", best_hls)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def random_search():
    """Random-walk hyperparameter search for an ANN on the spiral dataset.

    Starts from an initial setting; each of max_tries iterations trains a
    model, scores it on the held-out split, and perturbs the best-so-far
    setting by one random step per hyperparameter.
    """
    # get the data and split into train/test
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7 * len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # starting hyperparameters
    M = 20        # hidden layer width
    nHidden = 2   # number of hidden layers
    log_lr = -4   # log10 learning rate
    log_l2 = -2   # log10 L2 penalty; log scale keeps the actual value positive
    max_tries = 30

    # Bug fix: seed the "best" values with the starting hyperparameters so the
    # perturbation step below never references unbound names (the original left
    # best_M / best_nHidden undefined and best_lr / best_l2 as None until the
    # first improving trial).
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    for _ in range(max_tries):
        model = ANN([M] * nHidden)
        model.fit(Xtrain, Ytrain,
                  learning_rate=10**log_lr, reg=10**log_l2,
                  mu=0.99, epochs=3000, show_fig=False)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
            % (validation_accuracy, train_accuracy, [M] * nHidden, log_lr, log_l2))
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # select new hyperparams by perturbing the best found so far
        nHidden = best_nHidden + np.random.randint(-1, 2)  # -1, 0, or 1
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2) * 10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)
    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("best_M:", best_M)
    print("best_nHidden:", best_nHidden)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def random_search():
    """Random-walk hyperparameter search for a Keras MLP on the spiral data.

    Searches layer width M, depth nHidden, and log10 learning rate / L2
    penalty by perturbing the best setting found so far.
    """
    # get the data and split into train/test
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7*len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # starting hyperparameters
    M = 20
    nHidden = 2
    log_lr = -4
    log_l2 = -2  # log scale, since we always want it to be positive
    max_tries = 30

    # Bug fix: seed "best" with the starting values so the perturbation step
    # is defined even if no trial improves (the original left best_M and
    # best_nHidden unbound in that case).
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    for _ in range(max_tries):
        model = keras.models.Sequential()
        # Bug fix: the original added a single hidden layer regardless of
        # nHidden, so the searched depth had no effect on the model. Build
        # nHidden layers; only the first needs input_dim.
        for i in range(nHidden):
            if i == 0:
                model.add(keras.layers.Dense(
                    units=M, input_dim=Xtrain.shape[1], activation='relu',
                    kernel_regularizer=keras.regularizers.l2(10**log_l2)))
            else:
                model.add(keras.layers.Dense(
                    units=M, activation='relu',
                    kernel_regularizer=keras.regularizers.l2(10**log_l2)))
        model.add(keras.layers.Dense(1, activation='sigmoid'))
        adam = keras.optimizers.Adam(lr=10**log_lr)
        model.compile(loss='binary_crossentropy', optimizer=adam,
                      metrics=['accuracy'])
        hist = model.fit(Xtrain, Ytrain, epochs=3000, batch_size=300,
                         verbose=0, validation_data=(Xtest, Ytest))
        # final-epoch accuracies from the training history
        validation_accuracy = hist.history['val_acc'][-1]
        train_accuracy = hist.history['acc'][-1]
        print(
            "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
            % (validation_accuracy, train_accuracy, [M]*nHidden, log_lr, log_l2)
        )
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # select new hyperparams
        nHidden = best_nHidden + np.random.randint(-1, 2)  # -1, 0, or 1
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2)*10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)
    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("best_M:", best_M)
    print("best_nHidden:", best_nHidden)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def random_search():
    """Random-walk hyperparameter search for an ANN on the spiral dataset.

    Each iteration trains a model at the current setting, scores it on the
    held-out split, and perturbs the best setting found so far.
    """
    # get the data and split into train/test
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7*len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # starting hyperparameters
    M = 20        # hidden layer width
    nHidden = 2   # number of hidden layers
    log_lr = -4   # log10 learning rate
    log_l2 = -2   # log10 L2 penalty; log scale keeps the value positive
    max_tries = 30

    # Bug fix: seed the "best" values with the starting hyperparameters so the
    # perturbation step below never references unbound names (the original
    # left best_M / best_nHidden undefined until the first improving trial).
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    for _ in range(max_tries):
        model = ANN([M]*nHidden)
        model.fit(
            Xtrain, Ytrain,
            learning_rate=10**log_lr, reg=10**log_l2,
            mu=0.99, epochs=3000, show_fig=False
        )
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
            % (validation_accuracy, train_accuracy, [M]*nHidden, log_lr, log_l2)
        )
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # select new hyperparams
        nHidden = best_nHidden + np.random.randint(-1, 2)  # -1, 0, or 1
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2)*10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)
    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("best_M:", best_M)
    print("best_nHidden:", best_nHidden)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def random_search():
    """Random-walk hyperparameter search for an ANN on the spiral dataset."""
    X, Y = get_spiral()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7 * len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # starting parameters (lr and l2 kept on a log10 scale)
    M = 20
    nHidden = 2
    log_lr = -4
    log_l2 = -2
    max_iters = 30

    # Bug fix: seed the "best" values with the starting hyperparameters so the
    # perturbation step below never references unbound names (the original
    # left best_M / best_nHidden undefined until the first improving trial).
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    for _ in range(max_iters):
        model = ANN([M] * nHidden)
        model.fit(Xtrain, Ytrain,
                  learning_rate=10**log_lr, reg=10**log_l2,
                  mu=0.99, epochs=3000, show_fig=False)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
            % (validation_accuracy, train_accuracy, [M] * nHidden, log_lr, log_l2))
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # perturb the best setting found so far
        nHidden = best_nHidden + np.random.randint(-1, 2)
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2) * 10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)
    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("best_M:", best_M)
    print("best_nHidden:", best_nHidden)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def grid_search():
    """Grid search over ANN hyperparameters on the spiral dataset.

    The held-out 30% split acts as a validation set: it is used to pick
    hyperparameters, not to report final generalization.
    """
    # get the data and split into train/test
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    Ntrain = int(len(X) * 0.7)
    Xtrain, Ytrain = X[:Ntrain, ], Y[:Ntrain, ]
    Xtest, Ytest = X[Ntrain:, ], Y[Ntrain:, ]

    # hyperparameters to try
    hidden_layer_sizes = [[300], [100, 100], [50, 50, 50]]
    learning_rates = [1e-4, 1e-3, 1e-2]
    l2_penalties = [0., 0.1, 1.0]

    best_validation_rate = 0
    best_hls = None
    best_lr = None
    best_l2 = None
    # exhaustively try every combination
    for hls in hidden_layer_sizes:
        for lr in learning_rates:
            for l2 in l2_penalties:
                candidate = ANN(hls)
                candidate.fit(Xtrain, Ytrain,
                              learning_rate=lr, reg=l2,
                              mu=0.99, epochs=3000, show_fig=False)
                validation_accuracy = candidate.score(Xtest, Ytest)
                train_accuracy = candidate.score(Xtrain, Ytrain)
                print("validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
                      % (validation_accuracy, train_accuracy, hls, lr, l2))
                if validation_accuracy > best_validation_rate:
                    best_validation_rate = validation_accuracy
                    best_hls = hls
                    best_lr = lr
                    best_l2 = l2
    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("hidden_layer_sizes:", best_hls)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def grid_search():
    """Grid search over ANN layouts, learning rates and L2 penalties."""
    X, Y = get_spiral()
    X, Y = shuffle(X, Y)
    split = int(0.7 * len(X))
    Xtrain, Xtest = X[:split], X[split:]
    Ytrain, Ytest = Y[:split], Y[split:]

    # candidate settings
    hidden_layer_sizes = [[300], [100, 100], [200, 200], [50, 50, 50]]
    learning_rates = [1e-4, 1e-3, 1e-2, 1e-5]
    regularizations = [0., 0.1, 1.0]

    best_validation_rate = 0
    best_hls = None
    best_lr = None
    best_l2 = None
    for m in hidden_layer_sizes:
        for lr in learning_rates:
            for l2 in regularizations:
                net = ANN(m)
                net.fit(Xtrain, Ytrain,
                        learning_rate=lr, reg=l2,
                        mu=0.99, epochs=3000, show_fig=False)
                validation_accuracy = net.score(Xtest, Ytest)
                train_accuracy = net.score(Xtrain, Ytrain)
                print(
                    f'validation_accuracy: {validation_accuracy}, train_accuracy: {train_accuracy}, setting hls,lr,l2: {m}, {lr}, {l2}'
                )
                # remember the best-scoring combination
                if validation_accuracy > best_validation_rate:
                    best_validation_rate = validation_accuracy
                    best_hls = m
                    best_l2 = l2
                    best_lr = lr
    print(f'Best validation_accuracy: {best_validation_rate}')
    print('Best Setting:')
    print(f"Best hidden_layer_size, {best_hls}")
    print(f'Best learning rate: {best_lr}')
    print(f'Best regularizations : {best_l2}')
def grid_search():
    """Try every (layers, lr, l2) combination and report the best one."""
    # get the data and split into train/test
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    cut = int(0.7*len(X))
    Xtrain, Ytrain = X[:cut], Y[:cut]
    Xtest, Ytest = X[cut:], Y[cut:]

    # hyperparameters to try
    hidden_layer_sizes = [[300], [100, 100], [50, 50, 50]]
    learning_rates = [1e-4, 1e-3, 1e-2]
    l2_penalties = [0., 0.1, 1.0]

    best_validation_rate = 0
    best_hls = None
    best_lr = None
    best_l2 = None
    for layout in hidden_layer_sizes:
        for step in learning_rates:
            for penalty in l2_penalties:
                net = ANN(layout)
                net.fit(Xtrain, Ytrain,
                        learning_rate=step, reg=penalty,
                        mu=0.99, epochs=3000, show_fig=False)
                validation_accuracy = net.score(Xtest, Ytest)
                train_accuracy = net.score(Xtrain, Ytrain)
                print(
                    "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
                    % (validation_accuracy, train_accuracy, layout, step, penalty)
                )
                if validation_accuracy > best_validation_rate:
                    best_validation_rate = validation_accuracy
                    best_hls = layout
                    best_lr = step
                    best_l2 = penalty
    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("hidden_layer_sizes:", best_hls)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def random_search():
    """Random-walk hyperparameter search (log-scale lr and L2) for an ANN."""
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7*len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # starting parameters
    M = 20
    nHidden = 2
    log_lr = -4  # use log values, since typical adjustments are .1, .01...., .000001 etc
    log_l2 = -2
    max_tries = 30

    # Bug fix: seed "best" with the starting values so the update step never
    # touches unbound names (the original initialized only best_lr/best_l2,
    # and to None).
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_lr = log_lr
    best_l2 = log_l2
    for _ in range(max_tries):
        model = ANN([M]*nHidden)
        # raise 10 to log_lr and log_l2 power since they're in log scale
        model.fit(Xtrain, Ytrain,
                  learning_rate=10**log_lr, reg=10**log_l2,
                  mu=0.99, epochs=3000, show_fig=False)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        # Bug fix: the original had a stray ']' inside this tuple, which was a
        # syntax error.
        print("validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
              % (validation_accuracy, train_accuracy, [M]*nHidden, log_lr, log_l2))
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_lr = log_lr
            best_l2 = log_l2

        # Select new hyperparameters.
        # Bug fix: the original used best_nHidden * randint(-1, 2), which could
        # zero out or negate the depth; '+' matches its own "-1, 0, or 1" comment
        # and the sibling random_search variants in this file.
        nHidden = best_nHidden + np.random.randint(-1, 2)  # -1, 0, or 1
        nHidden = max(1, nHidden)
        M = best_M + np.random.randint(-1, 2)*10
        M = max(10, M)
        log_lr = best_lr + np.random.randint(-1, 2)
        log_l2 = best_l2 + np.random.randint(-1, 2)
    # Consistency: report the winning setting like the other search variants do
    # (the original ended without any summary output).
    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("best_M:", best_M)
    print("best_nHidden:", best_nHidden)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def grid_search():
    """Grid search over Keras MLP hyperparameters on the spiral dataset.

    Builds one Sequential model per (layer layout, learning rate, L2 penalty)
    combination and picks the setting with the best final-epoch validation
    accuracy.
    """
    # get the data and split into train/test
    X, Y = get_spiral()
    # X, Y = get_clouds()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7 * len(X))
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    # hyperparameters to try
    hidden_layer_sizes = [
        [300],
        [100, 100],
        [50, 50, 50],
    ]
    learning_rates = [1e-4, 1e-3, 1e-2]
    l2_penalties = [0., 0.1, 1.0]

    # loop through all possible hyperparameter settings
    best_validation_rate = 0
    best_hls = None
    best_lr = None
    best_l2 = None
    for hls in hidden_layer_sizes:
        for lr in learning_rates:
            for l2 in l2_penalties:
                model = keras.models.Sequential()
                # Build the hidden stack with one loop instead of the original
                # copy-pasted if-branch per depth; only the first layer needs
                # input_dim (the original inconsistently used 2 for multi-layer
                # layouts and Xtrain.shape[1] for the single-layer one).
                for i, units in enumerate(hls):
                    layer_kwargs = dict(
                        activation='relu',
                        kernel_regularizer=keras.regularizers.l2(l2))
                    if i == 0:
                        layer_kwargs['input_dim'] = Xtrain.shape[1]
                    model.add(keras.layers.Dense(units, **layer_kwargs))
                model.add(keras.layers.Dense(1, activation='sigmoid'))
                adam = keras.optimizers.Adam(lr=lr)
                model.compile(loss='binary_crossentropy', optimizer=adam,
                              metrics=['accuracy'])
                hist = model.fit(Xtrain, Ytrain, epochs=3000, batch_size=300,
                                 verbose=0, validation_data=(Xtest, Ytest))
                # final-epoch accuracies from the training history
                validation_accuracy = hist.history['val_acc'][-1]
                train_accuracy = hist.history['acc'][-1]
                print(
                    "validation_accuracy: %.3f, train_accuracy: %.3f, settings: %s, %s, %s"
                    % (validation_accuracy, train_accuracy, hls, lr, l2))
                if validation_accuracy > best_validation_rate:
                    best_validation_rate = validation_accuracy
                    best_hls = hls
                    best_lr = lr
                    best_l2 = l2
    print("Best validation_accuracy:", best_validation_rate)
    print("Best settings:")
    print("hidden_layer_sizes:", best_hls)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
# NOTE(review): this chunk starts mid-function — the statements below, up to
# the first "# In[26]" cell marker, are the tail of a grid_search definition
# whose beginning is outside this view; they reference names (lr, l2,
# best_validation_rate, best_hls, ...) defined there. Left byte-identical.
best_lr = lr
best_l2 = l2
print("Best validation_accuracy:", best_validation_rate)
print("Best settings:")
print("hidden_layer_sizes:", best_hls)
print("learning_rate:", best_lr)
print("l2:", best_l2)

# In[26]:
# Notebook cell: run the grid search defined above.
grid_search()

# In[15]:
# Notebook cell: build and compile a single Keras model on a fresh 70/30
# train/test split of the spiral data (training happens in a later cell).
X, Y = get_spiral()
# X, Y = get_clouds()
X, Y = shuffle(X, Y)
Ntrain = int(0.7 * len(X))
Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
Xtest, Ytest = X[Ntrain:], Y[Ntrain:]
model = keras.models.Sequential()
model.add(
    keras.layers.Dense(300, input_dim=2, activation='relu',
                       kernel_regularizer=keras.regularizers.l2(0)))
model.add(keras.layers.Dense(1, activation='sigmoid'))
adam = keras.optimizers.Adam(lr=0.01)
model.compile(loss='binary_crossentropy', optimizer=adam,
              metrics=['accuracy'])
def spiral():
    """Spiral dataset split plus an RBF kernel and training settings.

    Returns (Xtrain, Xtest, Ytrain, Ytest, kernel, 1e-2, 300) — the last two
    values are passed through unchanged to the caller.
    """
    X, Y = get_spiral()
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.33)

    def kernel(X1, X2):
        # fixed-width RBF kernel
        return rbf(X1, X2, gamma=5.)

    return Xtrain, Xtest, Ytrain, Ytest, kernel, 1e-2, 300
def random_search():
    """Random-walk hyperparameter search for an ANN on the spiral dataset.

    Perturbs width M, depth nHidden, and log10 learning rate / L2 penalty
    around the best setting found so far.
    """
    max_iter = 30
    X, Y = get_spiral()
    # plt.scatter(X[:,0],X[:,1])
    # plt.show()
    X, Y = shuffle(X, Y)
    Ntrain = int(0.7*len(X))
    Xtrain, Ytrain = X[:Ntrain, :], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:, :], Y[Ntrain:]

    # starting parameters (lr and l2 kept on a log10 scale)
    M = 20
    nHidden = 2
    log_lr = -4
    log_l2 = -2

    # Bug fix: seed "best" with the starting values so the perturbation step
    # below never references unbound names if no trial improves.
    best_validation_rate = 0
    best_M = M
    best_nHidden = nHidden
    best_log_lr = log_lr
    best_log_l2 = log_l2
    best_hls = None
    best_lr = None
    best_l2 = None
    # Bug fix: range() instead of Python-2-only xrange() — the rest of this
    # file uses Python 3 print() calls.
    for i in range(max_iter):
        # current point in parameter space
        hls = [M]*nHidden
        lr = 10**log_lr
        l2 = 10**log_l2
        model = ANN(hls)
        # Bug fix: the original passed reg=12 (the number twelve) instead of
        # reg=l2, silently ignoring the searched L2 penalty.
        model.fit(Xtrain, Ytrain, learning_rate=lr, reg=l2, mu=0.99,
                  epochs=3000, show_fig=False)
        validation_accuracy = model.score(Xtest, Ytest)
        train_accuracy = model.score(Xtrain, Ytrain)
        print(
            "validation_accuracy: %.3f, train_accuracy %.3f, settings: %s, %s, %s"
            % (validation_accuracy, train_accuracy, hls, lr, l2)
        )
        if validation_accuracy > best_validation_rate:
            best_validation_rate = validation_accuracy
            best_M = M
            best_nHidden = nHidden
            best_log_lr = log_lr
            best_log_l2 = log_l2
            best_hls = hls
            best_lr = lr
            best_l2 = l2

        # perturb the best setting found so far
        M = max(best_M + np.random.randint(-10, 10)*10, 20)
        nHidden = max(best_nHidden + np.random.randint(-2, 2), 1)
        # NOTE(review): randint(-1, 1) yields only -1 or 0, so lr/l2 can only
        # shrink or stay — confirm whether randint(-1, 2) was intended, as in
        # the other random_search variants.
        log_lr = min(best_log_lr + np.random.randint(-1, 1), -1)
        log_l2 = min(best_log_l2 + np.random.randint(-1, 1), -1)
        # Bug fix: converted from a Python 2 print statement to a print() call.
        print("M", M, "NHIDDEN", nHidden, "LOG-LR", log_lr, "LOG-L2", log_l2)
    print("Best validation_accuracy", best_validation_rate)
    print("Best settings:")
    print("hidden_layer_size:", best_hls)
    print("learning_rate:", best_lr)
    print("l2:", best_l2)
def grid_search():
    # NOTE(review): appears to be a truncated stub — it only loads the data
    # and the search loop is missing. Compare the complete grid_search
    # variants elsewhere in this file.
    X, Y = get_spiral()
def spiral():
    """Spiral dataset split plus an RBF kernel and training settings.

    Returns (Xtrain, Xtest, Ytrain, Ytest, kernel, 1e-3, 500) — the last two
    values are passed through unchanged to the caller.
    """
    X, Y = get_spiral()
    Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.33)

    def kernel(X1, X2):
        # fixed-width RBF kernel
        return rbf(X1, X2, gamma=5.)

    return Xtrain, Xtest, Ytrain, Ytest, kernel, 1e-3, 500