def runModel_i(input_vector):
    hls, lr, ne, rs = input_vector
    model = init_two_layer_model(32 * 32 * 3, int(hls), 10)  # input size, hidden size, number of classes
    trainer = ClassifierTrainer()
    best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
        X_train, y_train, X_val, y_val, model, two_layer_net,
        num_epochs=int(ne), reg=rs, momentum=0.9, learning_rate_decay=0.95,
        learning_rate=lr, verbose=True)
    print("Inside model I")
    print(val_acc_history)
    return best_model, loss_history, train_acc_history, val_acc_history
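# Hedged usage sketch (not from the original file): runModel_i expects the
# hyperparameters packed as (hidden layer size, learning rate, number of
# epochs, regularization strength). The values below are illustrative
# assumptions, not tuned settings.
example_params = [100, 1e-4, 10, 1e-3]  # hls, lr, ne, rs
best_model, loss_history, train_acc_history, val_acc_history = runModel_i(example_params)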
# y = np.random.randint(num_classes, size=num_inputs)
# model = init_supercool_convnet(num_filters=3, filter_size=3, input_shape=input_shape)
# loss, grads = supercool_convnet(X, model, y)
# for param_name in sorted(grads):
#     f = lambda _: supercool_convnet(X, model, y)[0]
#     param_grad_num = eval_numerical_gradient(f, model[param_name], verbose=False, h=1e-6)
#     e = rel_error(param_grad_num, grads[param_name])
#     print '%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name]))

# Make sure we can overfit...
# model = init_supercool_convnet(weight_scale=5e-2, bias_scale=0, filter_size=3)  # weight_scale=5e-2
# trainer = ClassifierTrainer()
# best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
#     X_train[:50], y_train[:50], X_val, y_val, model, supercool_convnet,
#     reg=0.001, momentum=0.9, learning_rate=0.0001, batch_size=10, num_epochs=10,  # change to 20 epochs
#     verbose=True)  # batch size 40-100

model = init_supercool_convnet(weight_scale=3e-2, bias_scale=0, filter_size=3)
trainer = ClassifierTrainer()
best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
    X_train, y_train, X_val, y_val, model, supercool_convnet,
    reg=0.5, momentum=0.9, learning_rate=5e-5, batch_size=50, num_epochs=15,  # change to 20 epochs
    verbose=True, acc_frequency=50)  # batch size 40-100

with open('best_model_2.pkl', 'wb') as f:
    cPickle.dump(best_model, f)

# with open('best_model.pkl', 'rb') as f:
#     best_model = cPickle.load(f)
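# Hedged sketch (an assumption, not part of the original run): reload the
# pickled model and report validation accuracy. This assumes that calling
# supercool_convnet(X, model) without labels returns class scores, in the
# usual CS231n loss-function style.
import numpy as np

with open('best_model_2.pkl', 'rb') as f:
    reloaded_model = cPickle.load(f)
scores = supercool_convnet(X_val, reloaded_model)
val_accuracy = np.mean(np.argmax(scores, axis=1) == y_val)
print('Reloaded model validation accuracy: %f' % val_accuracy)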
    return X_train, y_train, X_val, y_val, X_test, y_test

# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

# print "three layer net with hyperparameters hidden=[500, 300], train epochs 45, learning rate 1e-4"
# model = init_three_layer_model(32 * 32 * 3, [500, 300], 10, maxout=3)
model = init_two_layer_model(32 * 32 * 3, 550, 10)
trainer = ClassifierTrainer()
best_model, loss_history, train_acc, val_acc = trainer.train(
    X_train, y_train, X_val, y_val, model, three_layer_net,  # two_layer_net,
    num_epochs=45, reg=0.05, dropout=0.5,
    # update='sgd',
    momentum=0.9, learning_rate_decay=0.95,
    learning_rate=1e-4, verbose=True)

plt.subplot(2, 1, 1)
plt.plot(loss_history)
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')

plt.subplot(2, 1, 2)
plt.plot(train_acc)
plt.plot(val_acc)
plt.legend(['Training accuracy', 'Validation accuracy'], loc='lower right')
plt.xlabel('Epoch')
# These should all be less than 1e-8 or so.
for param_name in grads:
    param_grad_num = eval_numerical_gradient(
        lambda W: two_layer_net(X, model, y, reg)[0],
        model[param_name], verbose=False)
    print '%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name]))

from cs231n.classifier_trainer import ClassifierTrainer

model = init_toy_model()
trainer = ClassifierTrainer()
# Call the trainer to optimize the loss.
# Notice that we're using sample_batches=False, so we're performing full-batch
# gradient descent (no sampled mini-batches of data).
best_model, loss_history, _, _ = trainer.train(X, y, X, y, model, two_layer_net,
                                               reg=0.001, learning_rate=1e-1, momentum=0.0,
                                               learning_rate_decay=1, update='sgd',
                                               sample_batches=False, num_epochs=100,
                                               verbose=False)
print 'Final loss with vanilla SGD: %f' % (loss_history[-1], )

model = init_toy_model()
trainer = ClassifierTrainer()
# Same optimization as above, but with the momentum update.
best_model, loss_history, _, _ = trainer.train(X, y, X, y, model, two_layer_net,
                                               reg=0.001, learning_rate=1e-1, momentum=0.9,
                                               learning_rate_decay=1, update='momentum',
                                               sample_batches=False, num_epochs=100,
                                               verbose=False)
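# rel_error() is used above but not defined in this snippet. A typical
# definition (an assumption here, not taken from this file) compares two
# arrays with a relative tolerance:
import numpy as np

def rel_error(x, y):
    """Maximum relative error between two arrays."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))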
# By default it uses vanilla SGD, which you need to implement.
# First, run the optimization below using vanilla SGD:
from cs231n.classifier_trainer import ClassifierTrainer

model = init_toy_model()
trainer = ClassifierTrainer()
# Call the trainer to optimize the loss.
# Notice that we're using sample_batches=False, so we're performing full-batch
# gradient descent (no sampled mini-batches of data).
best_model, loss_history, _, _ = trainer.train(X, y, X, y, model, two_layer_net,
                                               reg=0.001, learning_rate=1e-1, momentum=0.0,
                                               learning_rate_decay=1, update='sgd',
                                               sample_batches=False, num_epochs=100,
                                               verbose=False)
print('Final loss with vanilla SGD: %f' % (loss_history[-1], ))

###############################################################################
# Now read the momentum update rule inside the `train` function, and run the
# same optimization as above but with the momentum update. You should see a
# much better final loss:
model = init_toy_model()
trainer = ClassifierTrainer()
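# Hedged sketch of the momentum update referred to above: the classic
# SGD+momentum rule. The exact code lives inside ClassifierTrainer.train and
# may differ in detail; this standalone helper is only illustrative.
def momentum_update(x, dx, v, learning_rate=1e-1, momentum=0.9):
    """One SGD+momentum step; returns the updated parameter and velocity."""
    v = momentum * v - learning_rate * dx  # integrate the velocity
    x = x + v                              # integrate the position
    return x, v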
init_hyp['filter_size'] = 3
init_hyp['num_filters'] = random.randint(80, 100)
train_hyp['learning_rate'] = 10 ** random.uniform(-4, -3)
train_hyp['reg'] = 10 ** random.uniform(-8, -7)
train_hyp['num_epochs'] = 0.5
train_hyp['momentum'] = 0.9
train_hyp['batch_size'] = 100
train_hyp['acc_frequency'] = 100
hyperparameters['init'] = init_hyp
hyperparameters['train'] = train_hyp

# train model
model = init_two_layer_convnet(dtype=X_train.dtype, **hyperparameters['init'])
trainer = ClassifierTrainer()
model, loss_history, train_acc_history, val_acc_history = trainer.train(
    X_train, y_train, X_val, y_val, model, loss_function,
    **hyperparameters['train'])

# store results
train_acc = max(train_acc_history)  # the model returned corresponds to the best accuracy
val_acc = max(val_acc_history)
results[(val_acc, train_acc)] = hyperparameters

if val_acc > best['val_acc']:
    best['model'] = model
    best['hyperparameters'] = hyperparameters
    best['val_acc'] = val_acc
    best['train_acc'] = train_acc
    best['val_acc_history'] = val_acc_history
    best['train_acc_history'] = train_acc_history
    best['loss_history'] = loss_history
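# The block above reads like one iteration of a random hyperparameter search.
# A hedged sketch of the surrounding driver (the initial values of `results`
# and `best` and the trial count are assumptions, not taken from this file):
import random

results = {}
best = {'val_acc': -1.0}
num_trials = 20  # assumed number of random trials
for trial in range(num_trials):
    init_hyp = {}
    train_hyp = {}
    hyperparameters = {}
    # ...then sample hyperparameters, train, and update `results` and `best`
    # exactly as in the block above.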
training_epochs = [45]
regs = [1e-2, 1e-3, 5e-3, 1e-4, 5e-4, 2]
learning_rate = [1e-3, 1e-4, 5e-5, 1e-6, 5e-6]
best = 0
bmodel = None
best_para = {}

for hs in hidden_size:
    for ep in training_epochs:
        for reg in regs:
            for lr in learning_rate:
                print "test on param hs:", hs, " ep:", ep, " reg:", reg, " lr:", lr
                model = init_two_layer_model(32 * 32 * 3, hs, 10)  # input size, hidden size, number of classes
                trainer = ClassifierTrainer()
                best_model, loss_history, train_acc, val_acc = trainer.train(
                    X_train, y_train, X_val, y_val, model, two_layer_net,
                    num_epochs=ep, reg=reg, momentum=0.9,
                    learning_rate_decay=0.95, learning_rate=lr, verbose=True)

                plt.subplot(2, 1, 1)
                plt.plot(loss_history)
                plt.title('Loss history with parameters hs: %d, reg: %f, lr: %e' % (hs, reg, lr))
                plt.xlabel('Iteration')
                plt.ylabel('Loss')

                plt.subplot(2, 1, 2)
                plt.plot(train_acc)
                plt.plot(val_acc)
                plt.legend(['Training accuracy', 'Validation accuracy'], loc='lower right')
                plt.xlabel('Epoch')
                plt.ylabel('Classification accuracy')
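                # The variables `best`, `bmodel`, and `best_para` declared above are never
                # updated in this excerpt. A hedged sketch of the bookkeeping they suggest
                # (an assumption about the missing tail of the loop body, not original code):
                if max(val_acc) > best:
                    best = max(val_acc)
                    bmodel = best_model
                    best_para = {'hs': hs, 'ep': ep, 'reg': reg, 'lr': lr}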
print 'Validation data shape: ', X_val.shape
print 'Validation labels shape: ', y_val.shape
print 'Test data shape: ', X_test.shape
print 'Test labels shape: ', y_test.shape

from cs231n.classifier_trainer import ClassifierTrainer

#### don't forget to re-initialize the model before every training run ####
model = init_elu_relu_model(32 * 32 * 3, 100, 10)  # input size, hidden size, number of classes
trainer = ClassifierTrainer()
ne = 20
_, loss_relu, train_acc, val_acc = trainer.train(
    X_train, y_train, X_val, y_val, model, relu,
    num_epochs=ne, reg=1.0, update='rmsprop', momentum=0.9,
    learning_rate_decay=0.95, batch_size=80,
    learning_rate=1e-5, verbose=True)

model = init_elu_relu_model(32 * 32 * 3, 100, 10)
_, loss_leaky, train_acc_1, val_acc_1 = trainer.train(
    X_train, y_train, X_val, y_val, model, leaky,
    num_epochs=ne, reg=1.0, update='rmsprop', batch_size=80,
    momentum=0.9, learning_rate_decay=0.95,
    learning_rate=1e-5, verbose=True)

model = init_elu_relu_model(32 * 32 * 3, 100, 10)
_, loss_elu, train_acc_2, val_acc_2 = trainer.train(
    X_train, y_train, X_val, y_val, model, elu,
    num_epochs=ne, reg=1.0, update='rmsprop', batch_size=80,
    momentum=0.9, learning_rate_decay=0.95,
    learning_rate=1e-5, verbose=True)
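# The three runs above compare ReLU, leaky ReLU, and ELU activations under the
# same settings. Hedged visualization sketch (not from the original file):
import matplotlib.pyplot as plt

plt.figure()
plt.plot(loss_relu)
plt.plot(loss_leaky)
plt.plot(loss_elu)
plt.legend(['ReLU', 'Leaky ReLU', 'ELU'], loc='upper right')
plt.xlabel('Iteration')
plt.ylabel('Loss')
plt.title('Loss history by activation function')
plt.show()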