def runModel_i(input_vector):
    """Train a two-layer network for one hyperparameter setting.

    Args:
        input_vector: Sequence of exactly four values ``(hls, lr, ne, rs)``.
            ``hls`` is passed (via ``int()``) as the hidden size, ``lr`` as
            the learning rate, ``ne`` (via ``int()``) as the number of epochs
            and ``rs`` as the ``reg`` argument of the trainer.

    Returns:
        The four values returned by ``ClassifierTrainer.train``, unpacked as
        ``(best_model, loss_history, train_acc_history, val_acc_history)``.
    """
    hls, lr, ne, rs = input_vector

    # input size, hidden size, number of classes
    model = init_two_layer_model(32 * 32 * 3, int(hls), 10)
    trainer = ClassifierTrainer()

    # The data sets are read from module-level names, not from the argument.
    # No placeholder initialisation is needed: the unpacking below binds all
    # four result names in one step.
    best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
        X_train, y_train, X_val, y_val,
        model, two_layer_net,
        num_epochs=int(ne),
        reg=rs,
        momentum=0.9,
        learning_rate_decay=0.95,
        learning_rate=lr,
        verbose=True)

    print("Inside model I")
    print(val_acc_history)
    return best_model, loss_history, train_acc_history, val_acc_history
# Grid search over two-layer-network hyperparameters: every combination of
# hidden size, epoch count, regularization strength and learning rate below
# is trained once.
hidden_size = [600, 800, 1000, 1200, 1400, 2000]
training_epochs = [45]
regs = [1e-2, 1e-3, 5e-3, 1e-4, 5e-4, 2]
learning_rate = [1e-3, 1e-4, 5e-5, 1e-6, 5e-6]

# NOTE(review): these three are not updated in the visible part of the loop;
# presumably they track the best configuration further down - confirm.
best = 0
bmodel = None
best_para = {}

for hs in hidden_size:
    for ep in training_epochs:
        for reg in regs:
            for lr in learning_rate:
                # Single-argument print call: produces the same
                # space-separated text as the old Python 2 print statement
                # and is also valid on Python 3.
                print(" ".join(map(str, (
                    "test on param hs :", hs, " ep: ", ep,
                    " reg: ", reg, " lr:", lr))))

                # input size, hidden size, number of classes
                model = init_two_layer_model(32 * 32 * 3, hs, 10)
                trainer = ClassifierTrainer()
                best_model, loss_history, train_acc, val_acc = trainer.train(
                    X_train, y_train, X_val, y_val,
                    model, two_layer_net,
                    num_epochs=ep,
                    reg=reg,
                    momentum=0.9,
                    learning_rate_decay=0.95,
                    learning_rate=lr,
                    verbose=True)

                # Upper panel of a two-row figure.
                plt.subplot(2, 1, 1)
# --- Disabled scratch work: numeric gradient check of supercool_convnet ---
# y = np.random.randint(num_classes, size=num_inputs)
# model = init_supercool_convnet(num_filters=3, filter_size=3, input_shape=input_shape)
# loss, grads = supercool_convnet(X, model, y)
# for param_name in sorted(grads):
#     f = lambda _: supercool_convnet(X, model, y)[0]
#     param_grad_num = eval_numerical_gradient(f, model[param_name], verbose=False, h=1e-6)
#     e = rel_error(param_grad_num, grads[param_name])
#     print '%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name]))
#
# --- Disabled scratch work: make sure we can overfit 50 training samples ---
# model = init_supercool_convnet(weight_scale=5e-2, bias_scale=0, filter_size=3)  # weight_scale=5e-2
# trainer = ClassifierTrainer()
# best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
#     X_train[:50], y_train[:50], X_val, y_val, model, supercool_convnet,
#     reg=0.001, momentum=0.9, learning_rate=0.0001, batch_size=10, num_epochs=10,  # change to 20 epochs
#     verbose=True)  # batch size 40-100

# Full training run on the complete training set.
model = init_supercool_convnet(
    weight_scale=3e-2,
    bias_scale=0,
    filter_size=3)
trainer = ClassifierTrainer()
best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
    X_train, y_train, X_val, y_val,
    model, supercool_convnet,
    reg=0.5,
    momentum=0.9,
    learning_rate=5e-5,
    batch_size=50,      # batch size 40-100
    num_epochs=15,      # change to 20 epochs
    verbose=True,
    acc_frequency=50)

# Persist the model returned by the trainer so it can be reloaded later.
with open('best_model_2.pkl', 'wb') as f:
    cPickle.dump(best_model, f)

# To reload a previously saved model instead of training:
# with open('best_model.pkl', 'rb') as f:
#     best_model = cPickle.load(f)
# Reshape data to rows X_train = X_train.reshape(num_training, -1) X_val = X_val.reshape(num_validation, -1) X_test = X_test.reshape(num_test, -1) return X_train, y_train, X_val, y_val, X_test, y_test # Invoke the above function to get our data. X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data() #print "three layer net with hypterparameter hidden[500,300], trainepoch 45, learning rate 1e-4" #model = init_three_layer_model(32*32*3,[500,300],10,maxout=3) model = init_two_layer_model(32*32*3,550,10) trainer = ClassifierTrainer() best_model, loss_history, train_acc, val_acc = trainer.train(X_train, y_train, X_val, y_val, model, three_layer_net, # two_layer_net, num_epochs=45,reg=0.05,dropout=0.5, # update='sgd', momentum=0.9,learning_rate_decay=0.95, learning_rate=1e-4,verbose=True) plt.subplot(2, 1, 1) plt.plot(loss_history) plt.title('Loss history') plt.xlabel('Iteration') plt.ylabel('Loss')
# Check the backward pass with numeric gradient checking.
# If the implementation is correct, the difference between the numeric and
# analytic gradients should be less than 1e-8 for each of W1, W2, b1, and b2.
loss, grads = two_layer_net(X, model, y, reg)

# these should all be less than 1e-8 or so
for param_name in grads:
    # The lambda deliberately ignores its argument and re-evaluates the loss
    # on `model`.
    # NOTE(review): presumably eval_numerical_gradient perturbs
    # model[param_name] in place, which is why this works - confirm.
    param_grad_num = eval_numerical_gradient(
        lambda W: two_layer_net(X, model, y, reg)[0],
        model[param_name],
        verbose=False)
    # Parenthesised single-argument print: same output on Python 2, and
    # valid syntax on Python 3.
    print('%s max relative error: %e' % (
        param_name, rel_error(param_grad_num, grads[param_name])))

from cs231n.classifier_trainer import ClassifierTrainer

model = init_toy_model()
trainer = ClassifierTrainer()
# Call the trainer to optimize the loss.
# Notice that we're using sample_batches=False, so we're performing
# Gradient Descent (no sampled batches of data).
best_model, loss_history, _, _ = trainer.train(
    X, y, X, y,
    model, two_layer_net,
    reg=0.001,
    learning_rate=1e-1,
    momentum=0.0,
    learning_rate_decay=1,
    update='sgd',
    sample_batches=False,
    num_epochs=100,
    verbose=False)
print('Final loss with vanilla SGD: %f' % (loss_history[-1], ))

# Start again from a freshly initialised toy model for the next experiment.
model = init_toy_model()
trainer = ClassifierTrainer()
# call the trainer to optimize the loss
# Notice that we're using sample_batches=False, so we're performing Gradient Descent (no sampled batches of data)
# Reshape data to rows X_train = X_train.reshape(num_training, -1) X_val = X_val.reshape(num_validation, -1) X_test = X_test.reshape(num_test, -1) return X_train, y_train, X_val, y_val, X_test, y_test # Invoke the above function to get our data. X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data() #print "three layer net with hypterparameter hidden[500,300], trainepoch 45, learning rate 1e-4" #model = init_three_layer_model(32*32*3,[500,300],10,maxout=3) model = init_two_layer_model(32 * 32 * 3, 550, 10) trainer = ClassifierTrainer() best_model, loss_history, train_acc, val_acc = trainer.train( X_train, y_train, X_val, y_val, model, three_layer_net, # two_layer_net, num_epochs=45, reg=0.05, dropout=0.5, # update='sgd', momentum=0.9, learning_rate_decay=0.95, learning_rate=1e-4,
print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name]))) ############################################################################### # # Train the network # To train the network we will use SGD with Momentum. #Open the file `classifier_trainer.py` and familiarize yourself with the `ClassifierTrainer` #class. It performs optimization given an arbitrary cost function data, and model. #By default it uses vanilla SGD, which you need to implement. #First, run the optimization below using Vanilla SGD: from cs231n.classifier_trainer import ClassifierTrainer model = init_toy_model() trainer = ClassifierTrainer() # call the trainer to optimize the loss # Notice that we're using sample_batches=False, so we're performing Gradient Descent (no sampled batches of data) best_model, loss_history, _, _ = trainer.train(X, y, X, y, model, two_layer_net, reg=0.001, learning_rate=1e-1, momentum=0.0, learning_rate_decay=1, update='sgd', sample_batches=False, num_epochs=100,
# Draw one random hyperparameter configuration. The random calls stay in the
# original order (num_filters, learning_rate, reg) so a seeded run produces
# the same samples.
init_hyp['filter_size'] = 3
init_hyp['num_filters'] = random.randint(80, 100)
train_hyp = {
    'learning_rate': 10**random.uniform(-4, -3),  # log-uniform in [1e-4, 1e-3]
    'reg': 10**random.uniform(-8, -7),            # log-uniform in [1e-8, 1e-7]
    'num_epochs': 0.5,
    'momentum': 0.9,
    'batch_size': 100,
    'acc_frequency': 100,
}
hyperparameters['init'] = init_hyp
hyperparameters['train'] = train_hyp

# Train a two-layer convnet with the sampled configuration.
model = init_two_layer_convnet(
    dtype=X_train.dtype, **hyperparameters['init'])
trainer = ClassifierTrainer()
model, loss_history, train_acc_history, val_acc_history = trainer.train(
    X_train, y_train, X_val, y_val,
    model, loss_function,
    **hyperparameters['train'])

# Record the run under its peak accuracies.
# (original author's note: the model returned corresponds to the best accuracy)
train_acc = max(train_acc_history)
val_acc = max(val_acc_history)
results[(val_acc, train_acc)] = hyperparameters

# Keep this run if it has the highest validation accuracy so far.
if val_acc > best['val_acc']:
    best['model'] = model
    best['hyperparameters'] = hyperparameters
    best['val_acc'] = val_acc
    best['train_acc'] = train_acc
    best['val_acc_history'] = val_acc_history
# Draw one random hyperparameter configuration. The random calls stay in the
# original order (num_filters, learning_rate, reg) so a seeded run produces
# the same samples.
init_hyp = {
    'filter_size': 3,
    'num_filters': random.randint(80, 100),
}
train_hyp = {
    'learning_rate': 10 ** random.uniform(-4, -3),  # log-uniform in [1e-4, 1e-3]
    'reg': 10 ** random.uniform(-8, -7),            # log-uniform in [1e-8, 1e-7]
    'num_epochs': 0.5,
    'momentum': 0.9,
    'batch_size': 100,
    'acc_frequency': 100,
}
hyperparameters['init'] = init_hyp
hyperparameters['train'] = train_hyp

# Train a two-layer convnet with the sampled configuration.
model = init_two_layer_convnet(
    dtype=X_train.dtype, **hyperparameters['init'])
trainer = ClassifierTrainer()
model, loss_history, train_acc_history, val_acc_history = trainer.train(
    X_train, y_train, X_val, y_val,
    model, loss_function,
    **hyperparameters['train'])

# Record the run under its peak accuracies.
# (original author's note: the model returned corresponds to the best accuracy)
train_acc = max(train_acc_history)
val_acc = max(val_acc_history)
results[(val_acc, train_acc)] = hyperparameters

# Keep this run if it has the highest validation accuracy so far.
if val_acc > best['val_acc']:
    best['model'] = model
    best['hyperparameters'] = hyperparameters
    best['val_acc'] = val_acc
    best['train_acc'] = train_acc
    best['val_acc_history'] = val_acc_history
    best['train_acc_history'] = train_acc_history
# Single-argument print call: same text as the old Python 2 print statement
# (label, one separating space, then the shape) and also valid on Python 3.
print('%s %s' % ('Test labels shape: ', y_test.shape))

# Grid search over two-layer-network hyperparameters: every combination of
# hidden size, epoch count, regularization strength and learning rate below
# is trained once and its curves are plotted.
hidden_size = [600, 800, 1000, 1200, 1400, 2000]
training_epochs = [45]
regs = [1e-2, 1e-3, 5e-3, 1e-4, 5e-4, 2]
learning_rate = [1e-3, 1e-4, 5e-5, 1e-6, 5e-6]

# NOTE(review): these three are not updated in the visible part of the loop;
# presumably they track the best configuration further down - confirm.
best = 0
bmodel = None
best_para = {}

for hs in hidden_size:
    for ep in training_epochs:
        for reg in regs:
            for lr in learning_rate:
                # Joined into one string so the output matches the old
                # space-separated print statement on both Python 2 and 3.
                print(" ".join(map(str, (
                    "test on param hs :", hs, " ep: ", ep,
                    " reg: ", reg, " lr:", lr))))

                # input size, hidden size, number of classes
                model = init_two_layer_model(32 * 32 * 3, hs, 10)
                trainer = ClassifierTrainer()
                best_model, loss_history, train_acc, val_acc = trainer.train(
                    X_train, y_train, X_val, y_val,
                    model, two_layer_net,
                    num_epochs=ep,
                    reg=reg,
                    momentum=0.9,
                    learning_rate_decay=0.95,
                    learning_rate=lr,
                    verbose=True)

                # Upper panel: loss per iteration.
                plt.subplot(2, 1, 1)
                plt.plot(loss_history)
                # '%lr' was an ignored 'l' length modifier followed by '%r';
                # spelled as plain '%r' here, which renders identically.
                plt.title('Loss history with paramater hs: %d, reg: %f, lr: %r'
                          % (hs, reg, lr))
                plt.xlabel('Iteration')
                plt.ylabel('Loss')

                # Lower panel: training and validation accuracy curves.
                plt.subplot(2, 1, 2)
                plt.plot(train_acc)
                plt.plot(val_acc)
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data() print 'Train data shape: ', X_train.shape print 'Train labels shape: ', y_train.shape print 'Validation data shape: ', X_val.shape print 'Validation labels shape: ', y_val.shape print 'Test data shape: ', X_test.shape print 'Test labels shape: ', y_test.shape from cs231n.classifier_trainer import ClassifierTrainer ####don't forget init model every train nn #### model = init_elu_relu_model(32*32*3, 100, 10) # input size, hidden size, number of classes trainer = ClassifierTrainer() ne = 20 _, loss_relu,train_acc,val_acc= trainer.train(X_train, y_train, X_val, y_val, model, relu, num_epochs=ne, reg=1.0,update='rmsprop', momentum=0.9, learning_rate_decay = 0.95,batch_size=80, learning_rate=1e-5, verbose=True) model = init_elu_relu_model(32*32*3, 100, 10) _, loss_leaky,train_acc_1,val_acc_1= trainer.train(X_train, y_train, X_val, y_val, model, leaky, num_epochs=ne, reg=1.0,update='rmsprop',batch_size=80, momentum=0.9, learning_rate_decay = 0.95, learning_rate=1e-5, verbose=True) model = init_elu_relu_model(32*32*3, 100, 10) _, loss_elu,train_acc_2,val_acc_2= trainer.train(X_train, y_train, X_val, y_val, model, elu,
# Load the CIFAR-10 splits and report their shapes.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

# Each line is "<label> <shape>", exactly what the old Python 2 print
# statements produced (label, one separating space, then the shape). A
# single formatted argument keeps the output identical on Python 2 and 3.
for _label, _array in (
        ('Train data shape: ', X_train),
        ('Train labels shape: ', y_train),
        ('Validation data shape: ', X_val),
        ('Validation labels shape: ', y_val),
        ('Test data shape: ', X_test),
        ('Test labels shape: ', y_test)):
    print('%s %s' % (_label, _array.shape))

from cs231n.classifier_trainer import ClassifierTrainer

# Reminder: re-initialise the model before every training run.
# input size, hidden size, number of classes
model = init_elu_relu_model(32 * 32 * 3, 100, 10)
trainer = ClassifierTrainer()
ne = 20  # number of training epochs

# Train with `relu` as the loss function; the returned best model is discarded.
_, loss_relu, train_acc, val_acc = trainer.train(
    X_train, y_train, X_val, y_val,
    model, relu,
    num_epochs=ne,
    reg=1.0,
    update='rmsprop',
    momentum=0.9,
    learning_rate_decay=0.95,
    batch_size=80,
    learning_rate=1e-5,
    verbose=True)