def runModel_i(input_vector):
    """Train a two-layer net with the hyperparameters packed in input_vector."""

    # unpack: hidden layer size, learning rate, number of epochs, reg strength
    hls, lr, ne, rs = input_vector

    model = init_two_layer_model(
        32 * 32 * 3, int(hls),
        10)  # input size, hidden size, number of classes
    trainer = ClassifierTrainer()

    best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
        X_train,
        y_train,
        X_val,
        y_val,
        model,
        two_layer_net,
        num_epochs=int(ne),
        reg=rs,
        momentum=0.9,
        learning_rate_decay=0.95,
        learning_rate=lr,
        verbose=True)
    print("Inside model I")
    print(val_acc_history)
    return best_model, loss_history, train_acc_history, val_acc_history
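
# A minimal usage sketch for runModel_i: the input vector is ordered
# (hidden_size, learning_rate, num_epochs, reg); the values below are
# illustrative, not tuned.
# best_model, loss_hist, train_accs, val_accs = runModel_i([800, 1e-4, 45, 5e-4])
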
hidden_size = [600, 800, 1000, 1200, 1400, 2000]
training_epochs = [45]
regs = [1e-2, 1e-3, 5e-3, 1e-4, 5e-4, 2]
learning_rate = [1e-3, 1e-4, 5e-5, 1e-6, 5e-6]
best = 0
bmodel = None
best_para = {}
for hs in hidden_size:
    for ep in training_epochs:
        for reg in regs:
            for lr in learning_rate:
                print "test on param hs :", hs, " ep: ", ep, "  reg: ", reg, "  lr:", lr
                model = init_two_layer_model(
                    32 * 32 * 3, hs,
                    10)  # input size, hidden size, number of classes
                trainer = ClassifierTrainer()
                best_model, loss_history, train_acc, val_acc = trainer.train(
                    X_train,
                    y_train,
                    X_val,
                    y_val,
                    model,
                    two_layer_net,
                    num_epochs=ep,
                    reg=reg,
                    momentum=0.9,
                    learning_rate_decay=0.95,
                    learning_rate=lr,
                    verbose=True)

                plt.subplot(2, 1, 1)
                plt.plot(loss_history)
                plt.title('Loss history with parameters hs: %d, reg: %e, lr: %e' %
                          (hs, reg, lr))
                plt.xlabel('Iteration')
                plt.ylabel('Loss')

                plt.subplot(2, 1, 2)
                plt.plot(train_acc)
                plt.plot(val_acc)

###############################################################################

# y = np.random.randint(num_classes, size=num_inputs)
# model = init_supercool_convnet(num_filters=3, filter_size=3, input_shape=input_shape)
# loss, grads = supercool_convnet(X, model, y)
# for param_name in sorted(grads):
#     f = lambda _: supercool_convnet(X, model, y)[0]
#     param_grad_num = eval_numerical_gradient(f, model[param_name], verbose=False, h=1e-6)
#     e = rel_error(param_grad_num, grads[param_name])
#     print('%s max relative error: %e' % (param_name, rel_error(param_grad_num, grads[param_name])))

# # Make sure we can overfit...
# model = init_supercool_convnet(weight_scale=5e-2, bias_scale=0, filter_size=3) # weight_scale=5e-2
# trainer = ClassifierTrainer()
# best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
#           X_train[:50], y_train[:50], X_val, y_val, model, supercool_convnet,
#           reg=0.001, momentum=0.9, learning_rate=0.0001, batch_size=10, num_epochs=10, # change to 20 epochs
#           verbose=True) # batch size 40-100

model = init_supercool_convnet(weight_scale=3e-2, bias_scale=0, filter_size=3)
trainer = ClassifierTrainer()
best_model, loss_history, train_acc_history, val_acc_history = trainer.train(
          X_train, y_train, X_val, y_val, model, supercool_convnet,
          reg=0.5, momentum=0.9, learning_rate=5e-5, batch_size=50, num_epochs=15, # change to 20 epochs
          verbose=True, acc_frequency=50) # batch size 40-100


import pickle

with open('best_model_2.pkl', 'wb') as f:
    pickle.dump(best_model, f)

# with open('best_model.pkl', 'rb') as f:
#     best_model = pickle.load(f)

###############################################################################

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    # ... load CIFAR-10 and split it into train / validation / test sets
    # (split sizes assumed) ...

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test


# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()


#print "three layer net with hypterparameter hidden[500,300], trainepoch 45, learning rate 1e-4"
#model = init_three_layer_model(32*32*3,[500,300],10,maxout=3)
model = init_two_layer_model(32*32*3,550,10)
trainer = ClassifierTrainer()
best_model, loss_history, train_acc, val_acc = trainer.train(X_train, y_train, X_val, y_val,
                                                            model,
                                                            three_layer_net,
#                                                            two_layer_net,
                                                            num_epochs=45,reg=0.05,dropout=0.5,
#                                                            update='sgd',
                                                            momentum=0.9,learning_rate_decay=0.95,
                                                            learning_rate=1e-4,verbose=True)

plt.subplot(2, 1, 1)
plt.plot(loss_history)
plt.title('Loss history')
plt.xlabel('Iteration')
plt.ylabel('Loss')
# Use numeric gradient checking to check your implementation of the backward pass.
# If your implementation is correct, the difference between the numeric and
# analytic gradients should be less than 1e-8 for each of W1, W2, b1, and b2.
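
# For reference, a sketch of what the rel_error helper is assumed to compute
# (the standard guarded relative error; the actual helper may differ):
#
# def rel_error(x, y):
#     return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))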

loss, grads = two_layer_net(X, model, y, reg)

# these should all be less than 1e-8 or so
for param_name in grads:
    param_grad_num = eval_numerical_gradient(
        lambda W: two_layer_net(X, model, y, reg)[0],
        model[param_name], verbose=False)
    print('%s max relative error: %e' %
          (param_name, rel_error(param_grad_num, grads[param_name])))

from cs231n.classifier_trainer import ClassifierTrainer

model = init_toy_model()
trainer = ClassifierTrainer()
# call the trainer to optimize the loss
# Notice that we're using sample_batches=False, so we're performing Gradient Descent (no sampled batches of data)
best_model, loss_history, _, _ = trainer.train(X, y, X, y,
                                             model, two_layer_net,
                                             reg=0.001,
                                             learning_rate=1e-1, momentum=0.0, learning_rate_decay=1,
                                             update='sgd', sample_batches=False,
                                             num_epochs=100,
                                             verbose=False)
print('Final loss with vanilla SGD: %f' % (loss_history[-1],))

model = init_toy_model()
trainer = ClassifierTrainer()
# call the trainer to optimize the loss
# Notice that we're using sample_batches=False, so we're performing Gradient Descent (no sampled batches of data)

###############################################################################

def get_CIFAR10_data(num_training=49000, num_validation=1000, num_test=1000):
    # ... load CIFAR-10 and split it into train / validation / test sets
    # (split sizes assumed) ...

    # Reshape data to rows
    X_train = X_train.reshape(num_training, -1)
    X_val = X_val.reshape(num_validation, -1)
    X_test = X_test.reshape(num_test, -1)

    return X_train, y_train, X_val, y_val, X_test, y_test


# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

#print "three layer net with hypterparameter hidden[500,300], trainepoch 45, learning rate 1e-4"
#model = init_three_layer_model(32*32*3,[500,300],10,maxout=3)
model = init_two_layer_model(32 * 32 * 3, 550, 10)
trainer = ClassifierTrainer()
best_model, loss_history, train_acc, val_acc = trainer.train(
    X_train,
    y_train,
    X_val,
    y_val,
    model,
    three_layer_net,
    #                                                            two_layer_net,
    num_epochs=45,
    reg=0.05,
    dropout=0.5,
    #                                                            update='sgd',
    momentum=0.9,
    learning_rate_decay=0.95,
    learning_rate=1e-4,
    verbose=True)

###############################################################################

# # Train the network
# To train the network we will use SGD with momentum.
# Open the file `classifier_trainer.py` and familiarize yourself with the
# `ClassifierTrainer` class. It performs optimization given an arbitrary cost
# function, data, and model. By default it uses vanilla SGD, which you need to
# implement. First, run the optimization below using vanilla SGD:
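#
# As a sketch, the vanilla SGD update applied to every parameter is a plain
# step down the gradient (the names below are illustrative, not the trainer's
# actual internals):
#
#     for p in model:
#         model[p] -= learning_rate * grads[p]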

from cs231n.classifier_trainer import ClassifierTrainer

model = init_toy_model()
trainer = ClassifierTrainer()
# call the trainer to optimize the loss
# Notice that we're using sample_batches=False, so we're performing Gradient Descent (no sampled batches of data)
best_model, loss_history, _, _ = trainer.train(X,
                                               y,
                                               X,
                                               y,
                                               model,
                                               two_layer_net,
                                               reg=0.001,
                                               learning_rate=1e-1,
                                               momentum=0.0,
                                               learning_rate_decay=1,
                                               update='sgd',
                                               sample_batches=False,
                                               num_epochs=100,
                                               verbose=False)
print('Final loss with vanilla SGD: %f' % (loss_history[-1],))

###############################################################################

    init_hyp = {}
    train_hyp = {}
    init_hyp['filter_size'] = 3
    init_hyp['num_filters'] = random.randint(80, 100)
    train_hyp['learning_rate'] = 10**random.uniform(-4, -3)
    train_hyp['reg'] = 10**random.uniform(-8, -7)
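    # Drawing 10**random.uniform(a, b) samples log-uniformly: every order of
    # magnitude in [10**a, 10**b] is equally likely, so the reg strength above
    # always lands in [1e-8, 1e-7] and the learning rate in [1e-4, 1e-3].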
    train_hyp['num_epochs'] = 0.5
    train_hyp['momentum'] = 0.9
    train_hyp['batch_size'] = 100
    train_hyp['acc_frequency'] = 100
    hyperparameters['init'] = init_hyp
    hyperparameters['train'] = train_hyp

    # train model
    model = init_two_layer_convnet(dtype=X_train.dtype,
                                   **hyperparameters['init'])
    trainer = ClassifierTrainer()
    model, loss_history, train_acc_history, val_acc_history = trainer.train(
        X_train, y_train, X_val, y_val, model, loss_function,
        **hyperparameters['train'])

    # store results
    # the model returned corresponds to the best accuracy
    train_acc = max(train_acc_history)
    val_acc = max(val_acc_history)
    results[(val_acc, train_acc)] = hyperparameters
    if val_acc > best['val_acc']:
        best['model'] = model
        best['hyperparameters'] = hyperparameters
        best['val_acc'] = val_acc
        best['train_acc'] = train_acc
        best['val_acc_history'] = val_acc_history
        best['train_acc_history'] = train_acc_history

###############################################################################

    init_hyp = {}
    train_hyp = {}
    init_hyp['filter_size'] = 3
    init_hyp['num_filters'] = random.randint(80, 100)
    train_hyp['learning_rate'] = 10 ** random.uniform(-4, -3)
    train_hyp['reg'] = 10 ** random.uniform(-8, -7)
    train_hyp['num_epochs'] = 0.5
    train_hyp['momentum'] = 0.9
    train_hyp['batch_size'] = 100
    train_hyp['acc_frequency'] = 100
    hyperparameters['init'] = init_hyp
    hyperparameters['train'] = train_hyp

    # train model
    model = init_two_layer_convnet(dtype=X_train.dtype, **hyperparameters['init'])
    trainer = ClassifierTrainer()
    model, loss_history, train_acc_history, val_acc_history = trainer.train(
                  X_train, y_train, X_val, y_val, model, loss_function,
                  **hyperparameters['train'])

    # store results
    train_acc = max(train_acc_history) # the model returned corresponds to the best accuracy
    val_acc = max(val_acc_history)
    results[(val_acc, train_acc)] = hyperparameters
    if val_acc > best['val_acc']:
        best['model'] = model
        best['hyperparameters'] = hyperparameters
        best['val_acc'] = val_acc
        best['train_acc'] = train_acc
        best['val_acc_history'] = val_acc_history
        best['train_acc_history'] = train_acc_history

###############################################################################

print('Test labels shape:', y_test.shape)

hidden_size = [600,800,1000,1200,1400,2000]
training_epochs = [45]
regs = [1e-2,1e-3,5e-3,1e-4,5e-4,2]
learning_rate = [1e-3,1e-4,5e-5,1e-6,5e-6]
best = 0
bmodel = None
best_para = {}
for hs in hidden_size:
    for ep in training_epochs:
        for reg in regs:
            for lr in learning_rate:
                print "test on param hs :",hs," ep: ",ep,"  reg: ",reg,"  lr:",lr
                model = init_two_layer_model(32*32*3, hs, 10) # input size, hidden size, number of classes
                trainer = ClassifierTrainer()
                best_model, loss_history, train_acc, val_acc = trainer.train(X_train, y_train, X_val, y_val,
                                             model, two_layer_net,
                                             num_epochs=ep, reg=reg,
                                             momentum=0.9, learning_rate_decay = 0.95,
                                             learning_rate=lr, verbose=True)

                plt.subplot(2,1,1)
                plt.plot(loss_history)
                plt.title('Loss history with parameters hs: %d, reg: %e, lr: %e' % (hs, reg, lr))
                plt.xlabel('Iteration')
                plt.ylabel('Loss')
                
                plt.subplot(2, 1, 2)
                plt.plot(train_acc)
                plt.plot(val_acc)

###############################################################################

X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print('Train data shape:', X_train.shape)
print('Train labels shape:', y_train.shape)
print('Validation data shape:', X_val.shape)
print('Validation labels shape:', y_val.shape)
print('Test data shape:', X_test.shape)
print('Test labels shape:', y_test.shape)

from cs231n.classifier_trainer import ClassifierTrainer

#### Don't forget to re-initialize the model before every training run
####
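
# The three runs below train the same 100-unit hidden layer with ReLU, leaky
# ReLU, and ELU activations under identical hyperparameters, so their loss and
# accuracy curves can be compared directly.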
model = init_elu_relu_model(32*32*3, 100, 10) # input size, hidden size, number of classes

trainer = ClassifierTrainer()
ne = 20
_, loss_relu,train_acc,val_acc= trainer.train(X_train, y_train, X_val, y_val,
                                             model, relu,
                                             num_epochs=ne, reg=1.0,update='rmsprop',
                                             momentum=0.9, learning_rate_decay = 0.95,batch_size=80,
                                             learning_rate=1e-5, verbose=True)
model = init_elu_relu_model(32*32*3, 100, 10)
_, loss_leaky,train_acc_1,val_acc_1= trainer.train(X_train, y_train, X_val, y_val,
                                             model, leaky,
                                             num_epochs=ne, reg=1.0,update='rmsprop',batch_size=80,
                                             momentum=0.9, learning_rate_decay = 0.95,
                                             learning_rate=1e-5, verbose=True)
model = init_elu_relu_model(32*32*3, 100, 10)
_, loss_elu, train_acc_2, val_acc_2 = trainer.train(X_train, y_train, X_val, y_val,
                                             model, elu,
                                             num_epochs=ne, reg=1.0, update='rmsprop', batch_size=80,
                                             momentum=0.9, learning_rate_decay=0.95,
                                             learning_rate=1e-5, verbose=True)

###############################################################################

X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()
print('Train data shape:', X_train.shape)
print('Train labels shape:', y_train.shape)
print('Validation data shape:', X_val.shape)
print('Validation labels shape:', y_val.shape)
print('Test data shape:', X_test.shape)
print('Test labels shape:', y_test.shape)

from cs231n.classifier_trainer import ClassifierTrainer

#### Don't forget to re-initialize the model before every training run
####
model = init_elu_relu_model(32 * 32 * 3, 100,
                            10)  # input size, hidden size, number of classes

trainer = ClassifierTrainer()
ne = 20
_, loss_relu, train_acc, val_acc = trainer.train(X_train,
                                                 y_train,
                                                 X_val,
                                                 y_val,
                                                 model,
                                                 relu,
                                                 num_epochs=ne,
                                                 reg=1.0,
                                                 update='rmsprop',
                                                 momentum=0.9,
                                                 learning_rate_decay=0.95,
                                                 batch_size=80,
                                                 learning_rate=1e-5,
                                                 verbose=True)