Example #1
    def compute_at(hyper_params):
        learning_rate, regularizer = hyper_params
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=learning_rate, reg=regularizer, num_iters=1000)

        y_train_prediction = svm.predict(X_train)
        train_accuracy = np.mean(y_train == y_train_prediction)
        y_val_prediction = svm.predict(X_val)
        val_accuracy = np.mean(y_val == y_val_prediction)
        final_accuracy = min(train_accuracy, val_accuracy)

        state.epoch += 1
        improved = state.accuracy < final_accuracy
        if improved:
            state.accuracy = final_accuracy
            state.svm = svm
            state.hyper = hyper_params[:]

        print "Epoch %2d: (%.8f, %f) -> %f %s" % (state.epoch, learning_rate, regularizer, final_accuracy, "(!)" if improved else "")
        return improved, final_accuracy
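# The compute_at() helper above relies on a mutable `state` object and on
# X_train/y_train/X_val/y_val arrays that are defined elsewhere in its file.
# A minimal sketch of that assumed scaffolding (the State class and the
# random-search driver below are illustrative assumptions, not the original code):
import numpy as np

class State(object):
    def __init__(self):
        self.epoch = 0
        self.accuracy = -1.0
        self.svm = None
        self.hyper = None

state = State()

def random_search(num_trials=20, seed=0):
    # Sample learning rate and regularization strength on a log scale and
    # let compute_at() keep track of the best model seen so far.
    rng = np.random.RandomState(seed)
    for _ in range(num_trials):
        learning_rate = 10 ** rng.uniform(-8, -6)
        regularizer = 10 ** rng.uniform(3, 5)
        compute_at([learning_rate, regularizer])
    return state.svm, state.hyper, state.accuracy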
Example #2
def train(X_train, y_train, X_val, y_val):
    # Use the validation set to tune hyperparameters (regularization strength and
    # learning rate). You should experiment with different ranges for the learning
    # rates and regularization strengths; if you are careful you should be able to
    # get a classification accuracy of about 0.4 on the validation set.
    learning_rates = [1e-7, 5e-6, 1e-6]
    regularization_strengths = [1e4, 5e4, 1e5]

    # results is a dictionary mapping tuples of the form
    # (learning_rate, regularization_strength) to tuples of the form
    # (training_accuracy, validation_accuracy). The accuracy is simply the fraction
    # of data points that are correctly classified.
    results = {}
    best_val = -1   # The highest validation accuracy that we have seen so far.
    best_svm = None # The LinearSVM object that achieved the highest validation rate.

    for lr in learning_rates:
        for reg in regularization_strengths:
            svm = LinearSVM()
            svm.train(X_train, y_train, learning_rate=lr, reg=reg, num_iters=1000)

            y_train_pred = svm.predict(X_train)
            train_accuracy = np.mean(y_train == y_train_pred)
            y_val_pred = svm.predict(X_val)
            val_accuracy = np.mean(y_val == y_val_pred)

            results[(lr, reg)] = (train_accuracy, val_accuracy)

            if best_val < val_accuracy:
                best_val = val_accuracy
                best_svm = svm

    # Print out results.
    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print 'Learning-rate=%f regularizer=%f train-accuracy=%f validation-accuracy=%f' % (lr, reg, train_accuracy, val_accuracy)

    print 'Best validation accuracy achieved during cross-validation: %f' % best_val
    return best_svm
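# A minimal usage sketch for the train() helper above. The data split is assumed
# to come from a CIFAR-10 loader such as the gen_train_val_test() used in
# Example #3; the test-set names below are assumptions for illustration:
# X_train, y_train, X_val, y_val, X_test, y_test = gen_train_val_test(49000, 1000, 1000)
best_svm = train(X_train, y_train, X_val, y_val)
y_test_pred = best_svm.predict(X_test)
print 'Test accuracy of the best SVM: %f' % np.mean(y_test == y_test_pred)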
Example #3
def main():
    X_train, y_train, X_val, y_val, X_test, y_test = gen_train_val_test(49000, 1000, 1000)
    # generate a random SVM weight matrix of small numbers
    W = np.random.randn(10, 3073) * 0.01
    start = time.clock()
    loss, grad = svm_loss_naive(W, X_train, y_train, 0.00001)
    end = time.clock()
    print "svm_loss_naive: %f s" % (end - start)
    print 'loss: %f' % (loss, )
    start = time.clock()
    loss1, grad = svm_loss_vectorized(W, X_train, y_train, 0.00001)
    end = time.clock()
    print "svm_loss_vectorized: %f s" % (end - start)
    print 'loss: %f' % (loss1, )
    svm = LinearSVM()
    tic = time.time()
    loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=3e4,
                                  num_iters=100000,batch_size=128, verbose=True)
    acc_train = evaluation(svm, X_train, y_train)
    acc_val = evaluation(svm, X_val, y_val)
    acc_test = evaluation(svm, X_test, y_test)
    print 'Train acc :{} Validation :{} Test :{}'.format(acc_train, acc_val, acc_test)
    toc = time.time()
    print 'That took %fs' % (toc - tic)
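# Example #3 calls an evaluation() helper that is not shown above. A minimal
# sketch of what it presumably computes (mean classification accuracy of a
# trained classifier); this implementation is an assumption, not the original:
import numpy as np

def evaluation(classifier, X, y):
    # Fraction of samples whose predicted label matches the ground-truth label.
    y_pred = classifier.predict(X)
    return np.mean(y_pred == y)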
Example #4
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('difference: %f' % difference)

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=2.5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
#plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
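# The snippet above stops right after computing the training-set predictions.
# The usual next step (shown in full in Example #6) is to report accuracy on the
# training and validation splits; a short sketch of that continuation:
print('training accuracy: %f' % (np.mean(y_train == y_train_pred), ))
y_val_pred = svm.predict(X_val)
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred), ))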
Example #5
print('difference: %f' % difference)

# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[ ]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=2.5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# In[ ]:

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# In[ ]:
Example #6
#
#loss_naive, grad_naive = svm_loss_naive(W, X_dev, y_dev, 0.0)
#
#from cs231n.classifiers.linear_svm import svm_loss_vectorized
#
#loss_vectorized, grad_vectorized = svm_loss_vectorized(W, X_dev, y_dev, 5e1)
#
#difference = np.linalg.norm(grad - grad_vectorized, ord='fro')
#print('difference: %f' % difference)

from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=3e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

y_train_pred = svm.predict(X_train)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred), ))
y_val_pred = svm.predict(X_val)
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred), ))
Example #7
# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('difference: %f' % difference)

# In[62]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=2.5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# In[63]:

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# In[69]:
Example #8
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'difference: %f' % difference

# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the
# numerical gradient. We are therefore ready to do SGD to minimize the loss.

# Now implement SGD in LinearSVM.train() function and run it with the code below
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-6,
                      reg=1e5,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print 'That took %fs' % (toc - tic)

# In[ ]:

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# In[ ]:
Example #9
# set. For each combination of hyperparameters, train a linear SVM on the      #
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for learning_rate in learning_rates:
    for reg in regularization_strengths:
        print("LR",learning_rate,"reg",reg)
        svm = LinearSVM()
        loss_hist = svm.train(X_train, y_train, learning_rate=learning_rate[0], reg=reg,
                      num_iters=learning_rate[1], verbose=True)
        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)
        results[(learning_rate[0],reg)] = (np.mean(y_train == y_train_pred),np.mean(y_val == y_val_pred))
        
        if best_val < np.mean(y_val == y_val_pred):
            best_val = np.mean(y_val == y_val_pred)
            best_parameters = { 'LR':learning_rate[0], 'reg': reg}

################################################################################
#                              END OF YOUR CODE                                #
################################################################################
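# Example #9 records only the winning hyperparameters in best_parameters rather
# than the fitted model. A minimal sketch (assuming the same X_train/y_train and
# the LinearSVM API used above; num_iters is an illustrative value) of rebuilding
# a best_svm object from those parameters:
best_svm = LinearSVM()
best_svm.train(X_train, y_train,
               learning_rate=best_parameters['LR'],
               reg=best_parameters['reg'],
               num_iters=1500, verbose=False)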
    
# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
Example #10
print('Vectorized loss and gradient: computed in %fs' % (toc - tic))

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('difference: %f' % difference)

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=2.5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
Example #11
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################

iters = 2000 #100
for lr in learning_rates:
    for rs in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)
        
        y_train_pred = svm.predict(X_train)
        acc_train = np.mean(y_train == y_train_pred)
        y_val_pred = svm.predict(X_val)
        acc_val = np.mean(y_val == y_val_pred)
        
        results[(lr, rs)] = (acc_train, acc_val)
        
        if best_val < acc_val:
            best_val = acc_val
            best_svm = svm
    
# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
Example #12
from cs231n.classifiers import LinearSVM

# results is a dictionary mapping tuples of the form
# (learning_rate, regularization_strength) to tuples of the form
# (training_accuracy, validation_accuracy). The accuracy is simply the fraction
# of data points that are correctly classified.
results = {}
best_val = -1  # The highest validation accuracy that we have seen so far.
best_svm = None  # The LinearSVM object that achieved the highest validation rate.

for lr in learning_rates:
    for reg in regularization_strengths:
        svm = LinearSVM()
        loss_hist = svm.train(X_train,
                              y_train,
                              learning_rate=lr,
                              reg=reg,
                              num_iters=1500,
                              verbose=False)
        y_train_pred = svm.predict(X_train)
        tr_acc = np.mean(y_train == y_train_pred)
        y_val_pred = svm.predict(X_val)
        val_acc = np.mean(y_val == y_val_pred)
        print(
            'lr: %f, reg: %f, training accuracy: %f, validation accuracy: %f' %
            (
                lr,
                reg,
                tr_acc,
                val_acc,
            ))
        results[(lr, reg)] = (tr_acc, val_acc)
Example #13
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################

num_iter = 500

for i in learning_rates:
    for j in regularization_strengths:
        results[(i, j)]=0

for lr, reg in sorted(results):
    svm = LinearSVM()
    # Use keyword arguments so num_iter and verbose land on the intended parameters.
    svm.train(X_train, y_train, learning_rate=lr, reg=reg, num_iters=num_iter, verbose=True)

    # Tune on the validation split, not the test split, as the instructions above require.
    y_pred = svm.predict(X_val)
    val_accuracy = float(np.sum(y_pred == y_val)) / y_val.shape[0]

    y_pred = svm.predict(X_train)
    train_accuracy = float(np.sum(y_pred == y_train)) / y_train.shape[0]

    results[(lr, reg)] = (train_accuracy, val_accuracy)
    if best_val < val_accuracy:
        best_val = val_accuracy
        best_svm = svm

################################################################################
#                              END OF YOUR CODE                                #
################################################################################
Example #14
# Write code that chooses the best hyperparameters by tuning on the validation #
# set. For each combination of hyperparameters, train a linear SVM on the      #
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for lr in learning_rates:
    for reg in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, lr, reg, num_iters=10)
        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)
        tr_acc = np.mean(y_train_pred == y_train)
        val_acc = np.mean(y_val_pred == y_val)
        keys = (lr, reg)
        values = (tr_acc, val_acc)
        results[keys] = values
        if val_acc > best_val:
            best_val = val_acc
            best_svm = svm
################################################################################
#                              END OF YOUR CODE                                #
################################################################################

# Print out results.
Example #15
toc = time.time()
print 'Vectorized loss and gradient: computed in %fs' % (toc - tic)

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'difference: %f' % difference


# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4, num_iters=500, verbose=True)   # should be 1500 iterations
toc = time.time()
print 'That took %fs' % (toc - tic)

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
# plt.plot(loss_hist)
# plt.xlabel('Iteration number')
# plt.ylabel('Loss value')
# plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
print 'training accuracy: %f' % (np.mean(y_train == y_train_pred), )
y_val_pred = svm.predict(X_val)
Example #16
print('difference: %f' % difference)

# %% [markdown]
# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss. Your code for this part will be written inside `cs231n/classifiers/linear_classifier.py`.

# %%
# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=2.5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# %%
# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# %%
# Write the LinearSVM.predict function and evaluate the performance on both the
Example #17
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for lr_idx in range(0,len(learning_rates)):
    for reg_idx in range(0,len(regularization_strengths)):
        lr_use = learning_rates[lr_idx]
        reg_use = regularization_strengths[reg_idx]
        svm = LinearSVM()
        loss_hist = svm.train(X_dev, y_dev, learning_rate=lr_use, reg=reg_use,
                      num_iters=1500, verbose=True)
        y_train_pred = svm.predict(X_dev)
        y_val_pred = svm.predict(X_val)

        # The model was trained on the dev split, so compare against y_dev here
        # (y_train has a different length than these predictions).
        acc_train = np.mean(y_dev == y_train_pred)
        acc_val = np.mean(y_val == y_val_pred)
        if acc_val > best_val:
            best_lr = lr_use
            best_reg = reg_use
            best_val = acc_val
            best_svm = svm
        results_once = {(lr_use, reg_use): (acc_train, acc_val)}
        results.update(results_once)
# pdb.set_trace()  # leftover debugging breakpoint
################################################################################
#                              END OF YOUR CODE                                #
Example #18
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'difference: %f' % difference


# ### Stochastic Gradient Descent
# 
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[ ]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print 'That took %fs' % (toc - tic)


# In[ ]:

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()


# In[ ]:
Example #19
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################

iters = 2000  # 100
for lr in learning_rates:
    for rs in regularization_strengths:
        svm = LinearSVM()
        svm.train(X_train, y_train, learning_rate=lr, reg=rs, num_iters=iters)

        y_train_pred = svm.predict(X_train)
        acc_train = np.mean(y_train == y_train_pred)
        y_val_pred = svm.predict(X_val)
        acc_val = np.mean(y_val == y_val_pred)

        results[(lr, rs)] = (acc_train, acc_val)

        if best_val < acc_val:
            best_val = acc_val
            best_svm = svm

# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
Example #20
tic = time.time()
loss_vectorized, _ = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# The losses should match but your vectorized implementation should be much faster.
print('difference: %f' % (loss_naive - loss_vectorized))

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=2.5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
Example #21
print('difference: %f' % difference)

# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[ ]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=2.5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# In[ ]:

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# In[ ]:
Example #22
toc = time.time()
print 'Vectorized loss and gradient: computed in %fs' % (toc - tic)

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'difference: %f' % difference

# Check how gradient descent performs
# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print 'That took %fs' % (toc - tic)

# Plot the loss curve
# A useful debugging strategy is to plot the loss as a function of
# iteration number:
# plt.plot(loss_hist)
# plt.xlabel('Iteration number')
# plt.ylabel('Loss value')
# plt.show()

# Make predictions on the training and validation sets
y_train_pred = svm.predict(X_train)
print 'training accuracy: %f' % (np.mean(y_train == y_train_pred), )
y_val_pred = svm.predict(X_val)
Example #23
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
tic = time.time()
for ilr in np.arange(learning_rates[0], learning_rates[1],
                     0.05 * (learning_rates[1] - learning_rates[0])):
    for ireg in np.arange(
            regularization_strengths[0], regularization_strengths[1], 0.05 *
        (regularization_strengths[1] - regularization_strengths[0])):
        svm = LinearSVM()
        _ = svm.train(X_train,
                      y_train,
                      learning_rate=ilr,
                      reg=ireg,
                      num_iters=1500,
                      verbose=True)
        y_train_pred = svm.predict(X_train)
        train_acc = np.mean(y_train == y_train_pred)
        print('training accuracy: %f' % (train_acc))
        y_val_pred = svm.predict(X_val)
        val_acc = np.mean(y_val == y_val_pred)
        if val_acc > best_val:
            best_val = val_acc
            best_svm = svm
        print('validation accuracy: %f' % (val_acc))
        results[(ilr, ireg)] = (train_acc, val_acc)

################################################################################
#                              END OF YOUR CODE                                #
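# Example #23 steps linearly through each [low, high] range with np.arange. Both
# hyperparameters span orders of magnitude, so a log-spaced grid is a common
# alternative; a minimal sketch under the same two-element range convention:
import numpy as np

lr_candidates = np.logspace(np.log10(learning_rates[0]),
                            np.log10(learning_rates[1]), num=5)
reg_candidates = np.logspace(np.log10(regularization_strengths[0]),
                             np.log10(regularization_strengths[1]), num=5)
# These candidate values can then drive the same nested training loop as above.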
Example #24
# Write code that chooses the best hyperparameters by tuning on the validation #
# set. For each combination of hyperparameters, train a linear SVM on the      #
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for learning_rate in learning_rates:
    for regularization_strength in regularization_strengths:
        svm = LinearSVM()
        loss_hist = svm.train(X_train, y_train, learning_rate, regularization_strength,
                      num_iters=1500, verbose=True)
        y_train_pred = svm.predict(X_train)
        y_val_pred = svm.predict(X_val)
        train_accuracy, val_accuracy = np.mean(y_train_pred == y_train), np.mean(y_val == y_val_pred)
        results[(learning_rate, regularization_strength)] = (train_accuracy, val_accuracy)

        if val_accuracy > best_val:
            best_val = val_accuracy
            best_svm = svm
################################################################################
#                              END OF YOUR CODE                                #
################################################################################
    
# Print out results.
for lr, reg in sorted(results):
    train_accuracy, val_accuracy = results[(lr, reg)]
Example #25
print 'difference: %f' % difference

# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[ ]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print 'That took %fs' % (toc - tic)

# In[ ]:

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# In[ ]:
Example #26
toc = time.time()
print 'Vectorized loss and gradient: computed in %fs' % (toc - tic)

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'difference: %f' % difference
'''
Stochastic Gradient Descent
'''
# Now implement SGD in LinearSVM.train() function and run it with the code below
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print 'That took %fs' % (toc - tic)

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
print 'training accuracy: %f' % (np.mean(y_train == y_train_pred), )
y_val_pred = svm.predict(X_val)
Example #27
print 'Vectorized loss and gradient: computed in %fs' % (toc - tic)

# The loss is a single number, so it is easy to compare the values computed
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'difference: %f' % difference

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print 'That took %fs' % (toc - tic)

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# Write the LinearSVM.predict function and evaluate the performance on both the
# training and validation set
y_train_pred = svm.predict(X_train)
Example #28
# by the two implementations. The gradient on the other hand is a matrix, so
# we use the Frobenius norm to compare them.
difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'difference: %f' % difference


# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the
# numerical gradient. We are therefore ready to do SGD to minimize the loss.

# Now implement SGD in LinearSVM.train() function and run it with the code below
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-6, reg=1e5,
                      num_iters=1500, verbose=True)
toc = time.time()
print 'That took %fs' % (toc - tic)


# In[ ]:

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()


# In[ ]:
Example #29
# training set, compute its accuracy on the training and validation sets, and  #
# store these numbers in the results dictionary. In addition, store the best   #
# validation accuracy in best_val and the LinearSVM object that achieves this  #
# accuracy in best_svm.                                                        #
#                                                                              #
# Hint: You should use a small value for num_iters as you develop your         #
# validation code so that the SVMs don't take much time to train; once you are #
# confident that your validation code works, you should rerun the validation   #
# code with a larger value for num_iters.                                      #
################################################################################
for learning_rate in learning_rates:
    for reg in regularization_strengths:
        svm = LinearSVM()
        loss_hist = svm.train(X_train,
                              y_train,
                              learning_rate=learning_rate,
                              reg=reg,
                              num_iters=2500,
                              verbose=True)
        # training and validation set
        y_train_pred = svm.predict(X_train)
        training_accuracy = np.mean(y_train == y_train_pred)
        print('training accuracy: %f' % (training_accuracy, ))
        y_val_pred = svm.predict(X_val)
        validation_accuracy = np.mean(y_val == y_val_pred)
        print('validation accuracy: %f' % (validation_accuracy, ))
        results[(learning_rate, reg)] = (training_accuracy,
                                         validation_accuracy)
        if validation_accuracy > best_val:
            best_val = validation_accuracy
            best_svm = svm
################################################################################
Example #30
print('difference: %f' % difference)

# ### Stochastic Gradient Descent
#
# We now have vectorized and efficient expressions for the loss and the gradient, and our gradient matches the numerical gradient. We are therefore ready to do SGD to minimize the loss.

# In[37]:

# In the file linear_classifier.py, implement SGD in the function
# LinearClassifier.train() and then run it with the code below.
from cs231n.classifiers import LinearSVM
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(X_train,
                      y_train,
                      learning_rate=1e-7,
                      reg=2.5e4,
                      num_iters=1500,
                      verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

# In[38]:

# A useful debugging strategy is to plot the loss as a function of
# iteration number:
plt.plot(loss_hist)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
#plt.show()

# In[39]:
def SVM(train_data, train_label, validation_data, validation_label, test_data, test_label):
    W = np.random.randn(10, 3072) * 0.0001
    loss, grad = svm_loss_naive(W, train_data, train_label, 0.000005)
    print 'loss: %f \n' % loss
    '''
    f=lambda w: svm_loss_naive(w, train_data,train_label,0.0)[0]
    grad_numerical=grad_check_sparse(f,W,grad,10)
    loss, grad = svm_loss_naive(W,train_data,train_label,5e1)
    f=lambda w:svm_loss_naive(w,train_data,train_label,5e1)[0]
    grad_numerical=grad_check_sparse(f,W,grad,10)

    t1 = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, train_data, train_label, 0.000005)
    t2 = time.time()
    print '\nNaive Loss: %e computed in %fs'%(loss_naive, t2-t1)

    t1 = time.time()
    loss_vectorized,grad_vectorized = svm_loss_vectorized(W, train_data, train_label, 0.000005)
    t2 = time.time()
    print 'Vectorised loss and gradient: %e computed in %fs\n'%(loss_vectorized, t2-t1)

    difference = np.linalg.norm(grad_naive-grad_vectorized, ord='fro')
    print 'difference: %f'%difference
    '''
    from cs231n.classifiers import LinearSVM

    svm = LinearSVM()
    t1 = time.time()
    loss_hist = svm.train(train_data, train_label,
                          learning_rate=1e-7, reg=5e4, num_iters=1000, verbose=True)
    t2 = time.time()
    print 'That took %fs' % (t2 - t1)

    plt.plot(loss_hist)
    plt.xlabel('Iteration number')
    plt.ylabel('Loss value')
    plt.show()

    train_label_predict = svm.predict(train_data)
    print 'Training accuracy: %f' % np.mean(train_label == train_label_predict)
    validation_label_predict = svm.predict(validation_data)
    print 'Validation accuracy: %f' % np.mean(validation_label == validation_label_predict)

    learning_rates = [1e-7, 2e-7, 5e-7, 1e-6]
    regularization_strengths = [1e4, 2e4, 5e4, 1e5, 5e5, 1e6]

    results = {}
    best_val = -1
    best_svm = None

    for learning in learning_rates:
        for regularization in regularization_strengths:
            svm = LinearSVM()
            svm.train(train_data, train_label, learning_rate=learning,
                      reg=regularization, num_iters=2000)
            train_label_predict = svm.predict(train_data)
            train_accuracy = np.mean(train_label_predict == train_label)
            print 'Training accuracy: %f' % train_accuracy
            validation_label_predict = svm.predict(validation_data)
            val_accuracy = np.mean(validation_label_predict == validation_label)
            print 'Validation accuracy: %f' % val_accuracy

            if val_accuracy > best_val:
                best_val = val_accuracy
                best_svm = svm

            results[(learning, regularization)] = (
                train_accuracy, val_accuracy)

    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print 'lr %e reg %e train accuracy: %f val accuracy %f' % (lr, reg, train_accuracy, val_accuracy)
    print 'Best validation accuracy achieved during cross validation: %f ' % best_val

    x_scatter = [math.log10(x[0]) for x in results]
    y_scatter = [math.log10(x[1]) for x in results]

    sz = [results[x][0] * 1500 for x in results]
    plt.subplot(1, 1, 1)
    plt.scatter(x_scatter, y_scatter, sz)
    plt.xlabel('log learning rate')
    plt.ylabel('log regularization strength')
    plt.title('Cifar-10 training accuracy')
    plt.show()

    sz = [results[x][1] * 1500 for x in results]
    plt.subplot(1, 1, 1)
    plt.scatter(x_scatter, y_scatter, sz)
    plt.xlabel('log learning rate')
    plt.ylabel('log regularization strength')
    plt.title('Cifar-10 validation accuracy')
    plt.show()

    y_test_pred = best_svm.predict(test_data)
    test_accuracy = np.mean(y_test_pred == test_label)
    print 'Linear SVM on raw pixels final test set accuracy: %f' % test_accuracy

    print best_svm.W.shape
    w = best_svm.W[:, :]
    print w.shape
    w = w.reshape(10, 32, 32, 3)
    w_min, w_max = np.min(w), np.max(w)
    classes = ['plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
    for i in xrange(10):
        plt.subplot(2, 5, i + 1)
        wimg = 255.0 * (w[i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    plt.show()
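# A minimal sketch of invoking the SVM() driver above. It assumes a CIFAR-10
# style loader such as the gen_train_val_test() used in Example #3, returning
# flattened image arrays and integer labels; the names are assumptions:
if __name__ == '__main__':
    X_train, y_train, X_val, y_val, X_test, y_test = gen_train_val_test(49000, 1000, 1000)
    SVM(X_train, y_train, X_val, y_val, X_test, y_test)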