Example #1
def test_SVM_random_weights(sample_train, weight_size=0.0001, regularization=1.0):
    Xtrain, ytrain = sample_train()
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(3073,10) * weight_size
    loss, grad = svm_loss_naive(W, Xtrain, ytrain, regularization)
    assert loss > 8.5
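The 8.5 threshold is a sanity bound: with W this small, all scores are close to zero, so each of the 9 incorrect CIFAR-10 classes contributes a margin of roughly delta = 1 and the loss lands near 9. A self-contained check of that arithmetic:

import numpy as np

num_classes = 10
delta = 1.0
scores = np.zeros(num_classes)      # tiny random W => scores ~ 0
correct_class = 3                   # any class; the value does not matter here
margins = np.maximum(0, scores - scores[correct_class] + delta)
margins[correct_class] = 0          # the correct class contributes nothing
print(margins.sum())                # 9.0 == (num_classes - 1) * delta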
Example #2
def test_SVM_loss_naive_no_bias_X(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest   = sample_test(count=20)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))

    # W is sized for X with a bias column, but no bias column was added to Xtrain,
    # so the shapes mismatch and a ValueError is expected
    W = np.random.randn(Xtrain.shape[1] + 1,10) * 0.0001
    with pytest.raises(ValueError):
        loss, grad = svm_loss_naive(W, Xtrain, ytrain, 1e2)
Example #3
def test_SVM_loss_naive_no_bias_X(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))

    # W is sized for X with a bias column, but no bias column was added to Xtrain,
    # so the shapes mismatch and a ValueError is expected
    W = np.random.randn(Xtrain.shape[1] + 1, 10) * 0.0001
    with pytest.raises(ValueError):
        loss, grad = svm_loss_naive(W, Xtrain, ytrain, 1e2)
Example #4
def test_SVM_loss_naive_vectorized_comparison_reg(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1],10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_naive, grad_naive = svm_loss_naive(W, Xtrain, ytrain, reg)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
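In the test module this comparison test comes from, train_count and reg are presumably supplied by pytest.mark.parametrize decorators, roughly as follows (the parameter values here are illustrative, not taken from the source):

import pytest

@pytest.mark.parametrize("reg", [0.0, 1.0, 1e2])
@pytest.mark.parametrize("train_count", [10, 40, 100])
def test_SVM_loss_naive_vectorized_comparison_reg(sample_train, train_count, reg):
    ...  # body identical to the example above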
Example #5
def test_SVM_loss_naive_vectorized_comparison(sample_train, train_count):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = svm_loss_vectorized(W, Xtrain, ytrain, 1e2)
    loss_naive, grad_naive = svm_loss_naive(W, Xtrain, ytrain, 1e2)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
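For reference, here is a minimal sketch of the two functions these tests compare, assuming the (D, C) weight layout and (N, D) data layout used above, a margin of 1, and regularization written as reg * sum(W**2) with gradient 2 * reg * W (some course versions use a 0.5 factor); the actual cs231n implementations may differ in such details.

import numpy as np

def svm_loss_naive(W, X, y, reg):
    """Loop-based multiclass SVM loss. W: (D, C), X: (N, D), y: (N,) int labels."""
    dW = np.zeros_like(W)
    num_train, num_classes = X.shape[0], W.shape[1]
    loss = 0.0
    for i in range(num_train):
        scores = X[i].dot(W)
        correct_score = scores[y[i]]
        for j in range(num_classes):
            if j == y[i]:
                continue
            margin = scores[j] - correct_score + 1.0  # delta = 1
            if margin > 0:
                loss += margin
                dW[:, j] += X[i]
                dW[:, y[i]] -= X[i]
    loss = loss / num_train + reg * np.sum(W * W)
    dW = dW / num_train + 2 * reg * W
    return loss, dW

def svm_loss_vectorized(W, X, y, reg):
    """Same loss and gradient without explicit loops; should match the naive version."""
    num_train = X.shape[0]
    scores = X.dot(W)                                    # (N, C)
    correct = scores[np.arange(num_train), y][:, None]   # (N, 1)
    margins = np.maximum(0.0, scores - correct + 1.0)
    margins[np.arange(num_train), y] = 0.0
    loss = margins.sum() / num_train + reg * np.sum(W * W)

    binary = (margins > 0).astype(X.dtype)               # 1 where a margin is violated
    binary[np.arange(num_train), y] = -binary.sum(axis=1)
    dW = X.T.dot(binary) / num_train + 2 * reg * W
    return loss, dW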
Example #6
def test_SVM_random_weights(sample_train,
                            weight_size=0.0001,
                            regularization=1.0):
    Xtrain, ytrain = sample_train()
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(3073, 10) * weight_size
    loss, grad = svm_loss_naive(W, Xtrain, ytrain, regularization)
    assert loss > 8.5
Example #7
def test_SVM_loss_naive_no_bias_W(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest   = sample_test(count=20)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    #using the incorrect W size
    W = np.random.randn(Xtrain.shape[1],10) * 0.0001

    #add the bias dimension
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    with pytest.raises(ValueError):
        loss, grad = svm_loss_naive(W, Xtrain, ytrain, 1e2)
Example #8
def test_SVM_loss_naive_no_bias_W(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    #using the incorrect W size
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001

    #add the bias dimension
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    with pytest.raises(ValueError):
        loss, grad = svm_loss_naive(W, Xtrain, ytrain, 1e2)
Example #9
def main():
    X_train, y_train, X_val, y_val, X_test, y_test = gen_train_val_test(49000, 1000, 1000)
    # generate a random SVM weight matrix of small numbers
    W = np.random.randn(10, 3073) * 0.01
    start = time.time()
    loss, grad = svm_loss_naive(W, X_train, y_train, 0.00001)
    end = time.time()
    print("svm_loss_naive: %f s" % (end - start))
    print('loss: %f' % (loss, ))
    start = time.time()
    loss1, grad = svm_loss_vectorized(W, X_train, y_train, 0.00001)
    end = time.time()
    print("svm_loss_vectorized: %f s" % (end - start))
    print('loss: %f' % (loss1, ))
    svm = LinearSVM()
    tic = time.time()
    loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=3e4,
                          num_iters=100000, batch_size=128, verbose=True)
    acc_train = evaluation(svm, X_train, y_train)
    acc_val = evaluation(svm, X_val, y_val)
    acc_test = evaluation(svm, X_test, y_test)
    print('Train acc: {} Validation: {} Test: {}'.format(acc_train, acc_val, acc_test))
    toc = time.time()
    print('That took %fs' % (toc - tic))
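evaluation is not defined in this snippet; a plausible minimal version (an assumption on our part, relying only on the predict method used elsewhere in these examples) would be:

def evaluation(model, X, y):
    # Accuracy: fraction of rows of X whose predicted label matches y.
    return np.mean(model.predict(X) == y)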
Example #10
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
from cs231n.classifiers.linear_svm import svm_loss_vectorized
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = svm_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad)
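grad_check_sparse itself is not shown in these snippets; it is essentially a centered-difference check at a few random coordinates. A minimal sketch of that idea (the real cs231n helper may differ in step size, argument names and output format):

import numpy as np

def grad_check_sparse_sketch(f, x, analytic_grad, num_checks=10, h=1e-5):
    """Compare analytic_grad against (f(x+h) - f(x-h)) / 2h at random coordinates."""
    for _ in range(num_checks):
        ix = tuple(np.random.randint(n) for n in x.shape)
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)                       # f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)                       # f(x - h)
        x[ix] = oldval                    # restore the original entry
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / (
            abs(grad_numerical) + abs(grad_analytic) + 1e-12)
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))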
Example #11
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# ## SVM Classifier
#
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**.
#
# As you can see, we have prefilled the function `compute_loss_naive`
# which uses for loops to evaluate the multiclass SVM loss function.

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

# The `grad` returned from the function above is right now all zero. Derive the gradient for the SVM cost function and implement it inline inside the function `svm_loss_naive`. You will find it helpful to interleave your new code inside the existing function.
#
# To check that you have implemented the gradient correctly, you can
# numerically estimate the gradient of the loss function and compare
# the numeric estimate to the gradient that you computed. We have provided
# code that does this for you:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)
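Spelled out, the gradient this notebook text asks you to derive, with margin \Delta = 1, x_i the i-th row of X and w_j the j-th column of W, is

    L_i = \sum_{j \ne y_i} \max(0,\; x_i w_j - x_i w_{y_i} + 1)
    \nabla_{w_j} L_i = 1\{x_i w_j - x_i w_{y_i} + 1 > 0\}\, x_i^T            (j \ne y_i)
    \nabla_{w_{y_i}} L_i = -\Big(\sum_{j \ne y_i} 1\{x_i w_j - x_i w_{y_i} + 1 > 0\}\Big)\, x_i^T

Averaging over the N examples and adding the regularization term reg * sum(W**2) contributes a further 2 * reg * W to dW; this is exactly what the comparison sketch after Example #5 implements.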
Example #12
def f(w):
    return svm_loss_naive(w, X_dev, y_dev, 5e1)[0]
Example #13
xTest = xTest[:lengthTest]
yTest = yTest[:lengthTest]

# print(xTrain.shape)
# print(np.mean(xTrain,axis = 0).shape)
# print(np.std(xTrain,axis = 0).shape)

xTrain = (xTrain - np.mean(xTrain, axis=0)) / np.std(xTrain, axis=0)
# print(np.mean(xTrain,axis = 0))
# print(np.std(xTrain,axis = 0))

W = 2 * np.random.random_sample((xTrain.shape[1], 10)) - 1
reg = 10

start = time.time()
loss, dw = linear_svm.svm_loss_naive(W, xTrain, yTrain, 0)
print("Time for Naive ", time.time() - start)

start = time.time()
lossVector, dwVector = linear_svm.svm_loss_vectorized(W, xTrain, yTrain, 0)
print("Time for Vectorised approach ", time.time() - start)

# def f(w):
# return linear_svm.svm_loss_naive(w,xTrain,yTrain,0)[0]
# gradient_check.grad_check_sparse(f, W, dw, num_checks=10, h=1e-5)

cvFold = 5
learningRates = [1e-3]
regStrengths = [0, 100, 200, 400, 500]

xTrainCV = np.array(np.split(xTrain, cvFold))
Example #14
# only has to worry about optimizing a single weight matrix W. Append a bias column of all ones to each data matrix; the feature dimension then becomes 3073.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad)
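The bias trick the translated comment above describes: appending a constant 1 to every row of X folds the bias vector b (shape (1, 10)) into the weight matrix, since for a single row x

    x @ W + b == np.hstack([x, [1.0]]) @ np.vstack([W, b])

so b becomes the last row of the augmented (3072 + 1) x 10 = 3073 x 10 weight matrix and the classifier only has to optimize the single matrix W.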
Example #15
File: test.py Project: Tang7/cnn231
# dW = diff.dot(X.T)

correct = num_sum * y_index

print(correct[:, :2])

diff -= correct

# print diff

dW = diff.dot(X.T)/float(num_train)

# print dW

loss1, grad = svm_loss_naive(W, X, y, 1)

# print grad


loss2, grad2 = svm_loss_vectorized(W, X, y, 1)

# print grad2
print(loss)
print(loss1)
print(loss2)

differ = np.linalg.norm(grad - grad2, ord='fro')
print('difference: %f' % differ)

differ1 = np.linalg.norm(dW - grad, ord='fro')
Example #16
# ## SVM Classifier
#
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**.
#
# As you can see, we have prefilled the function `compute_loss_naive` which uses for loops to evaluate the multiclass SVM loss function.

# In[9]:

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(10, 3073) * 0.0001
loss, grad = svm_loss_naive(W, X_train, y_train, 0.00001)
print('loss: %f' % (loss, ))

# The `grad` returned from the function above is right now all zero. Derive the gradient for the SVM cost function and implement it inline inside the function `svm_loss_naive`. You will find it helpful to interleave your new code inside the existing function.
#
# To check that you have implemented the gradient correctly, you can numerically estimate the gradient of the loss function and compare the numeric estimate to the gradient that you computed. We have provided code that does this for you:

# In[12]:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
#loss, grad = svm_loss_naive(W, X_train, y_train, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
Example #17
    (X_train.shape[0], 1))])  # hstack((a, b)) appends b after a
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
# print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
# np.random.randn(3073, 10) gives a (3073, 10) array with entries drawn from N(0, 1)
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))  # output: loss: 9.548658

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

''''' 
# Numerically compute the gradient along several randomly chosen dimensions, and 
# compare them with your analytically computed gradient. The numbers should match 
# almost exactly along all dimensions. 
from cs231n.gradient_check import grad_check_sparse 
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0] 
grad_numerical = grad_check_sparse(f, W, grad) 

# do the gradient check once again with regularization turned on 
Example #18
def old_vect_grad_desc():

    ## Old version of gradient descent

    from cs231n.classifiers.linear_svm import svm_loss_naive
    from cs231n.classifiers.linear_svm import svm_loss_vectorized

    loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.5, dograd=False)
    loss2, grad = svm_loss_vectorized(W, X_dev, y_dev, 0.5)

    print "loss: ", loss
    print "loss2: ", loss2

    # data
    #reg = 10000
    X = X_dev
    y = y_dev
    delta = 1
    loss_sum = 0.0
    loss = 0
    reg = 0.5
    step_size = 1e-5
    dW = np.zeros(W.shape)  # initialize the gradient as zero
    dW2 = np.zeros(W.shape)  # initialize the gradient as zero

    scores = X.dot(W)

    # calculating  'correct_class_scores':
    unit_mat = np.ones([X.shape[0], W.shape[1]])

    c = range(0, X.shape[0])
    correct_class_score = np.identity(X.shape[0])
    correct_class_score[c, c] = scores[c, y[c]]

    correct_class_score = np.dot(correct_class_score, unit_mat)
    correct_class_score[c, y[c]] = 0

    loss = scores - correct_class_score + unit_mat * delta

    print(loss.shape)
    loss_uni = np.ones(loss.shape)
    loss_uni[loss < 0] = 0
    print(np.sum(loss_uni, axis=1))
    print(loss_uni)

    ## grad for y_i <> j
    dW2 = np.dot(np.transpose(X), loss_uni)
    print "dw2:", dW2.shape

    gr_minus = np.sum(loss > 0, axis=1)
    print "gr_minus", gr_minus

    zero_mat = np.zeros([X.shape[0], W.shape[1]])
    zero_mat[c, y] = 1

    gr_minus = np.dot(np.identity(gr_minus.shape[0]) * gr_minus, zero_mat)
    print(gr_minus.shape)
    print(gr_minus[0:20, 0:20])

    print("x shape: ", X.shape)

    dW2 -= np.dot(np.transpose(X), gr_minus)
    dW2 /= X.shape[0]
    dW2 += 0.5 * reg * 2 * W

    print "loss matr dimensions: ", loss.shape
    print X.shape
    k = 1

    for k in range(X.shape[1]):
        for l in range(W.shape[1]):

            X_h = np.zeros([X.shape[0], W.shape[1]])
            X_h[:, l] = X[:, k] * step_size

            c = range(0, X.shape[0])
            unit_mat = np.ones([X.shape[0], W.shape[1]])
            correct_class_score = np.identity(X.shape[0])
            correct_class_score[c, c] = X_h[c, y[c]]
            correct_class_score = np.dot(correct_class_score, unit_mat)
            correct_class_score[c, y[c]] = 0

            loss_h = loss + X_h - correct_class_score
            loss_h[loss_h < 0] = 0
            loss_h[c, y[c]] = 0

            loss_ph_sum = (np.sum(loss_h) / X.shape[0])

            loss_h = loss - X_h + correct_class_score
            loss_h[loss_h < 0] = 0
            loss_h[c, y[c]] = 0
            loss_mh_sum = (np.sum(loss_h) / X.shape[0])

            dW[k, l] = loss_ph_sum - loss_mh_sum

    dW = (dW + reg * 2 * W * step_size + step_size**2) / (2 * step_size)

    print(dW.shape)
    print(dW[0, 0:10])

    print("dW2")
    print(dW2.shape)
    print(dW2[0, 0:10])

    loss[loss < 0] = 0
    loss[c, y[c]] = 0

    difference = np.linalg.norm(dW - dW2, ord='fro')
    print("diff ", difference)

    loss_sum = (np.sum(loss) / X.shape[0]) + 0.5 * reg * np.sum(W * W)

    print(loss_sum)
Example #19
    def train(self,
              X,
              y,
              learning_rate=1e-3,
              reg=1e-5,
              num_iters=100,
              batch_size=200,
              verbose=False):
        """
        Train this linear classifier using stochastic gradient descent.

        Inputs:
        - X: A numpy array of shape (N, D) containing training data; there are N
          training samples each of dimension D.
        - y: A numpy array of shape (N,) containing training labels; y[i] = c
          means that X[i] has label 0 <= c < C for C classes.
        - learning_rate: (float) learning rate for optimization.
        - reg: (float) regularization strength.
        - num_iters: (integer) number of steps to take when optimizing
        - batch_size: (integer) number of training examples to use at each step.
        - verbose: (boolean) If true, print progress during optimization.

        Outputs:
        A list containing the value of the loss function at each training iteration.
        """
        num_train, dim = X.shape
        num_classes = np.max(y) + 1  # assume y takes values 0...K-1 where K is number of classes
        if self.W is None:
            # lazily initialize W
            self.W = 0.001 * np.random.randn(dim, num_classes)

        # Run stochastic gradient descent to optimize W
        loss_history = []
        for it in range(num_iters):
            X_batch = None
            y_batch = None

            #########################################################################
            # TODO:                                                                 #
            # Sample batch_size elements from the training data and their           #
            # corresponding labels to use in this round of gradient descent.        #
            # Store the data in X_batch and their corresponding labels in           #
            # y_batch; after sampling X_batch should have shape (batch_size, dim)   #
            # and y_batch should have shape (batch_size,)                           #
            #                                                                       #
            # Hint: Use np.random.choice to generate indices. Sampling with         #
            # replacement is faster than sampling without replacement.              #
            #########################################################################
            indexes = np.arange(num_train)
            np.random.shuffle(indexes)
            indexes = indexes[:batch_size]
            X_batch = X[indexes]  # sample batch_size examples
            y_batch = y[indexes]
            l, dW = svm_loss_naive(self.W, X_batch, y_batch, reg)
            #from cs231n.gradient_check import grad_check_sparse
            #loss1, grad1 = svm_loss_naive(self.W, X_batch, y_batch, 5e1)
            #f = lambda w: svm_loss_naive(w, X_batch, y_batch, 5e1)[0]
            #grad_numerical = grad_check_sparse(f, self.W, grad1)
            if it % 100 == 0 and verbose:
                print("%d : %f" % (it, l))
            loss_history.append(l)
            self.W += -learning_rate * dW  # perform parameter update
        #l, dW = svm_loss_vectorized(self.W, X_batch, y_batch, reg)

        return loss_history
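The hint in the TODO block above suggests np.random.choice with replacement rather than the shuffle used in the loop; the equivalent sampling (same num_train, batch_size, X, y names as in the method) would be:

# Sample the minibatch with replacement, as the hint suggests.
batch_idx = np.random.choice(num_train, batch_size, replace=True)
X_batch = X[batch_idx]
y_batch = y[batch_idx]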
Example #20
# ## SVM Classifier
#
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**.
#
# As you can see, we have prefilled the function `compute_loss_naive` which uses for loops to evaluate the multiclass SVM loss function.

# In[ ]:

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))

# The `grad` returned from the function above is right now all zero. Derive the gradient for the SVM cost function and implement it inline inside the function `svm_loss_naive`. You will find it helpful to interleave your new code inside the existing function.
#
# To check that you have implemented the gradient correctly, you can numerically estimate the gradient of the loss function and compare the numeric estimate to the gradient that you computed. We have provided code that does this for you:

# In[ ]:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
# loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
Example #21
mean_image = np.mean(X_train, axis=0)
X_train -= mean_image
X_val -= mean_image
X_test -= mean_image
X_dev -= mean_image
# Append bias dimension of ones (bias trick) so SVM only needs to optimize
# a single weight matrix W
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])  # second dim becomes 3072+1=3073
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])
''' Evaluate naive implementation of loss '''
# Generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001  # 3073x10 (10 classes)
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0)

# # Numerically compute the gradient along several randomly chosen dimensions
# #   and compare with analytically computed gradient (grad)
# f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0] # Returns the loss
# grad_numerical = grad_check_sparse(f, W, grad)
# # Again with the regularization turned on
# loss, grad = svm_loss_naive(W, X_dev, y_dev, 5e1)
# f = lambda w: svm_loss_naive(w, X_dev, y_dev, 5e1)[0] # Returns the loss
# grad_numerical = grad_check_sparse(f, W, grad)
''' Evaluate vectorized implementation of loss '''
loss_v, grad_v = svm_loss_vectorized(W, X_dev, y_dev, 0)
print("Gradient difference", np.linalg.norm(grad - grad_v))
print("Loss difference", loss - loss_v)
''' Implement Stochastic Gradient Descent to minimize loss '''
svm = LinearSVM()
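A typical continuation of this snippet (names from the code above, the usual y_train / y_val label arrays assumed; hyperparameters are illustrative, and svm.predict is assumed to follow the usual LinearSVM interface):

import time

tic = time.time()
loss_hist = svm.train(X_train, y_train, learning_rate=1e-7, reg=2.5e4,
                      num_iters=1500, verbose=True)
toc = time.time()
print('That took %fs' % (toc - tic))

y_train_pred = svm.predict(X_train)
print('training accuracy: %f' % np.mean(y_train == y_train_pred))
y_val_pred = svm.predict(X_val)
print('validation accuracy: %f' % np.mean(y_val == y_val_pred))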
Example #22
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])


#print X_train[0]
print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)


# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))


# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
#loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
#f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
#grad_numerical = grad_check_sparse(f, W, grad)
Example #23
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

#%%
# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

#%%
# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: svm_loss_naive(w, X_dev, y_dev, 0.0)[0]  # lambda creates a throw-away function
Example #24
def test2():
    cifar10_dir = 'cs231n/datasets/cifar-10-batches-py'
    X_train, y_train, X_test, y_test = load_CIFAR10(cifar10_dir)

    num_training = 49000
    num_validation = 1000
    mask = range(num_training, num_training + num_validation)
    X_val = X_train[mask]
    y_val = y_train[mask]

    mask = range(num_training)
    X_train = X_train[mask]
    y_train = y_train[mask]

    num_test = 1000
    mask = range(num_test)
    X_test = X_test[mask]
    y_test = y_test[mask]

    # print 'Train data shape: ', X_train.shape
    # print 'Train labels shape: ', y_train.shape
    # print 'Validation data shape: ', X_val.shape
    # print 'Validation labels shape: ', y_val.shape
    # print 'Test data shape: ', X_test.shape
    # print 'Test labels shape: ', y_test.shape

    # Preprocessing: reshape the image data into rows
    X_train = np.reshape(X_train, (X_train.shape[0], -1))
    X_val = np.reshape(X_val, (X_val.shape[0], -1))
    X_test = np.reshape(X_test, (X_test.shape[0], -1))

    # As a sanity check, print out the shapes of the data
    # print 'Training data shape: ', X_train.shape
    # print 'Validation data shape: ', X_val.shape
    # print 'Test data shape: ', X_test.shape

    mean_image = np.mean(X_train, axis=0)
    #print mean_image[:10]
    #plt.figure(figsize=(4,4))
    #plt.imshow(mean_image.reshape((32,32,3)).astype('uint8'))
    #plt.savefig('./figures/svm_mean.png')
    X_train -= mean_image
    X_val -= mean_image
    X_test -= mean_image

    X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
    X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
    X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T

    svm_sgd(X_train, y_train, X_val, y_val, X_test, y_test)
    #softmax_sgd(X_train,y_train, X_val, y_val,X_test,y_test)
    return
    W = np.random.randn(10, 3073) * 0.0001
    # loss, grad = svm_loss_naive(W,X_train, y_train,0.00001)
    # print 'loss: %f' %(loss,)

    #loss, grad = svm_loss_naive(W,X_train,y_train,0.0)
    #loss, grad = svm_loss_vectorized(W,X_train,y_train,0.0)

    #f = lambda w: svm_loss_naive(w,X_train, y_train,0.0)[0]
    #grad_check_sparse(f, W, grad, 10)

    tic = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, X_train, y_train, 0.00001)
    toc = time.time()
    print('Naive loss: %e, computed in %fs' % (loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vector = svm_loss_vectorized(W, X_train, y_train,
                                                       0.00001)
    toc = time.time()
    print('Vectorized loss: %e, computed in %fs' % (loss_vectorized, toc - tic))

    # The losses should match but your vectorized implementation should be much faster.
    print('difference: %f' % (loss_naive - loss_vectorized))
    difference = np.linalg.norm(grad_naive - grad_vector, ord='fro')
    print('difference of grad: %f' % difference)
Example #25
# ## SVM Classifier
# 
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**. 
# 
# As you can see, we have prefilled the function `compute_loss_naive` which uses for loops to evaluate the multiclass SVM loss function. 

# In[ ]:

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001 

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))


# The `grad` returned from the function above is right now all zero. Derive the gradient for the SVM cost function and implement it inline inside the function `svm_loss_naive`. You will find it helpful to interleave your new code inside the existing function.
#
# To check that you have implemented the gradient correctly, you can numerically estimate the gradient of the loss function and compare the numeric estimate to the gradient that you computed. We have provided code that does this for you:

# In[ ]:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)
Example #27
# only has to worry about optimizing a single weight matrix W.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))])
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))])
X_dev = np.hstack([X_dev, np.ones((X_dev.shape[0], 1))])

print(X_train.shape, X_val.shape, X_test.shape, X_dev.shape)

#%%

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001 

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.00001)
print('loss: %f' % (loss, ))
Example #28
# ## SVM Classifier
#
# Your code for this section will all be written inside **cs231n/classifiers/linear_svm.py**.
#
# As you can see, we have prefilled the function `compute_loss_naive` which uses for loops to evaluate the multiclass SVM loss function.

# In[31]:

# Evaluate the naive implementation of the loss we provided for you:
from cs231n.classifiers.linear_svm import svm_loss_naive
import time

# generate a random SVM weight matrix of small numbers
W = np.random.randn(3073, 10) * 0.0001

loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

# The `grad` returned from the function above is right now all zero. Derive the gradient for the SVM cost function and implement it inline inside the function `svm_loss_naive`. You will find it helpful to interleave your new code inside the existing function.
#
# To check that you have implemented the gradient correctly, you can numerically estimate the gradient of the loss function and compare the numeric estimate to the gradient that you computed. We have provided code that does this for you:

# In[32]:

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
def SVM(train_data, train_label, validation_data, validation_label, test_data, test_label):
    W = np.random.randn(10, 3072) * 0.0001
    loss, grad = svm_loss_naive(W, train_data, train_label, 0.000005)
    print('loss: %f \n' % loss)
    '''
    f=lambda w: svm_loss_naive(w, train_data,train_label,0.0)[0]
    grad_numerical=grad_check_sparse(f,W,grad,10)
    loss, grad = svm_loss_naive(W,train_data,train_label,5e1)
    f=lambda w:svm_loss_naive(w,train_data,train_label,5e1)[0]
    grad_numerical=grad_check_sparse(f,W,grad,10)

    t1 = time.time()
    loss_naive, grad_naive = svm_loss_naive(W, train_data, train_label, 0.000005)
    t2 = time.time()
    print '\nNaive Loss: %e computed in %fs'%(loss_naive, t2-t1)

    t1 = time.time()
    loss_vectorized,grad_vectorized = svm_loss_vectorized(W, train_data, train_label, 0.000005)
    t2 = time.time()
    print 'Vectorised loss and gradient: %e computed in %fs\n'%(loss_vectorized, t2-t1)

    difference = np.linalg.norm(grad_naive-grad_vectorized, ord='fro')
    print 'difference: %f'%difference
    '''
    from cs231n.classifiers import LinearSVM

    svm = LinearSVM()
    t1 = time.time()
    loss_hist = svm.train(train_data, train_label,
                          learning_rate=1e-7, reg=5e4, num_iters=1000, verbose=True)
    t2 = time.time()
    print('That took %fs' % (t2 - t1))

    plt.plot(loss_hist)
    plt.xlabel('Iteration number')
    plt.ylabel('Loss value')
    plt.show()

    train_label_predict = svm.predict(train_data)
    print('Training accuracy: %f' % np.mean(train_label == train_label_predict))
    validation_label_predict = svm.predict(validation_data)
    print('Validation accuracy: %f' % np.mean(validation_label == validation_label_predict))

    learning_rates = [1e-7, 2e-7, 5e-7, 1e-6]
    regularization_strengths = [1e4, 2e4, 5e4, 1e5, 5e5, 1e6]

    results = {}
    best_val = -1
    best_svm = None

    for learning in learning_rates:
        for regularization in regularization_strengths:
            svm = LinearSVM()
            svm.train(train_data, train_label, learning_rate=learning,
                      reg=regularization, num_iters=2000)
            train_label_predict = svm.predict(train_data)
            train_accuracy = np.mean(train_label_predict == train_label)
            print('Training accuracy: %f' % train_accuracy)
            validation_label_predict = svm.predict(validation_data)
            val_accuracy = np.mean(validation_label_predict == validation_label)
            print('Validation accuracy: %f' % val_accuracy)

            if val_accuracy > best_val:
                best_val = val_accuracy
                best_svm = svm

            results[(learning, regularization)] = (
                train_accuracy, val_accuracy)

    for lr, reg in sorted(results):
        train_accuracy, val_accuracy = results[(lr, reg)]
        print('lr %e reg %e train accuracy: %f val accuracy %f' % (lr, reg, train_accuracy, val_accuracy))
    print('Best validation accuracy achieved during cross validation: %f' % best_val)

    x_scatter = [math.log10(x[0]) for x in results]
    y_scatter = [math.log10(x[1]) for x in results]

    sz = [results[x][0] * 1500 for x in results]
    plt.subplot(1, 1, 1)
    plt.scatter(x_scatter, y_scatter, sz)
    plt.xlabel('log learning rate')
    plt.ylabel('log regularization strength')
    plt.title('Cifar-10 training accuracy')
    plt.show()

    sz = [results[x][1] * 1500 for x in results]
    plt.subplot(1, 1, 1)
    plt.scatter(x_scatter, y_scatter, sz)
    plt.xlabel('log learning rate')
    plt.ylabel('log regularization strength')
    plt.title('Cifar-10 validation accuracy')
    plt.show()

    y_test_pred = best_svm.predict(test_data)
    test_accuracy = np.mean(y_test_pred == test_label)
    print('Linear SVM on raw pixels final test set accuracy: %f' % test_accuracy)

    print(best_svm.W.shape)
    w = best_svm.W[:, :]
    print(w.shape)
    w = w.reshape(10, 32, 32, 3)
    w_min, w_max = np.min(w), np.max(w)
    classes = ['plane', 'car', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
    for i in range(10):
        plt.subplot(2, 5, i + 1)
        wimg = 255.0 * (w[i].squeeze() - w_min) / (w_max - w_min)
        plt.imshow(wimg.astype('uint8'))
        plt.axis('off')
        plt.title(classes[i])
    plt.show()
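For completeness, LinearSVM.predict, which the snippets above call but never show, is conventionally just an argmax over class scores; a minimal standalone sketch (written here as a free function over a (D, C) weight matrix rather than as the class method):

import numpy as np

def predict(W, X):
    # Highest-scoring class for each row of X; W has shape (D, C), X has shape (N, D).
    return np.argmax(X.dot(W), axis=1)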