def loss(self, X, y, reg=1e-5, kind=0):
    if kind == 0:
        return loss_SVM(self.W, X, y, reg)
    elif kind == 1:
        return loss_softmax(self.W, X, y, reg)
    elif kind == 2:
        return softmax_loss_naive(self.W, X, y, reg)
def Softmax(train_data, train_label, validation_data, validation_label, test_data, test_label):
    W = np.random.randn(10, 3072) * 0.0001
    '''
    loss, grad = softmax_loss_naive(W, train_data, train_label, 0.000005)
    print('loss: %f \n' % loss)
    print('sanity check: %f' % (-np.log(0.1)))

    def f(w):
        return softmax_loss_naive(w, train_data, train_label, 0.0)[0]
    grad_numerical = grad_check_sparse(f, W, grad, 10)

    loss, grad = softmax_loss_naive(W, train_data, train_label, 5e1)

    def f(w):
        return softmax_loss_naive(w, train_data, train_label, 5e1)[0]
    grad_numerical = grad_check_sparse(f, W, grad, 10)
    '''
    tic = time.time()
    loss_naive, grad_naive = softmax_loss_naive(
        W, train_data, train_label, 0.000005)
    toc = time.time()
    print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vectorized = softmax_loss_vectorized(
        W, train_data, train_label, 0.000005)
    toc = time.time()
    print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

    grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
    print('Gradient difference: %f' % grad_difference)
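# The snippets in this section time and compare softmax_loss_naive against
# softmax_loss_vectorized without showing either body. Below is a minimal
# sketch of what a vectorized version can look like, assuming the common
# convention W: (D, C), X: (N, D), y: (N,) integer labels (some snippets in
# this section transpose W). The "_sketch" suffix marks it as an
# illustration, not the assignment's reference implementation.
import numpy as np


def softmax_loss_vectorized_sketch(W, X, y, reg):
    """Fully vectorized softmax loss and gradient (sketch only)."""
    num_train = X.shape[0]

    scores = X.dot(W)                                    # (N, C)
    scores -= scores.max(axis=1, keepdims=True)          # numeric stability
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)

    # Average cross-entropy loss plus L2 regularization on W.
    loss = -np.log(probs[np.arange(num_train), y]).mean() + reg * np.sum(W * W)

    # d(loss)/d(scores) is probs with 1 subtracted at the correct class.
    dscores = probs.copy()
    dscores[np.arange(num_train), y] -= 1
    dW = X.T.dot(dscores) / num_train + 2 * reg * W

    return loss, dW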
def test_softmax_random_weights(sample_train, weight_size=0.0001, regularization=1.0):
    Xtrain, ytrain = sample_train(count=7000)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    # Bias trick: append a column of ones.
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(3073, 10) * weight_size
    loss, grad = softmax_loss_naive(W, Xtrain, ytrain, regularization)
    # With small random weights the loss should sit near -log(0.1) ~= 2.3.
    assert 1.8 < loss < 2.8
def test_softmax_loss_naive_vectorized_comparison(sample_train_with_bias, train_count):
    Xtrain, ytrain = sample_train_with_bias(count=train_count)
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 1e2)
    loss_naive, grad_naive = softmax_loss_naive(W, Xtrain, ytrain, 1e2)
    # The loop-based and vectorized implementations should agree closely.
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
def test_softmax_loss_naive_no_bias_X(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    # W is sized for data with a bias column (i.e. using the correct W size),
    # but X never gets one, so the shape mismatch should raise.
    W = np.random.randn(Xtrain.shape[1] + 1, 10) * 0.0001
    with pytest.raises(ValueError):
        loss, grad = softmax_loss_naive(W, Xtrain, ytrain, 1e2)
def test_softmax_loss_naive_vectorized_comparison_reg(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_naive, grad_naive = softmax_loss_naive(W, Xtrain, ytrain, reg)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
def test_softmax_loss_naive_no_bias_W(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    # Using the incorrect W size: no room for the bias dimension.
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    # Add the bias dimension to X only, so W and X no longer match.
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    with pytest.raises(ValueError):
        loss, grad = softmax_loss_naive(W, Xtrain, ytrain, 1e2)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)

# First implement the naive softmax loss function with nested loops.
# Open the file cs231n/classifiers/softmax.py and implement the
# softmax_loss_naive function.
from cs231n.classifiers.softmax import softmax_loss_naive

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1).
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))

# Complete the implementation of softmax_loss_naive and implement a (naive)
# version of the gradient that uses nested loops.
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

# As we did for the SVM, use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient.
from cs231n.gradient_check import grad_check_sparse
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)
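# The comments above ask for a nested-loop softmax_loss_naive in
# cs231n/classifiers/softmax.py. The following shows one possible shape for a
# loop-based implementation, under the same W: (D, C), X: (N, D) assumptions
# as the vectorized sketch earlier; it is illustrative, not the assignment's
# reference solution.
def softmax_loss_naive_sketch(W, X, y, reg):
    """Softmax loss and gradient with explicit loops (sketch only)."""
    loss = 0.0
    dW = np.zeros_like(W)
    num_train, num_classes = X.shape[0], W.shape[1]

    for i in range(num_train):
        scores = X[i].dot(W)             # (C,)
        scores -= scores.max()           # numeric stability
        exp_scores = np.exp(scores)
        probs = exp_scores / exp_scores.sum()

        loss += -np.log(probs[y[i]])
        for c in range(num_classes):
            # d(loss_i)/d(score_c) = probs[c] - 1{c == y[i]}
            dW[:, c] += (probs[c] - (c == y[i])) * X[i]

    loss = loss / num_train + reg * np.sum(W * W)
    dW = dW / num_train + 2 * reg * W
    return loss, dW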
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)

from cs231n.classifiers.softmax import softmax_loss_naive
import time

W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

# Rough sanity check: the loss should be close to -log(0.1).
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))

# Compute the gradient and verify it with a numerical gradient check.
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)
from cs231n.gradient_check import grad_check_sparse
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

# Repeat the check with regularization turned on.
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)
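# The gradient checks above call cs231n.gradient_check.grad_check_sparse.
# The sketch below illustrates the idea behind such a check: sample a few
# random coordinates of W, estimate each partial derivative with a centered
# difference, and report the relative error against the analytic gradient.
# The name and output format here are illustrative, not the cs231n helper's
# actual code.
def grad_check_sparse_sketch(f, x, analytic_grad, num_checks=10, h=1e-5):
    for _ in range(num_checks):
        ix = tuple(np.random.randint(n) for n in x.shape)

        old_val = x[ix]
        x[ix] = old_val + h
        fxph = f(x)                      # f(x + h)
        x[ix] = old_val - h
        fxmh = f(x)                      # f(x - h)
        x[ix] = old_val                  # restore the entry

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numerical - grad_analytic) /
                     (abs(grad_numerical) + abs(grad_analytic) + 1e-12))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))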
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)

# First implement the naive softmax loss function with nested loops.
# Open the file cs231n/classifiers/softmax.py and implement the
# softmax_loss_naive function.

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

'''
# As a rough sanity check, our loss should be something close to -log(0.1).
print('=============================================')
print('loss using loop')
print('=============================================')
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))

print('=============================================')
print('gradient check for loop')
print('=============================================')
# Complete the implementation of softmax_loss_naive and implement a (naive)
# version of the gradient that uses nested loops.
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)
    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev


# Clean up variables to prevent loading data multiple times (which may cause memory issues).
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except:
    pass

# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data()
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1).
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))
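# Why -log(0.1): with weights scaled by 1e-4 the scores are all close to
# zero, so the softmax distribution over the 10 CIFAR-10 classes is roughly
# uniform and the correct class gets probability about 1/10. The expected
# initial cross-entropy loss is therefore about -log(1/10) ~= 2.3026, which
# is what the sanity check above prints. A quick numeric restatement:
num_classes = 10
p_correct_at_init = 1.0 / num_classes        # roughly uniform softmax output
print('expected initial loss: %f' % -np.log(p_correct_at_init))   # ~2.302585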
address = './cs231n/datasets/cifar-10-batches-py/'
xTrain, yTrain, xTest, yTest = load_CIFAR10(address)

lengthTrain = 5000
lengthTest = 100
xTrain = np.reshape(xTrain[:lengthTrain], (lengthTrain, -1))
# yTrain = np.reshape(yTrain[:lengthTrain])
yTrain = yTrain[:lengthTrain]
xTest = np.reshape(xTest[:lengthTest], (lengthTest, -1))
# yTest = np.reshape(yTest[:lengthTest])
yTest = yTest[:lengthTest]

# Standardize the training data: zero mean, unit variance per pixel.
xTrain = (xTrain - np.mean(xTrain, axis=0)) / np.std(xTrain, axis=0)

W = np.random.randn(xTrain.shape[1], 10) * 0.001
loss, grad = softmax.softmax_loss_naive(W, xTrain, yTrain, 100)
# exit()

f = lambda w: softmax.softmax_loss_naive(w, xTrain, yTrain, 0)[0]
# grad_numerical = gradient_check.grad_check_sparse(f, W, grad, 10)

loss_naive, grad_naive = softmax.softmax_loss_naive(W, xTrain, yTrain, 0)
loss_vectorized, grad_vectorized = softmax.softmax_loss_vectorized(W, xTrain, yTrain, 0)
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print(loss_naive)
print(loss_vectorized)
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)

################
# softmax
################
# First implement the naive softmax loss function with nested loops.
# Open the file cs231n/classifiers/softmax.py and implement the
# softmax_loss_naive function.
from cs231n.classifiers.softmax import softmax_loss_naive
import time

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(10, 3073) * 0.0001
loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_train, y_train, 0.00001)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.softmax import softmax_loss_vectorized
tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(
    W, X_train, y_train, 0.00001)
toc = time.time()
# # Your code for this section will all be written inside **cs231n/classifiers/softmax.py**.
#
# In[ ]:

# First implement the naive softmax loss function with nested loops.
# Open the file cs231n/classifiers/softmax.py and implement the
# softmax_loss_naive function.
from cs231n.classifiers.softmax import softmax_loss_naive
import time

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1).
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))

# ## Inline Question 1:
# **Why do we expect our loss to be close to -log(0.1)? Explain briefly.**
#
# **Your answer:** *Fill this in*
#
# In[ ]:

# Complete the implementation of softmax_loss_naive and implement a (naive)
print "Test data shape: ", X_test.shape print "Test labels shape: ", y_test.shape ################ # softmax ################ # First implement the naive softmax loss function with nested loops. # Open the file cs231n/classifiers/softmax.py and implement the # softmax_loss_naive function. from cs231n.classifiers.softmax import softmax_loss_naive import time # Generate a random softmax weight matrix and use it to compute the loss. W = np.random.randn(10, 3073) * 0.0001 loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0) # Now that we have a naive implementation of the softmax loss function and its gradient, # implement a vectorized version in softmax_loss_vectorized. # The two versions should compute the same results, but the vectorized version should be # much faster. tic = time.time() loss_naive, grad_naive = softmax_loss_naive(W, X_train, y_train, 0.00001) toc = time.time() print "naive loss: %e computed in %fs" % (loss_naive, toc - tic) from cs231n.classifiers.softmax import softmax_loss_vectorized tic = time.time() loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X_train, y_train, 0.00001) toc = time.time()
# third: append the bias dimension of ones (i.e. bias trick) so that our SVM
# only has to worry about optimizing a single weight matrix W.
# Also, let's transform both data matrices so that each image is a column.
X_train = np.hstack([X_train, np.ones((X_train.shape[0], 1))]).T
X_val = np.hstack([X_val, np.ones((X_val.shape[0], 1))]).T
X_test = np.hstack([X_test, np.ones((X_test.shape[0], 1))]).T
print(X_train.shape, X_val.shape, X_test.shape)

#######################################################################################
#                               Softmax Classifier                                    #
#######################################################################################
# Evaluate the naive implementation of the loss:
# generate a random weight matrix of small numbers.
W = np.random.randn(num_classes, X_train.shape[0]) * 0.0001
loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)
print('naive softmax loss is %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))

# Complete the implementation of softmax_loss_naive and implement a (naive)
# version of the gradient that uses nested loops.
loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)

# As we did for the SVM, use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient.
f = lambda w: softmax_loss_naive(w, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.