Example #1
def test_softmax_loss_vectorized_numerical_check(sample_train,
                                                 train_count,
                                                 reg=0.0,
                                                 check_count=20):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    mean_image = np.mean(Xtrain, axis=0)
    Xtrain -= mean_image
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 0.)

    f = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, 0.0)[0]
    g = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, 0.0)[1]
    # Centered-difference check: for a one-entry perturbation of size h at index ix,
    # (f(W + h*e_ix) - f(W - h*e_ix)) / (2h) should approximate the analytic
    # gradient entry grad[ix] returned by the vectorized implementation.

    num_checks = check_count
    grad_analytic = g(W)
    for i in range(num_checks):
        ix = tuple([random.randrange(m) for m in W.shape])
        shift = np.zeros(W.shape)
        shift[ix] = 1e-7
        grad_numerical = (f(W + shift) - f(W - shift)) / (2 * 1e-7)
        assert (abs(grad_numerical - grad_analytic[ix]) /
                (abs(grad_numerical) + abs(grad_analytic[ix])) < 0.0001)
Example #2
def test_softmax_loss_vectorized_comparison_mean(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))

    mean_image = np.mean(Xtrain, axis=0)
    Xtrain_mean_removed = Xtrain - mean_image

    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    Xtrain_mean_removed = np.hstack([Xtrain_mean_removed, np.ones((Xtrain_mean_removed.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1],10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_mean_removed, grad_mean_removed = softmax_loss_vectorized(W, Xtrain_mean_removed, ytrain, reg)
    #assert np.abs(loss - loss_mean_removed) > 0.01
    assert np.linalg.norm(grad - grad_mean_removed) > 1.0
Example #3
def softmax_test_vectorized(x, y):
    # Generate a small random softmax weight matrix: standard normal entries scaled by 0.0001.
    W = np.random.randn(3073, 10) * 0.0001
    # Compute the softmax classifier loss and the gradient w.r.t. W (no regularization).
    loss, gradient = softmax_loss_vectorized(W, x, y, 0.0)
    # Randomly pick a few entries of W and compare numeric against analytic gradients to
    # verify correctness; the random sampling happens inside gradient_check.
    # Define a lambda that returns only the loss value.
    f = lambda w: softmax_loss_vectorized(w, x, y, 0.0)[0]
    grad_check_sparse(f, W, gradient)
    print('turn on regularization')
    # Compute the softmax classifier loss and the gradient w.r.t. W (with regularization).
    loss, gradient = softmax_loss_vectorized(W, x, y, 5e1)
    # Randomly pick a few entries of W and compare numeric against analytic gradients to
    # verify correctness; the random sampling happens inside gradient_check.
    # Define a lambda that returns only the loss value.
    f = lambda w: softmax_loss_vectorized(w, x, y, 5e1)[0]
    grad_check_sparse(f, W, gradient)
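
The grad_check_sparse helper used above comes from cs231n.gradient_check and is not reproduced on this page. A minimal sketch of what such a sparse numeric check typically does is shown below; the function name, the h step size, and the printed format are illustrative assumptions, and the structure mirrors the hand-rolled loop in Example #1.

import random
import numpy as np

def grad_check_sparse_sketch(f, x, analytic_grad, num_checks=10, h=1e-5):
    # Sample a few random entries of x and compare a centered numeric
    # difference of f against the analytic gradient at the same entry.
    for _ in range(num_checks):
        ix = tuple(random.randrange(m) for m in x.shape)
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)                       # f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)                       # f(x - h)
        x[ix] = oldval                    # restore the entry
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numerical - grad_analytic) /
                     (abs(grad_numerical) + abs(grad_analytic) + 1e-12))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))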
Example #4
def test_softmax_loss_vectorized_numerical_gradient(sample_train, train_count, reg=0.0):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    mean_image = np.mean(Xtrain, axis=0)
    Xtrain -= mean_image
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1],10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 0.)

    f = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, reg)[0]
    g = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, reg)[1]

    grad_analytic = g(W)
    param_grad_num = eval_numerical_gradient(f, W, verbose=False, h=1e-7)
    assert rel_error(param_grad_num, grad_analytic) < 1e-4
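
Examples #4 and #8 also rely on rel_error and eval_numerical_gradient, neither of which appears on this page. A sketch of the usual definitions follows; treat the exact formulas (in particular the 1e-8 floor inside rel_error and the h=1e-5 default) as assumptions rather than the cs231n originals.

import numpy as np

def rel_error(x, y):
    # Maximum relative error between two arrays, with a small floor so that
    # near-zero entries do not cause a division by zero.
    return np.max(np.abs(x - y) / np.maximum(1e-8, np.abs(x) + np.abs(y)))

def eval_numerical_gradient(f, x, verbose=True, h=1e-5):
    # Dense centered-difference gradient of the scalar function f at x,
    # evaluated one coordinate at a time.
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'])
    while not it.finished:
        ix = it.multi_index
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)
        x[ix] = oldval - h
        fxmh = f(x)
        x[ix] = oldval
        grad[ix] = (fxph - fxmh) / (2 * h)
        if verbose:
            print(ix, grad[ix])
        it.iternext()
    return grad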
Example #5
def Softmax(train_data, train_label, validation_data, validation_label, test_data, test_label):
    W = np.random.randn(10, 3072) * 0.0001
    '''
    loss, grad = softmax_loss_naive(W, train_data, train_label, 0.000005)
    print 'loss: %f \n' % loss
    print 'sanity check: %f' % (-np.log(0.1))

    def f(w): return softmax_loss_naive(w, train_data, train_label, 0.0)[0]
    grad_numerical = grad_check_sparse(f, W, grad, 10)

    loss, grad = softmax_loss_naive(W, train_data, train_label, 5e1)

    def f(w): return softmax_loss_naive(w, train_data, train_label, 5e1)[0]
    grad_numerical = grad_check_sparse(f, W, grad, 10)
    '''
    tic = time.time()
    loss_naive, grad_naive = softmax_loss_naive(
        W, train_data, train_label, 0.000005)
    toc = time.time()
    print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

    tic = time.time()
    loss_vectorized, grad_vectorized = softmax_loss_vectorized(
        W, train_data, train_label, 0.000005)
    toc = time.time()
    print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

    grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
    print('Gradient difference: %f' % grad_difference)
Example #6
def test_softmax_loss_naive_vectorized_comparison(sample_train_with_bias, train_count):
    Xtrain, ytrain = sample_train_with_bias(count=train_count)

    W = np.random.randn(Xtrain.shape[1],10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 1e2)
    loss_naive, grad_naive = softmax_loss_naive(W, Xtrain, ytrain, 1e2)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
Example #7
def test_softmax_loss_vectorized_no_bias_X(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))

    W = np.random.randn(Xtrain.shape[1] + 1, 10) * 0.0001
    with pytest.raises(ValueError):
        loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 1e2)
Example #8
def test_softmax_loss_vectorized_numerical_gradient(sample_train,
                                                    train_count,
                                                    reg=0.0):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    mean_image = np.mean(Xtrain, axis=0)
    Xtrain -= mean_image
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 0.)

    f = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, reg)[0]
    g = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, reg)[1]

    grad_analytic = g(W)
    param_grad_num = eval_numerical_gradient(f, W, verbose=False, h=1e-7)
    assert rel_error(param_grad_num, grad_analytic) < 1e-4
Example #9
def test_softmax_loss_vectorized_no_bias_X(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest   = sample_test(count=20)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))

    W = np.random.randn(Xtrain.shape[1] + 1,10) * 0.0001
    with pytest.raises(ValueError):
        loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 1e2)
Example #10
def test_softmax_loss_naive_vectorized_comparison(sample_train_with_bias,
                                                  train_count):
    Xtrain, ytrain = sample_train_with_bias(count=train_count)

    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 1e2)
    loss_naive, grad_naive = softmax_loss_naive(W, Xtrain, ytrain, 1e2)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
Example #11
def test_softmax_loss_vectorized_comparison_mean(sample_train, train_count,
                                                 reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))

    mean_image = np.mean(Xtrain, axis=0)
    Xtrain_mean_removed = Xtrain - mean_image

    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])
    Xtrain_mean_removed = np.hstack(
        [Xtrain_mean_removed,
         np.ones((Xtrain_mean_removed.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_mean_removed, grad_mean_removed = softmax_loss_vectorized(
        W, Xtrain_mean_removed, ytrain, reg)
    #assert np.abs(loss - loss_mean_removed) > 0.01
    assert np.linalg.norm(grad - grad_mean_removed) > 1.0
Example #12
def test_softmax_loss_naive_vectorized_comparison_reg(sample_train, train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1],10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_naive, grad_naive = softmax_loss_naive(W, Xtrain, ytrain, reg)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
Example #13
def run_softmax_naive(X_train, y_train):
    # Generate a random softmax weight matrix and use it to compute the loss.
    W = np.random.randn(10, 3073) * 0.0001
    start = time.clock()
    loss, grad = softmax_loss_vectorized(W, X_train, y_train, 0.0)
    end = time.clock()
    print "softmax_loss_naive: %f s" % (end - start)
    # As a rough sanity check, our loss should be something close to -log(0.1).
    print 'loss: %f' % loss
    print 'sanity check: %f' % (-np.log(0.1))
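
The -log(0.1) sanity check printed here works because, with weights this small, every class score is essentially zero, so the softmax probabilities are close to uniform over the 10 classes and the per-example cross-entropy loss is close to -log(1/10) ≈ 2.3026. A tiny self-contained check of that arithmetic (Python 3 syntax, unlike the snippet above):

import numpy as np

num_classes = 10
scores = np.zeros(num_classes)                    # near-zero scores from a tiny W
probs = np.exp(scores) / np.sum(np.exp(scores))   # uniform: every entry is 0.1
print(probs)                                      # [0.1 0.1 ... 0.1]
print(-np.log(probs[0]))                          # ~2.302585
print(-np.log(0.1))                               # the same sanity-check value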
Example #14
def test_softmax_loss_naive_vectorized_comparison_reg(sample_train,
                                                      train_count, reg):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, reg)
    loss_naive, grad_naive = softmax_loss_naive(W, Xtrain, ytrain, reg)
    assert np.abs(loss - loss_naive) < 0.0001
    assert np.linalg.norm(grad - grad_naive) < 0.0001
Example #15
def test_softmax_loss_vectorized_no_bias_W(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest = sample_test(count=20)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    #using the incorrect W size
    W = np.random.randn(Xtrain.shape[1], 10) * 0.0001

    #add the bias dimension (to X)
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    with pytest.raises(ValueError):
        loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 1e2)
Example #16
def test_softmax_loss_vectorized_no_bias_W(sample_train, sample_test):
    Xtrain, ytrain = sample_train(count=40)
    Xtest, ytest   = sample_test(count=20)

    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    #using the incorrect W size
    W = np.random.randn(Xtrain.shape[1],10) * 0.0001

    #add the bias dimension (to X)
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    with pytest.raises(ValueError):
        loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 1e2)
Example #17
def test_softmax_loss_vectorized_numerical_check(sample_train, train_count, reg=0.0, check_count=20):
    Xtrain, ytrain = sample_train(count=train_count)
    Xtrain = np.reshape(Xtrain, (Xtrain.shape[0], -1))
    mean_image = np.mean(Xtrain, axis=0)
    Xtrain -= mean_image
    Xtrain = np.hstack([Xtrain, np.ones((Xtrain.shape[0], 1))])

    W = np.random.randn(Xtrain.shape[1],10) * 0.0001
    loss, grad = softmax_loss_vectorized(W, Xtrain, ytrain, 0.)

    f = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, 0.0)[0]
    g = lambda w: softmax_loss_vectorized(w, Xtrain, ytrain, 0.0)[1]
    # Centered-difference check: for a one-entry perturbation of size h at index ix,
    # (f(W + h*e_ix) - f(W - h*e_ix)) / (2h) should approximate the analytic
    # gradient entry grad[ix] returned by the vectorized implementation.

    num_checks = check_count
    grad_analytic = g(W)
    for i in range(num_checks):
        ix = tuple([random.randrange(m) for m in W.shape])
        shift = np.zeros(W.shape)
        shift[ix] = 1e-7
        grad_numerical = (f(W + shift) - f(W - shift)) / (2 * 1e-7)
        assert( abs(grad_numerical - grad_analytic[ix]) / (abs(grad_numerical) + abs(grad_analytic[ix])) < 0.0001)
Example #18
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.00001)
toc = time.time()
print 'naive loss: %e computed in %fs' % (loss_naive, toc - tic)

from cs231n.classifiers.softmax import softmax_loss_vectorized

tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(
    W, X_dev, y_dev, 0.00001)
toc = time.time()
print 'vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic)

# As we did for the SVM, we use the Frobenius norm to compare the two versions
# of the gradient.
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'Loss difference: %f' % np.abs(loss_naive - loss_vectorized)
print 'Gradient difference: %f' % grad_difference

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
from cs231n.classifiers.linear_classifier import Softmax
Example #19
 def loss(self, X_batch, y_batch, reg):
     return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
Example #20
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

#%%
# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from cs231n.classifiers.softmax import softmax_loss_vectorized
tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(
    W, X_dev, y_dev, 0.000005)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# As we did for the SVM, we use the Frobenius norm to compare the two versions
# of the gradient.
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)

# As we did for the SVM, use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient.
from cs231n.gradient_check import grad_check_sparse
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 5e1)
f = lambda w: softmax_loss_vectorized(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)
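
softmax_loss_vectorized itself is never shown in these examples. The sketch below follows the scores = X.dot(W) convention used by most snippets on this page (X of shape (N, D), W of shape (D, C)); the regularization convention (reg * sum(W*W) with gradient 2 * reg * W) is an assumption, since different assignments scale it slightly differently.

import numpy as np

def softmax_loss_vectorized_sketch(W, X, y, reg):
    # W: (D, C) weights, X: (N, D) data rows, y: (N,) integer class labels.
    num_train = X.shape[0]
    scores = X.dot(W)                                  # (N, C)
    scores -= np.max(scores, axis=1, keepdims=True)    # shift for numeric stability
    exp_scores = np.exp(scores)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Average cross-entropy loss plus L2 regularization on W.
    loss = -np.sum(np.log(probs[np.arange(num_train), y])) / num_train
    loss += reg * np.sum(W * W)
    # Gradient: softmax probabilities minus the one-hot labels, pushed back through X.dot(W).
    dscores = probs.copy()
    dscores[np.arange(num_train), y] -= 1
    dW = X.T.dot(dscores) / num_train + 2 * reg * W
    return loss, dW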
Example #21
# yTrain = np.reshape(yTrain[:lengthTrain])
yTrain = yTrain[:lengthTrain]
xTest = np.reshape(xTest[:lengthTest],(lengthTest,-1))
# yTest = np.reshape(yTest[:lengthTest])
yTest = yTest[:lengthTest]
xTrain = (xTrain - np.mean(xTrain,axis = 0))/(np.std(xTrain,axis = 0))

W = np.random.randn(xTrain.shape[1],10)*0.001
loss,grad = softmax.softmax_loss_naive(W,xTrain,yTrain,100)
# exit()

f = lambda w: softmax.softmax_loss_naive(w,xTrain,yTrain,0)[0]
# grad_numerical = gradient_check.grad_check_sparse(f,W,grad,10)

loss_naive,grad_naive = softmax.softmax_loss_naive(W,xTrain,yTrain,0)
loss_vectorized,grad_vectorized = softmax.softmax_loss_vectorized(W,xTrain,yTrain,0)


grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print(loss_naive)
print(loss_vectorized)
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)

# exit()

learning_rate = 3e-4
regStrengths = range(0, 200, 50)
cvFold = 5
bestAccuracy = -1
bestReg = -1
Example #22
# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(10, 3073) * 0.0001
loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_train, y_train, 0.00001)
toc = time.time()
print 'naive loss: %e computed in %fs' % (loss_naive, toc - tic)

from cs231n.classifiers.softmax import softmax_loss_vectorized
tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(
    W, X_train, y_train, 0.00001)
toc = time.time()
print 'vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic)

# As we did for the SVM, we use the Frobenius norm to compare the two versions
# of the gradient.
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'Loss difference: %f' % np.abs(loss_naive - loss_vectorized)
print 'Gradient difference: %f' % grad_difference

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
from cs231n.classifiers.linear_classifier import Softmax
results = {}
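
The excerpt stops right after results = {}; in this kind of notebook a validation-set grid search usually follows. Below is a minimal sketch that reuses the Softmax classifier API (train/predict) seen in Example #24 and the X_train/y_train/X_val/y_val arrays defined earlier in this example; the learning rates and regularization strengths are placeholder values, not the original author's.

import numpy as np

learning_rates = [1e-7, 5e-7]                    # placeholder search ranges
regularization_strengths = [2.5e4, 5e4]

best_val = -1
best_softmax = None
for lr in learning_rates:
    for reg in regularization_strengths:
        softmax = Softmax()
        softmax.train(X_train, y_train, learning_rate=lr, reg=reg,
                      num_iters=1500, verbose=False)
        train_acc = np.mean(softmax.predict(X_train) == y_train)
        val_acc = np.mean(softmax.predict(X_val) == y_val)
        results[(lr, reg)] = (train_acc, val_acc)
        if val_acc > best_val:
            best_val = val_acc
            best_softmax = softmax
print('best validation accuracy achieved during cross-validation: %f' % best_val)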
Example #23

# In[ ]:

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.00001)
toc = time.time()
print 'naive loss: %e computed in %fs' % (loss_naive, toc - tic)

from cs231n.classifiers.softmax import softmax_loss_vectorized
tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print 'vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic)

# As we did for the SVM, we use the Frobenius norm to compare the two versions
# of the gradient.
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'Loss difference: %f' % np.abs(loss_naive - loss_vectorized)
print 'Gradient difference: %f' % grad_difference


# In[ ]:

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
Example #24
import h5py
from cs231n.classifiers import Softmax
from numpy import loadtxt
import numpy as np
from cs231n.classifiers.softmax import softmax_loss_vectorized
h5f = h5py.File('img_data.h5','r')
X = h5f['dataset_1'][:]
h5f.close()
y = loadtxt("y_labels.txt", dtype=np.uint8, delimiter="\n", unpack=False)

#X_train = np.zeros((27116,196608))
#y_train = np.zeros(27116)
#X_val = np.zeros((5000,196608))
#y_val = np.zeros(5000)

X_train = X[8000:35117,:]
y_train = y[8000:35117]
X_val=X[3000:8000,:]
y_val=y[3000:8000]
# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(196608, 5) * 0.0001
loss_vectorized, grad_vectorized = softmax_loss_vectorized(W, X_train, y_train, 0.00001)
softmax=Softmax()
loss_hist = softmax.train(X_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=1500, verbose=False)
y_train_pred = softmax.predict(X_train)
training_accuracy = np.mean(y_train == y_train_pred)
y_val_pred = softmax.predict(X_val)
val_accuracy = np.mean(y_val == y_val_pred)
print 'training accuracy: %f' % (np.mean(y_train == y_train_pred), )
print 'validation accuracy: %f' % (np.mean(y_val == y_val_pred), )
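
The Softmax classifier trained here is a thin SGD wrapper around softmax_loss_vectorized; the def loss(self, X_batch, y_batch, reg) fragments elsewhere on this page show exactly that delegation. A minimal sketch of such a class, with method names and defaults chosen to match the calls above rather than copied from the cs231n source:

import numpy as np
from cs231n.classifiers.softmax import softmax_loss_vectorized

class SoftmaxSketch(object):
    # Minimal linear softmax classifier mirroring the train/predict calls above.
    def __init__(self):
        self.W = None

    def train(self, X, y, learning_rate=1e-7, reg=5e4, num_iters=1500,
              batch_size=200, verbose=False):
        num_train, dim = X.shape
        num_classes = np.max(y) + 1
        if self.W is None:
            self.W = 0.001 * np.random.randn(dim, num_classes)
        loss_history = []
        for it in range(num_iters):
            # One SGD step on the softmax loss of a random minibatch.
            idx = np.random.choice(num_train, batch_size, replace=True)
            loss, grad = softmax_loss_vectorized(self.W, X[idx], y[idx], reg)
            loss_history.append(loss)
            self.W -= learning_rate * grad
            if verbose and it % 100 == 0:
                print('iteration %d / %d: loss %f' % (it, num_iters, loss))
        return loss_history

    def predict(self, X):
        # Predicted class is the argmax of the linear scores X.dot(W).
        return np.argmax(X.dot(self.W), axis=1)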
Example #25
# Cleaning up variables to prevent loading data multiple times (which may cause memory issue)
try:
    del X_train, y_train
    del X_test, y_test
    print('Clear previously loaded data.')
except:
    pass

# Invoke the above function to get our data.
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = get_CIFAR10_data(
)
print('Train data shape: ', X_train.shape)
print('Train labels shape: ', y_train.shape)
print('Validation data shape: ', X_val.shape)
print('Validation labels shape: ', y_val.shape)
print('Test data shape: ', X_test.shape)
print('Test labels shape: ', y_test.shape)
print('dev data shape: ', X_dev.shape)
print('dev labels shape: ', y_dev.shape)

from cs231n.classifiers.softmax import softmax_loss_vectorized
import time

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1).
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))
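
get_CIFAR10_data is the usual cs231n notebook helper: subsample CIFAR-10 into train/val/test/dev splits, flatten each image into a row, subtract the training mean image, and append a bias column (hence the 3073 features implied by W = np.random.randn(3073, 10)). The sketch below captures that pipeline; load_CIFAR10, the dataset path, and the split sizes are assumptions based on the standard assignment setup, not taken from this page.

import numpy as np
from cs231n.data_utils import load_CIFAR10   # assumed helper from the assignment code

def get_CIFAR10_data_sketch(num_training=49000, num_validation=1000,
                            num_test=1000, num_dev=500):
    # Assumed dataset location; adjust to wherever CIFAR-10 is unpacked.
    X_train, y_train, X_test, y_test = load_CIFAR10('cs231n/datasets/cifar-10-batches-py')

    # Subsample the four splits.
    X_val = X_train[num_training:num_training + num_validation]
    y_val = y_train[num_training:num_training + num_validation]
    X_train, y_train = X_train[:num_training], y_train[:num_training]
    dev_idx = np.random.choice(num_training, num_dev, replace=False)
    X_dev, y_dev = X_train[dev_idx], y_train[dev_idx]
    X_test, y_test = X_test[:num_test], y_test[:num_test]

    # Flatten images into rows, subtract the training mean, append a bias column.
    splits = [X.reshape(X.shape[0], -1).astype(np.float64)
              for X in (X_train, X_val, X_test, X_dev)]
    mean_image = np.mean(splits[0], axis=0)
    splits = [np.hstack([X - mean_image, np.ones((X.shape[0], 1))]) for X in splits]
    X_train, X_val, X_test, X_dev = splits

    return X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev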
Example #26
# In[ ]:

# First implement the naive softmax loss function with nested loops.
# Open the file cs231n/classifiers/softmax.py and implement the
# softmax_loss_naive function.

from cs231n.classifiers.softmax import softmax_loss_naive
import time

# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(3073, 10) * 0.0001

from cs231n.classifiers.softmax import softmax_loss_vectorized
tic = time.time()
loss, grad  = softmax_loss_vectorized(W, X_dev, y_dev, 0.00001)
toc = time.time()
print 'vectorized loss: %e computed in %fs' % (loss, toc - tic)

from cs231n.gradient_check import grad_check_sparse
f = lambda w: softmax_loss_vectorized(w, X_dev, y_dev, 0.00001)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

# As a rough sanity check, our loss should be something close to -log(0.1).
print 'loss: %f' % loss
print 'sanity check: %f' % (-np.log(0.1))

"""
###################
I dont want to implement naive version !
BY BINGO
Example #27
 def loss(self, X_batch, y_batch, reg):
     return softmax_loss_vectorized(self.W, X_batch, y_batch, reg)
Example #28
print 'Validation labels shape: ', y_val.shape
print 'Test data shape: ', X_test.shape
print 'Test labels shape: ', y_test.shape


# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(10, 3073) * 0.0001
loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1).
print 'loss: %f' % loss
print 'sanity check: %f' % (-np.log(0.1))

# Complete the implementation of softmax_loss_naive and implement a (naive)
# version of the gradient that uses nested loops.
loss, grad = softmax_loss_vectorized(W, X_train, y_train, 0.0)

# As we did for the SVM, use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient.

f = lambda w: softmax_loss_vectorized(w, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)


# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_train, y_train, 0.00001)
toc = time.time()