Example #1
File: svm.py  Project: HaoMood/cs231n
    def train_check(self, X, y, lamda=1e-3):
        # Gradient check.
        # Numerically compute the gradient along several randomly
        # chosen dimensions, and compare them with your analytically
        # computed gradient. The numbers should match almost exactly
        # along all dimensions.
        J, dW, db = self._costFcn(self.W, self.b, X, y, lamda)

        print '\nGradient check on W'
        f = lambda W: self._costFcn(W, self.b, X, y, lamda)[0]
        grad_numerical = grad_check_sparse(f, self.W, dW)

        print '\nGradient check on b'
        f = lambda b: self._costFcn(self.W, b, X, y, lamda)[0]
        grad_numerical = grad_check_sparse(f, self.b, db)   
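Every example in this listing calls grad_check_sparse but never defines it. As a point of reference, here is a minimal sketch of such a helper, assuming the CS231n-style centered-difference check (the exact signature and printout in the projects above may differ). Because the parameter array is perturbed in place, even lambdas that ignore their argument and let the cost function read self.W directly (as in one of the examples below) still see the perturbation; note that a checker written this way only prints its results, which is consistent with the examples never using grad_numerical afterwards.

import numpy as np

def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    # Numerically check analytic_grad at a few random entries of x.
    # f is a scalar loss function of x; x is perturbed in place and restored.
    for _ in range(num_checks):
        ix = tuple(np.random.randint(m) for m in x.shape)

        oldval = x[ix]
        x[ix] = oldval + h                     # bump this entry up by h
        fxph = f(x)                            # f(x + h)
        x[ix] = oldval - h                     # bump it down by h
        fxmh = f(x)                            # f(x - h)
        x[ix] = oldval                         # restore the entry

        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = (abs(grad_numerical - grad_analytic) /
                     (abs(grad_numerical) + abs(grad_analytic) + 1e-12))
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))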
Example #2
    def train_check(self, X, y, lamda=1e-3):
        # Gradient check.
        # Numerically compute the gradient along several randomly
        # chosen dimensions, and compare them with your analytically
        # computed gradient. The numbers should match almost exactly
        # along all dimensions.
        J, dW, db = self._costFcn(X, y, lamda)
        print 'J =', J, 'sanity check =', np.log(10)

        for l in xrange(1, 3):
            print '\n grad. check on W', l 
            f = lambda W: self._costFcn(X, y, lamda)[0]
            grad_numerical = grad_check_sparse(f, self.W[l], dW[l])

            print '\n grad. check on b', l 
            f = lambda b: self._costFcn(X, y, lamda)[0]
            grad_numerical = grad_check_sparse(f, self.b[l], db[l])
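The np.log(10) sanity check makes sense if _costFcn is a softmax/cross-entropy cost over 10 classes (the listing does not show it): with small random weights the scores are nearly equal, so each class gets roughly probability 1/10 and the initial cost should be about -log(1/10). A quick way to reproduce the reference value:

import numpy as np

num_classes = 10
# near-uniform probabilities => cross-entropy of the correct class is about -log(1/C)
expected_initial_cost = -np.log(1.0 / num_classes)   # == np.log(10), roughly 2.3026
print(expected_initial_cost)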
Example #3
W = np.random.randn(3073, 10) * 0.0001

loss, grad = linear_svm.svm_loss_naive(W, X_dev, y_dev, 0.000005)
print('loss: %f' % (loss, ))

# Once you've implemented the gradient, recompute it with the code below
# and gradient check it with the function we provided for you

# Compute the loss and its gradient at W.
loss, grad = linear_svm.svm_loss_naive(W, X_dev, y_dev, 0.0)

# Numerically compute the gradient along several randomly chosen dimensions, and
# compare them with your analytically computed gradient. The numbers should match
# almost exactly along all dimensions.
f = lambda w: linear_svm.svm_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = gradient_check.grad_check_sparse(f, W, grad)

# do the gradient check once again with regularization turned on
# you didn't forget the regularization gradient did you?
loss, grad = linear_svm.svm_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: linear_svm.svm_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = gradient_check.grad_check_sparse(f, W, grad)
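The second check above targets the regularization part of the gradient. As a sketch of the term it probes, assuming the usual L2 penalty reg * sum(W * W) added to the data loss (whether a factor of 2 or 0.5 appears depends on the convention inside svm_loss_naive):

import numpy as np

def l2_regularization(W, reg):
    # Contribution of an L2 penalty to the loss and to dW; the 2 * reg * W
    # term is the part that is easy to forget when writing svm_loss_naive.
    loss_term = reg * np.sum(W * W)
    grad_term = 2 * reg * W
    return loss_term, grad_term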

# Next implement the function svm_loss_vectorized; for now only compute the loss;
# we will implement the gradient in a moment.
tic = time.time()
loss_naive, grad_naive = linear_svm.svm_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

tic = time.time()
Example #4
print('Naive loss: %e computed in %fs' % (loss_naive, toc - tic))

from Softmax import softmax_loss_vec
tic = time.time()
loss_vec, grad_vec = softmax_loss_vec(W, X_dev, Y_dev, 0.00001)
toc = time.time()
print('Vectorized loss: %e computed in %fs' % (loss_vec, toc - tic))

print('loss difference: %f' % (loss_naive - loss_vec))
print('gradient difference: %f' %
      (np.linalg.norm(grad_naive - grad_vec, ord='fro')))

# check the gradient
from gradient_check import grad_check_sparse
f = lambda w: softmax_loss_vec(w, X_dev, Y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad_vec)

loss_vec, grad_vec = softmax_loss_vec(W, X_dev, Y_dev, 1e2)
f = lambda w: softmax_loss_vec(w, X_dev, Y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, W, grad_vec)

# implement SGD and find best parameters
from linear_classifier import Softmax
results = {}
best_val = -1
best_softmax = None

learning_rates = [10**(x * 2) for x in range(-3, 3)]
reg_strength = [10**(x * 2) for x in range(-3, 3)]

for lr in learning_rates:
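The snippet is cut off at the top of the search loop. One way such a loop is typically completed is sketched below; this is an illustration, not the project's code, and it assumes Softmax exposes the same train/predict interface used with LinearSVM later in this listing and that validation arrays X_val and Y_val exist.

# Illustration only: grid search over learning rate and regularization,
# keeping the model with the best validation accuracy.
for lr in learning_rates:
    for reg in reg_strength:
        clf = Softmax()
        clf.train(X_dev, Y_dev, learning_rate=lr, reg=reg, num_iters=1000)
        train_acc = np.mean(clf.predict(X_dev) == Y_dev)
        val_acc = np.mean(clf.predict(X_val) == Y_val)   # X_val/Y_val assumed
        results[(lr, reg)] = (train_acc, val_acc)
        if val_acc > best_val:
            best_val = val_acc
            best_softmax = clf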
Example #5
loss, grad = softmax_loss_naive(theta, X_train, y_train, 0.0)

# Loss should be something close to -log(0.1)

print 'loss:', loss, 'should be close to', -np.log(0.1)

# Use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient. (within 1e-7)

from gradient_check import grad_check_sparse
f = lambda th: softmax_loss_naive(th, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, theta, grad, 10)

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.

tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(theta, X_train, y_train, 0.00001)
toc = time.time()
print 'naive loss: %e computed in %fs' % (loss_naive, toc - tic)
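The "(within 1e-7)" comment above refers to a scale-invariant relative error, which is also a more informative way to compare naive and vectorized gradients than a raw norm. A common helper for that comparison, shown here as an illustration (it is not part of the listing):

import numpy as np

def rel_error(x, y):
    # max relative error between two arrays; values around 1e-7 or smaller
    # usually indicate the analytic and numeric gradients agree
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))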
Example #6
x_val = np.hstack([x_val, np.ones((x_val.shape[0], 1))])
x_dev = np.hstack([x_dev, np.ones((x_dev.shape[0], 1))])
x_test = np.hstack([x_test, np.ones((x_test.shape[0], 1))])
print(x_train.shape, y_train.shape, x_val.shape, y_val.shape,
      x_dev.shape, y_dev.shape, x_test.shape, y_test.shape)
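The columns of ones appended above are the usual bias trick: folding the bias vector into the weight matrix is why W is created with 3073 rows for 3072-dimensional inputs in the snippets below. A minimal illustration with fake data (not part of the project):

import numpy as np

X = np.random.randn(5, 3072)                   # five fake input rows
X = np.hstack([X, np.ones((X.shape[0], 1))])   # now (5, 3073): last feature is a constant 1
W = np.random.randn(3073, 10) * 0.0001         # last row of W plays the role of the bias
scores = X.dot(W)                              # (5, 10) scores, no separate bias term needed
print(scores.shape)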
# main code
'''
# test svm naive
w = np.random.randn(3073, 10) * 0.0001
loss, grad = svm_loss_naive(w, x_dev, y_dev, 1e2)
f = lambda w: svm_loss_naive(w, x_dev, y_dev, 1e2)[0]
grad_numerical = grad_check_sparse(f, w, grad)
print('loss is: %f' % loss)
'''
'''
# test svm linear classifier
svm = LinearSVM()
tic = time.time()
loss_hist = svm.train(x_train, y_train, learning_rate=1e-7, reg=5e4,
                      num_iters=2500, verbose=True)
toc = time.time()
print('that took %f s' % (toc - tic))
y_train_pred = svm.predict(x_train)
y_val_pred = svm.predict(x_val)
print('training accuracy: %f' % (np.mean(y_train == y_train_pred)))
print('validation accuracy: %f' % (np.mean(y_val == y_val_pred)))
'''
# test softmax naive
w = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(w, x_dev, y_dev, 1e3)
f = lambda w: softmax_loss_naive(w, x_dev, y_dev, 1e3)[0]
grad_numerical = grad_check_sparse(f, w, grad)
print('loss is: %f' % loss)
Example #7
print("use a random weight to compute loss.....")
# Generate a random softmax weight matrix and use it to compute the loss.
W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)
# As a rough sanity check, our loss should be something close to -log(0.1).
print('loss: %f' % loss)
print('sanity check: %f' % (-np.log(0.1)))

print("check the naive gradient............")
# Complete the implementation of softmax_loss_naive and implement a (naive)
# version of the gradient that uses nested loops.
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 0.0)
# As we did for the SVM, use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient.
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)
print("check the  another naive gradient............")
# similar to SVM case, do another gradient check with regularization
loss, grad = softmax_loss_naive(W, X_dev, y_dev, 5e1)
f = lambda w: softmax_loss_naive(w, X_dev, y_dev, 5e1)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

print("the time between naive and vectorized..........")
# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(W, X_dev, y_dev, 0.000005)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))
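The listing ends before any softmax_loss_vectorized implementation appears. As a reference point, a minimal fully vectorized sketch is given below; it assumes row-wise data with scores = X.dot(W) and an L2 penalty of the form reg * sum(W * W), which may differ from the exact conventions used in the projects above.

import numpy as np

def softmax_loss_vectorized(W, X, y, reg):
    # W: (D, C) weights, X: (N, D) data, y: (N,) integer labels, reg: L2 strength
    N = X.shape[0]

    scores = X.dot(W)                              # (N, C)
    scores -= scores.max(axis=1, keepdims=True)    # shift for numeric stability
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)   # (N, C)

    # average cross-entropy of the correct classes plus the L2 penalty
    loss = -np.log(probs[np.arange(N), y]).mean() + reg * np.sum(W * W)

    # dL/dscores = (probs - one_hot(y)) / N, backpropagated through X.dot(W)
    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1.0
    dscores /= N
    dW = X.T.dot(dscores) + 2 * reg * W

    return loss, dW

Its loss can be compared against the naive version exactly as in the timing blocks above, and its gradient checked with grad_check_sparse or the rel_error helper sketched earlier.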