Example #1
 def test_softmax_loss_vectorized_gradient(self):
     loss_naive, grad_naive = softmax_loss_naive(self.weights, self.x,
                                                 self.y, self.reg)
     loss_vect, grad_vect = softmax_loss_vectorized(self.weights, self.x,
                                                    self.y, self.reg)
     np.testing.assert_allclose(loss_naive, loss_vect, 1e-04)
     np.testing.assert_allclose(grad_naive, grad_vect, 1e-04)
Example #2
    def loss(self, X, y=None, reg=0.0):
        """
    Compute the loss and gradients for a two layer fully connected neural
    network.

    Inputs:
    - X: Input data of shape (N, D). Each X[i] is a training sample.
    - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
      an integer in the range 0 <= y[i] < C. This parameter is optional; if it
      is not passed then we only return scores, and if it is passed then we
      instead return the loss and gradients.
    - reg: Regularization strength.

    Returns:
    If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
    the score for class c on input X[i].

    If y is not None, instead return a tuple of:
    - loss: Loss (data loss and regularization loss) for this batch of training
      samples.
    - grads: Dictionary mapping parameter names to gradients of those parameters
      with respect to the loss function; has the same keys as self.params.
    """
        # Unpack variables from the params dictionary
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape

        # Fold the biases into the weights: prepend a column of ones to the
        # input and a matching bias row to each weight matrix.
        X1 = np.insert(X, 0, 1, axis=1)         # (N, D+1)

        W11 = np.insert(W1, 0, b1, axis=0)      # (D+1, H)
        W21 = np.insert(W2, 0, b2, axis=0)      # (H+1, C)

        # Compute the forward pass
        scores = None
        z1 = X1.dot(W11)                        # (N, H)
        Layer1 = np.maximum(0, z1)              # ReLU activations, (N, H)

        Layer1_1 = np.insert(Layer1, 0, 1, axis=1)  # add bias column, (N, H+1)
        scores = Layer1_1.dot(W21)              # class scores, (N, C)
        #############################################################################
        # TODO: Perform the forward pass, computing the class scores for the input. #
        # Store the result in the scores variable, which should be an array of      #
        # shape (N, C).                                                             #
        #############################################################################
        pass
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        # If the targets are not given then jump out, we're done
        if y is None:
            return scores
        # Compute the softmax data loss plus L2 regularization
        loss = 0.0
        L = np.exp(scores)
        for i in range(N):
            loss -= scores[i][y[i]]
            loss += np.log(np.sum(L[i]))
        loss /= N
        loss += 0.5 * reg * np.sum(W1 * W1) + 0.5 * reg * np.sum(W2 * W2)

        #############################################################################
        # TODO: Finish the forward pass, and compute the loss. This should include  #
        # both the data loss and L2 regularization for W1 and W2. Store the result  #
        # in the variable loss, which should be a scalar. Use the Softmax           #
        # classifier loss. So that your results match ours, multiply the            #
        # regularization loss by 0.5                                                #
        #############################################################################
        pass
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        # Backward pass: compute gradients
        grads = {}

        # Gradient of the loss with respect to the scores (softmax derivative)
        probs = L / np.sum(L, axis=1, keepdims=True)   # (N, C)
        dscores = probs.copy()
        dscores[np.arange(N), y] -= 1
        dscores /= N

        # Second layer: scores = Layer1.dot(W2) + b2
        grads['W2'] = Layer1.T.dot(dscores) + reg * W2
        grads['b2'] = np.sum(dscores, axis=0)

        # Backpropagate through the ReLU into the first layer
        dhidden = dscores.dot(W2.T)                    # (N, H)
        dhidden[Layer1 <= 0] = 0
        grads['W1'] = X.T.dot(dhidden) + reg * W1
        grads['b1'] = np.sum(dhidden, axis=0)

        #############################################################################
        # TODO: Compute the backward pass, computing the derivatives of the weights #
        # and biases. Store the results in the grads dictionary. For example,       #
        # grads['W1'] should store the gradient on W1, and be a matrix of same size #
        #############################################################################
        pass
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        return loss, grads
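
A minimal usage sketch for the loss() method above, assuming only what the docstring states; the stand-in class below is hypothetical and exists just to populate self.params with the expected keys and shapes.

import numpy as np

class _ToyTwoLayerNet:
    # Hypothetical stand-in: it only fills self.params; in the original code the
    # loss() method above would be defined on a class like this.
    def __init__(self, input_size, hidden_size, output_size, std=1e-4):
        self.params = {
            'W1': std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

net = _ToyTwoLayerNet(input_size=4, hidden_size=10, output_size=3)
X_toy = np.random.randn(5, 4)
y_toy = np.random.randint(3, size=5)
# With loss() bound to this class:
#   scores = net.loss(X_toy)                  -> (5, 3) class scores
#   loss, grads = net.loss(X_toy, y_toy, 0.1) -> scalar loss and grads keyed like net.params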
Example #3
 def test_softmax_loss_naive_loss(self):
     loss, _ = softmax_loss_naive(self.weights, self.x, self.y, self.reg)
     np.testing.assert_allclose(loss, self.expected, 1e-04)
Example #4
 def test_softmax_loss_naive_gradient(self):
     loss, grad = softmax_loss_naive(self.weights, self.x, self.y, self.reg)
     f = lambda w: softmax_loss_naive(w, self.x, self.y, self.reg)[0]
     grad_numerical = test_utils.grad_check_sparse(f, self.weights, grad)
Example #5
X_train, y_train, X_val, y_val, X_test, y_test = utils.get_CIFAR10_data()

# First implement the naive softmax loss function with nested loops.
# Open the file softmax.py and implement the
# softmax_loss_naive function.

# Generate a random softmax theta matrix and use it to compute the loss.

theta = np.random.randn(3073,10) * 0.0001
loss, grad = softmax_loss_naive(theta, X_train, y_train, 0.0)

# Loss should be something close to - log(0.1)

print 'loss:', loss, ' should be close to ', - np.log(0.1)

# Use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient. (within 1e-7)

from gradient_check import grad_check_sparse
f = lambda th: softmax_loss_naive(th, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, theta, grad, 10)

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
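
The examples above call softmax_loss_naive but never show it. A possible nested-loop sketch using the signature and conventions seen here (theta of shape (D, C), X of shape (N, D), 0.5 * reg scaling) follows; it is an illustration, not the assignment's reference solution.

import numpy as np

def softmax_loss_naive(theta, X, y, reg):
    # theta: (D, C) weights, X: (N, D) data, y: (N,) integer labels in [0, C).
    D, C = theta.shape
    N = X.shape[0]
    loss = 0.0
    grad = np.zeros_like(theta)
    for i in range(N):
        scores = X[i].dot(theta)                 # (C,)
        scores -= np.max(scores)                 # shift for numerical stability
        probs = np.exp(scores) / np.sum(np.exp(scores))
        loss += -np.log(probs[y[i]])
        for c in range(C):
            grad[:, c] += (probs[c] - (c == y[i])) * X[i]
    loss = loss / N + 0.5 * reg * np.sum(theta * theta)
    grad = grad / N + reg * theta
    return loss, grad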
Example #6
print('X_train shape: ' + str(X_train.shape))
print('Y_train shape: ' + str(Y_train.shape))
print('X_test shape: ' + str(X_test.shape))
print('Y_test shape: ' + str(Y_test.shape))
print('X_val shape: ' + str(X_val.shape))
print('Y_val shape: ' + str(Y_val.shape))
print('X_dev shape: ' + str(X_dev.shape))
print('Y_dev shape: ' + str(Y_dev.shape))

#############################################################################
#################################  softmax classifier

# First implement the naive softmax loss function with nested loops.

W = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax.softmax_loss_naive(W, X_dev, Y_dev, 0.0)

# As a rough sanity check, our loss should be something close to -log(0.1).
print('loss: ' + str(loss))
print('sanity check: ' + str(-np.log(0.1)))
# The reason: with near-zero weights all 10 classes get roughly equal scores,
# so each class has probability ~0.1 and the expected loss is about -np.log(0.1).

# As we did for the SVM, use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient.
loss, grad = softmax.softmax_loss_naive(W, X_dev, Y_dev, 0.0)
f = lambda w: softmax.softmax_loss_naive(w, X_dev, Y_dev, 0.0)[0]
Tools.grad_check_sparse(f, W, grad, 10)

# similar to SVM case, do another gradient check with regularization
loss, grad = softmax.softmax_loss_naive(W, X_dev, Y_dev, 5e1)
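
grad_check_sparse comes from the course utilities and is not shown in these snippets; conceptually it compares the analytic gradient to a centered finite-difference estimate at a handful of random coordinates. A self-contained sketch of that idea (names and tolerances are illustrative):

import numpy as np

def grad_check_sparse_sketch(f, x, analytic_grad, num_checks=10, h=1e-5):
    # Compare analytic_grad against a centered numeric gradient of f at random entries of x.
    for _ in range(num_checks):
        ix = tuple(np.random.randint(n) for n in x.shape)
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)                          # f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)                          # f(x - h)
        x[ix] = oldval                       # restore the original value
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / (
            abs(grad_numerical) + abs(grad_analytic) + 1e-12)
        print('numerical: %f analytic: %f, relative error: %e'
              % (grad_numerical, grad_analytic, rel_error))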
Example #7
    file = "../train/" + str(i) + ".png"
    x = si.imread(file)
    x = x.reshape(3072)
    X_val.append(x)
    if i % 5000 == 0:
        print("Reading data for index = ", i)
X_val = np.asarray(X_val)

X_train, _, _ = utils.std_features(X_train)
X_train = np.vstack((np.ones(X_train.shape[0]), X_train.T)).T

X_val, _, _ = utils.std_features(X_val)
X_val = np.vstack((np.ones(X_val.shape[0]), X_val.T)).T

theta = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(theta, X_train, y_train, 0.0)

# Loss should be something close to - log(0.1)

print 'loss:', loss, ' should be close to ', -np.log(0.1)

tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(theta, X_train, y_train, 0.00001)
toc = time.time()
print 'naive loss: %e computed in %fs' % (loss_naive, toc - tic)

tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(
    theta, X_train, y_train, 0.00001)
toc = time.time()
print 'vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic)
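
The timing comparison above pairs the naive implementation with softmax_loss_vectorized; one possible fully vectorized sketch with the same (theta, X, y, reg) signature and 0.5 * reg convention (an illustration, not the graded solution):

import numpy as np

def softmax_loss_vectorized(theta, X, y, reg):
    # theta: (D, C) weights, X: (N, D) data, y: (N,) integer labels.
    N = X.shape[0]
    scores = X.dot(theta)                                    # (N, C)
    scores -= np.max(scores, axis=1, keepdims=True)          # numerical stability
    probs = np.exp(scores) / np.sum(np.exp(scores), axis=1, keepdims=True)
    loss = -np.sum(np.log(probs[np.arange(N), y])) / N
    loss += 0.5 * reg * np.sum(theta * theta)

    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1                            # p_c - 1{c == y}
    grad = X.T.dot(dscores) / N + reg * theta
    return loss, grad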
Example #9
##print 'Train data shape: ', X_train.shape
##print 'Train labels shape: ', y_train.shape
##print 'Validation data shape: ', X_val.shape
##print 'Validation labels shape: ', y_val.shape
##print 'Test data shape: ', X_test.shape
##print 'Test labels shape: ', y_test.shape

W = np.random.randn(10,3073)*0.0001
##loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)
##print 'loss: %f' % loss
loss, grad = softmax_loss_vectorized(W, X_train, y_train, 0.0)
print 'loss: %f' % loss

from gradient_check import grad_check_sparse
f = lambda w: softmax_loss_naive(w, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

##import numpy as np
##a = np.arange(15).reshape(3, 5)*0.1
##probs = a / np.sum(a, axis=0)
##y = np.random.choice(3, 5)

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.

results = {}
best_val = -1
best_softmax = None
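
A possible continuation of the validation sweep described in the comment above, assuming a Softmax linear-classifier class with train() and predict() methods as in the CS231n assignment code these snippets appear to come from; the learning-rate and regularization ranges are illustrative only.

learning_rates = [1e-7, 5e-7]                     # illustrative ranges, not tuned values
regularization_strengths = [2.5e4, 5e4]

for lr in learning_rates:
    for reg in regularization_strengths:
        clf = Softmax()                           # assumed classifier class (not shown above)
        clf.train(X_train, y_train, learning_rate=lr, reg=reg, num_iters=1500)
        train_acc = np.mean(clf.predict(X_train) == y_train)
        val_acc = np.mean(clf.predict(X_val) == y_val)
        results[(lr, reg)] = (train_acc, val_acc)
        if val_acc > best_val:
            best_val = val_acc
            best_softmax = clf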