Example #1
 def test_softmax_loss_vectorized_gradient(self):
     loss_naive, grad_naive = softmax_loss_naive(self.weights, self.x,
                                                 self.y, self.reg)
     loss_vect, grad_vect = softmax_loss_vectorized(self.weights, self.x,
                                                    self.y, self.reg)
     np.testing.assert_allclose(loss_naive, loss_vect, 1e-04)
     np.testing.assert_allclose(grad_naive, grad_vect, 1e-04)
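
The test above exercises softmax_loss_naive and softmax_loss_vectorized, but neither body is shown in these examples. As a reference point only, here is a minimal sketch of a vectorized version, assuming X of shape (N, D), W of shape (D, C), and the (W, X, y, reg) signature used in these examples; the regularization convention varies between examples, so the term below is just one choice:

import numpy as np

def softmax_loss_vectorized_sketch(W, X, y, reg):
    # Hypothetical vectorized softmax loss; not any particular graded implementation.
    N = X.shape[0]
    scores = X.dot(W)                             # (N, C) class scores
    scores -= scores.max(axis=1, keepdims=True)   # shift for numerical stability
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)
    loss = -np.log(probs[np.arange(N), y]).mean() + reg * np.sum(W * W)
    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1                 # (p - 1) on the correct class
    grad = X.T.dot(dscores) / N + 2 * reg * W
    return loss, grad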
Example #2
    def loss(self, X, y=None, reg=0.0):
        """
        Compute the loss and gradients for a two layer fully connected neural
        network.

        Inputs:
        - X: Input data of shape (N, D). Each X[i] is a training sample.
        - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
          an integer in the range 0 <= y[i] < C. This parameter is optional; if it
          is not passed then we only return scores, and if it is passed then we
          instead return the loss and gradients.
        - reg: Regularization strength.

        Returns:
        If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
        the score for class c on input X[i].

        If y is not None, instead return a tuple of:
        - loss: Loss (data loss and regularization loss) for this batch of training
          samples.
        - grads: Dictionary mapping parameter names to gradients of those parameters
          with respect to the loss function; has the same keys as self.params.
        """
        # Unpack variables from the params dictionary
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape

        # Compute the forward pass
        h1, h2, scores = self.compute_score(X, W1, b1, W2, b2)
        if y is None:
            return scores

        # Compute the loss
        loss = softmax_loss_vectorized(
            W=None, X=None, y=y, scores=scores)[0] + reg * (
                np.sum(np.square(W1)) + np.sum(np.square(W2)))

        # Backward pass: compute gradients
        grads = {}
        # Gradient of the softmax loss w.r.t. the scores: (p - 1) on the
        # correct class, averaged over the batch
        exp_scores = np.exp(scores)
        sum_scores = np.sum(exp_scores, axis=1)
        prob_scores = (exp_scores.T / sum_scores).T
        prob_scores[np.arange(N), y] -= 1
        dLoss_Scores = prob_scores / N
        # Second layer: gradients for b2 and W2, then backprop into the hidden layer
        dLoss_b2 = np.sum(dLoss_Scores, axis=0)
        grads['b2'] = dLoss_b2
        dLoss_W2 = h2.T.dot(dLoss_Scores) + 2 * reg * W2
        grads['W2'] = dLoss_W2
        dLoss_h2 = dLoss_Scores.dot(W2.T)
        # ReLU gradient: pass the upstream gradient only where h1 > 0
        dh2_h1 = np.zeros_like(h1)
        dh2_h1[h1 > 0] = 1
        dLoss_h1 = dh2_h1 * dLoss_h2
        # First layer: gradients for b1 and W1
        dLoss_b1 = np.sum(dLoss_h1, axis=0)
        grads['b1'] = dLoss_b1
        dLoss_W1 = X.T.dot(dLoss_h1) + 2 * reg * W1
        grads['W1'] = dLoss_W1
        return loss, grads
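
A quick way to sanity-check the backward pass above is to compare grads against a centered-difference numerical gradient. A sketch, assuming a network instance exposing the params dictionary and loss method shown here (the helper below is illustrative, not part of the original code):

import numpy as np

def check_two_layer_grads(net, X, y, reg=0.05, num_checks=5, h=1e-5):
    # Compare analytic gradients from net.loss against centered differences.
    _, grads = net.loss(X, y, reg=reg)
    for name, param in net.params.items():
        for _ in range(num_checks):
            ix = tuple(np.random.randint(d) for d in param.shape)
            old = param[ix]
            param[ix] = old + h
            loss_plus = net.loss(X, y, reg=reg)[0]
            param[ix] = old - h
            loss_minus = net.loss(X, y, reg=reg)[0]
            param[ix] = old
            numeric = (loss_plus - loss_minus) / (2 * h)
            analytic = grads[name][ix]
            rel = abs(numeric - analytic) / max(abs(numeric) + abs(analytic), 1e-12)
            print('%s%s: numerical %f analytic %f relative error %e' %
                  (name, ix, numeric, analytic, rel))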
Example #3
 def loss(self, X_batch, y_batch, reg):
     return softmax_loss_vectorized(self.theta, X_batch, y_batch, reg)
Example #4
    # from gradient_check import grad_check_sparse
    # f = lambda th: softmax_loss_naive(th, X_train, y_train, 0.0)[0]
    # grad_numerical = grad_check_sparse(f, theta, grad, 10)

    # Now that we have a naive implementation of the softmax loss function and its gradient,
    # implement a vectorized version in softmax_loss_vectorized.
    # The two versions should compute the same results, but the vectorized version should be
    # much faster.

    # tic = time.time()
    # loss_naive, grad_naive = softmax_loss_naive(theta, X_train, y_train, 0.00001)
    # toc = time.time()
    # print 'naive loss: %e computed in %fs' % (loss_naive, toc - tic)

    tic = time.time()
    loss_vectorized, grad_vectorized = softmax_loss_vectorized(theta, X_train, y_train, 0.00001)
    toc = time.time()
    print 'vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic)


    # We use the Frobenius norm to compare the two versions
    # of the gradient.

    # grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
    # print 'Loss difference: %f' % np.abs(loss_naive - loss_vectorized)
    # print 'Gradient difference: %f' % grad_difference

    learning_rates = [1e-7]
    regularization_strengths = [1e8]
    # best_softmax, results, best_val = pick_hyperparams(X_train,y_train, X_val, y_val, learning_rates, regularization_strengths)
Example #5
from util import plt, np, load_data, grad_check_sparse, time_elapse
from softmax import softmax_loss_vectorized
from linear_classifier import Softmax

cifar_dir = '../cifar-10-batches-py'
X_train, y_train, X_val, y_val, X_test, y_test, X_dev, y_dev = load_data(
    cifar_dir, num_test=500)

# initialize W
W = np.random.randn(3073, 10) * 0.0001

# test loss
loss, grad = softmax_loss_vectorized(W, X_dev, y_dev, 0.0)
#print('loss: %f' % loss)
#print('sanity check: %f' % (-np.log(0.1)))

# test gradient without regularization
#def f(w): return softmax_loss_vectorized(W, X_dev, y_dev, 0.0)[0]
#grad_numerical = grad_check_sparse(f, W, grad, 10)

# test gradient with regularization
#def f(w): return softmax_loss_vectorized(W, X_dev, y_dev, 1e2)[0]
#grad_numerical = grad_check_sparse(f, W, grad, 10)

softmax = Softmax()
loss_history = softmax.train(X_train,
                             y_train,
                             learning_rate=1e-7,
                             reg=5e4,
                             num_iters=1500,
                             verbose=True)
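
After training, the usual next steps are to look at the loss curve and the validation accuracy. A short sketch following on from the snippet above, assuming the Softmax class also exposes a predict method that returns class labels (that method is not shown here):

# plot the optimization trajectory
plt.plot(loss_history)
plt.xlabel('Iteration number')
plt.ylabel('Loss value')
plt.show()

# validation accuracy (predict is assumed, not shown above)
y_val_pred = softmax.predict(X_val)
print('validation accuracy: %f' % np.mean(y_val_pred == y_val))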
Example #6
 def test_softmax_loss_vectorized_loss(self):
     loss, _ = softmax_loss_vectorized(self.weights, self.x, self.y,
                                       self.reg)
     np.testing.assert_allclose(loss, self.expected, 1e-04)
Example #7
X_val = np.vstack((np.ones(X_val.shape[0]), X_val.T)).T

theta = np.random.randn(3073, 10) * 0.0001
loss, grad = softmax_loss_naive(theta, X_train, y_train, 0.0)

# With small random weights every one of the 10 classes gets roughly equal
# probability, so the loss should be close to -log(0.1)

print 'loss:', loss, ' should be close to ', -np.log(0.1)

tic = time.time()
loss_naive, grad_naive = softmax_loss_naive(theta, X_train, y_train, 0.00001)
toc = time.time()
print 'naive loss: %e computed in %fs' % (loss_naive, toc - tic)

tic = time.time()
loss_vectorized, grad_vectorized = softmax_loss_vectorized(
    theta, X_train, y_train, 0.00001)
toc = time.time()
print 'vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic)

# We use the Frobenius norm to compare the two versions
# of the gradient.

grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print 'Loss difference: %f' % np.abs(loss_naive - loss_vectorized)
print 'Gradient difference: %f' % grad_difference

results = {}
best_val = -1
best_softmax = None
#learning_rates = [1e-7, 5e-7, 1e-6, 5e-6]
#regularization_strengths = [ 5e4, 1e5, 5e5, 1e8]
Example #8
loss, grad = softmax.softmax_loss_naive(W, X_dev, Y_dev, 5e1)
f = lambda w: softmax.softmax_loss_naive(w, X_dev, Y_dev, 5e1)[0]
Tools.grad_check_sparse(f, W, grad, 10)

# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.
tic = time.time()
loss_naive, grad_naive = softmax.softmax_loss_naive(W, X_dev, Y_dev, 0.000005)
toc = time.time()
print('naive loss: %e computed in %fs' % (loss_naive, toc - tic))

tic = time.time()
loss_vectorized, grad_vectorized = softmax.softmax_loss_vectorized(
    W, X_dev, Y_dev, 0.000005)
toc = time.time()
print('vectorized loss: %e computed in %fs' % (loss_vectorized, toc - tic))

# As we did for the SVM, we use the Frobenius norm to compare the two versions
# of the gradient.
grad_difference = np.linalg.norm(grad_naive - grad_vectorized, ord='fro')
print('Loss difference: %f' % np.abs(loss_naive - loss_vectorized))
print('Gradient difference: %f' % grad_difference)

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
results = {}
best_val = -1
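
One way to fill in the tuning loop that results and best_val set up is a plain grid search over learning rate and regularization strength. A sketch, assuming a Softmax classifier with train/predict methods and the X_train, Y_train, X_val, Y_val splits used elsewhere in this script (none of which appear in the snippet above):

best_softmax = None
learning_rates = [1e-7, 5e-7]
regularization_strengths = [2.5e4, 5e4]

for lr in learning_rates:
    for reg in regularization_strengths:
        clf = Softmax()
        clf.train(X_train, Y_train, learning_rate=lr, reg=reg, num_iters=1500)
        train_acc = np.mean(clf.predict(X_train) == Y_train)
        val_acc = np.mean(clf.predict(X_val) == Y_val)
        results[(lr, reg)] = (train_acc, val_acc)
        if val_acc > best_val:
            best_val = val_acc
            best_softmax = clf

for (lr, reg), (train_acc, val_acc) in sorted(results.items()):
    print('lr %e reg %e train accuracy: %f val accuracy: %f' %
          (lr, reg, train_acc, val_acc))
print('best validation accuracy achieved during cross-validation: %f' % best_val)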
Example #9
 def loss(self, X, y, reg):
     return softmax_loss_vectorized(X, y, reg)
Example #10
    def loss(self, X, y=None, reg=0.0):
        """
    Compute the loss and gradients for a two layer fully connected neural
    network.

    Inputs:
    - X: Input data of shape (N, D). Each X[i] is a training sample.
    - y: Vector of training labels. y[i] is the label for X[i], and each y[i] is
      an integer in the range 0 <= y[i] < C. This parameter is optional; if it
      is not passed then we only return scores, and if it is passed then we
      instead return the loss and gradients.
    - reg: Regularization strength.

    Returns:
    If y is None, return a matrix scores of shape (N, C) where scores[i, c] is
    the score for class c on input X[i].

    If y is not None, instead return a tuple of:
    - loss: Loss (data loss and regularization loss) for this batch of training
      samples.
    - grads: Dictionary mapping parameter names to gradients of those parameters
      with respect to the loss function; has the same keys as self.params.
    """
        # Unpack variables from the params dictionary
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N, D = X.shape

        #print "REG:",reg

        X1 = np.insert(X, 0, 1, axis=1)

        W11 = np.insert(W1, 0, b1, axis=0)
        W21 = np.insert(W2, 0, b2, axis=0)

        scores = None
        z1 = X1.dot(W11)
        Layer1ub = np.maximum(0, z1)
        Layer1 = np.insert(np.maximum(0, z1), 0, 1, axis=1)
        # Compute the forward pass

        scores = Layer1.dot(W21)
        #############################################################################
        # TODO: Perform the forward pass, computing the class scores for the input. #
        # Store the result in the scores variable, which should be an array of      #
        # shape (N, C).                                                             #
        #############################################################################
        pass
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        # If the targets are not given then jump out, we're done
        if y is None:
            return scores
        loss = 0.0
        # Compute the loss
        L = np.exp(scores)
        #print sc
        for i in range(X.shape[0]):
            loss -= scores[i][y[i]]
            loss += math.log(sum(L[i]))
        loss /= X.shape[0]
        loss += 0.5 * reg * np.sum(W1 * W1) + 0.5 * reg * np.sum(W2 * W2)

        #############################################################################
        # TODO: Finish the forward pass, and compute the loss. This should include  #
        # both the data loss and L2 regularization for W1 and W2. Store the result  #
        # in the variable loss, which should be a scalar. Use the Softmax           #
        # classifier loss. So that your results match ours, multiply the            #
        # regularization loss by 0.5                                                #
        #############################################################################
        pass
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        # Backward pass: compute gradients

        l1, w2gradient = softmax_loss_vectorized(W21, Layer1, y, reg)
        derror = error_class(Layer1ub, y, W2,
                             b2)  #obtain error in outermost layer
        dhidden = np.dot(derror, W2.T)  #backprop error to hidden layer
        dhidden[Layer1ub <= 0] = 0  #apply ReLu
        w1grads = np.dot(X.T, dhidden)  #compute grad
        #print "grad:",w1grads.shape,W1.shape
        grads = {}

        grads['W2'] = w2gradient[1:] + reg * W2  #np.dot(Layer1,gradient)
        grads['W1'] = w1grads + reg * W1  #w1Lgradient[1:]+reg*W1
        grads['b2'] = w2gradient[0]
        grads['b1'] = np.sum(dhidden, axis=0)  #w1Lgradient[0]

        #print grads['W1']

        #############################################################################
        # TODO: Compute the backward pass, computing the derivatives of the weights #
        # and biases. Store the results in the grads dictionary. For example,       #
        # grads['W1'] should store the gradient on W1, and be a matrix of same size #
        #############################################################################
        pass
        #############################################################################
        #                              END OF YOUR CODE                             #
        #############################################################################

        return loss, grads
Example #11
x_val = np.reshape(x_val, (x_val.shape[0], -1))
x_dev = np.reshape(x_dev, (x_dev.shape[0], -1))
# Normalization: zero-center the data by subtracting the per-feature mean
mean_image = np.mean(x_train, axis=0)
x_train -= mean_image
x_val -= mean_image
x_test -= mean_image
x_dev -= mean_image
# The weight matrix W also absorbs the bias b, so x needs an extra (constant 1) dimension
x_train = np.hstack([x_train, np.ones((x_train.shape[0], 1))])
x_val = np.hstack([x_val, np.ones((x_val.shape[0], 1))])
x_test = np.hstack([x_test, np.ones((x_test.shape[0], 1))])
x_dev = np.hstack([x_dev, np.ones((x_dev.shape[0], 1))])
# Compute the loss and the gradient
w = np.random.randn(3073, 10) * 0.0001  # (3073, 10) array of small normally distributed values
loss, grad = softmax.softmax_loss_vectorized(
    w, x_dev, y_dev, 0.00001)  # the analytic gradient can also be computed with svm_loss_vectorized
print('loss is : %f' % loss)
# Gradient check: the analytic gradient is fast to compute but easy to get wrong,
# so compare the analytic result against a numerically computed gradient


def grad_check_sparse(f, x, analytic_grad, num_checks=10, h=1e-5):
    for i in range(num_checks):
        ix = tuple([random.randrange(m) for m in x.shape])
        oldval = x[ix]
        x[ix] = oldval + h
        fxph = f(x)  # f(x + h)
        x[ix] = oldval - h
        fxmh = f(x)  # f(x - h)
        x[ix] = oldval
        grad_numerical = (fxph - fxmh) / (2 * h)
        grad_analytic = analytic_grad[ix]
        rel_error = abs(grad_numerical - grad_analytic) / (
            abs(grad_numerical) + abs(grad_analytic))
        print('numerical: %f analytic: %f, relative error: %e' %
              (grad_numerical, grad_analytic, rel_error))
Example #12
 def loss(self, X_batch, Y_batch, regularization):
     return softmax.softmax_loss_vectorized(self.W, X_batch, Y_batch, regularization)
Example #13
  
  return X_train, y_train, X_val, y_val, X_test, y_test

X_train, y_train, X_val, y_val, X_test, y_test = get_CIFAR10_data()

##print 'Train data shape: ', X_train.shape
##print 'Train labels shape: ', y_train.shape
##print 'Validation data shape: ', X_val.shape
##print 'Validation labels shape: ', y_val.shape
##print 'Test data shape: ', X_test.shape
##print 'Test labels shape: ', y_test.shape

W = np.random.randn(10,3073)*0.0001
##loss, grad = softmax_loss_naive(W, X_train, y_train, 0.0)
##print 'loss: %f' % loss
loss, grad = softmax_loss_vectorized(W, X_train, y_train, 0.0)
print 'loss: %f' % loss

from gradient_check import grad_check_sparse
f = lambda w: softmax_loss_naive(w, X_train, y_train, 0.0)[0]
grad_numerical = grad_check_sparse(f, W, grad, 10)

##import numpy as np
##a = np.arange(15).reshape(3, 5)*0.1
##probs = a / np.sum(a, axis=0)
##y = np.random.choice(3, 5)

# Use the validation set to tune hyperparameters (regularization strength and
# learning rate). You should experiment with different ranges for the learning
# rates and regularization strengths; if you are careful you should be able to
# get a classification accuracy of over 0.35 on the validation set.
Example #14
X = np.concatenate((X, np.ones([X.shape[0], 1])), axis=1)
X_test = np.concatenate((X_test, np.ones([X_test.shape[0], 1])), axis=1)
X_train, X_val, y_train, y_val = cross_validation.train_test_split(
    X, y, test_size=0.4, random_state=0)

# First implement the naive softmax loss function with nested loops.
# Open the file softmax.py and implement the
# softmax_loss_naive function.

# Generate a random softmax theta matrix and use it to compute the loss.

theta = np.random.randn(3073, 10) * 0.0001

loss, grad = softmax_loss_vectorized(theta, X_train, y_train, 0.0)

# Loss should be something close to - log(0.1)

print 'loss:', loss, ' should be close to ', - np.log(0.1)

# Use numeric gradient checking as a debugging tool.
# The numeric gradient should be close to the analytic gradient. (within 1e-7)
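
# The numeric check mentioned above can reuse grad_check_sparse as in the other
# examples, or be done inline with a centered difference on a few random entries
# of theta. A sketch (illustrative, not part of the original script) using the
# theta and grad computed above:
import random

f = lambda th: softmax_loss_vectorized(th, X_train, y_train, 0.0)[0]
h = 1e-5
for _ in range(10):
    ix = tuple(random.randrange(m) for m in theta.shape)
    oldval = theta[ix]
    theta[ix] = oldval + h
    fxph = f(theta)
    theta[ix] = oldval - h
    fxmh = f(theta)
    theta[ix] = oldval
    numeric = (fxph - fxmh) / (2 * h)
    analytic = grad[ix]
    rel_error = abs(numeric - analytic) / (abs(numeric) + abs(analytic))
    print('numerical: %f analytic: %f, relative error: %e' % (numeric, analytic, rel_error))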


# Now that we have a naive implementation of the softmax loss function and its gradient,
# implement a vectorized version in softmax_loss_vectorized.
# The two versions should compute the same results, but the vectorized version should be
# much faster.

Example #15
    def loss1(self, X, y=None):
        """
    Compute loss and gradient for a minibatch of data.

    Inputs:
    - X: Array of input data of shape (N, d_1, ..., d_k)
    - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

    Returns:
    If y is None, then run a test-time forward pass of the model and return:
    - scores: Array of shape (N, C) giving classification scores, where
      scores[i, c] is the classification score for X[i] and class c.

    If y is not None, then run a training-time forward and backward pass and
    return a tuple of:
    - loss: Scalar value giving the loss
    - grads: Dictionary with the same keys as self.params, mapping parameter
      names to gradients of the loss with respect to those parameters.
    """
        scores = None
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        N = X.shape[0]

        feat = 1
        for num_wei in X.shape[1:]:
            feat *= num_wei
        #out = None
        Xfeat = X.reshape([X.shape[0], feat])
        #print "X1: shape",X1.shape
        X1 = np.insert(Xfeat, 0, 1, axis=1)
        #print "X1: shape",X1.shape

        W11 = np.insert(W1, 0, b1, axis=0)
        W21 = np.insert(W2, 0, b2, axis=0)

        scores = None
        z1 = X1.dot(W11)
        Layer1ub = np.maximum(0, z1)
        scoresub = np.exp(Layer1ub.dot(W2) + b2)
        Layer1 = np.insert(np.maximum(0, z1), 0, 1, axis=1)
        #print "ALyer1",Layer1.shape
        scores = Layer1.dot(W21)
        ############################################################################
        # TODO: Implement the forward pass for the two-layer net, computing the    #
        # class scores for X and storing them in the scores variable.              #
        ############################################################################
        pass
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        # If y is None then we are in test mode so just return scores
        if y is None:
            return scores

        loss, grads = 0, {}
        loss = 0.0
        # Compute the loss
        L = np.exp(scores)
        #print sc
        for i in range(X.shape[0]):
            loss -= scores[i][y[i]]
            loss += math.log(sum(L[i]))
        loss /= X.shape[0]
        loss += 0.5 * self.reg * np.sum(W1 * W1) + 0.5 * self.reg * np.sum(
            W2 * W2)
        ############################################################################
        # TODO: Implement the backward pass for the two-layer net. Store the loss  #
        # in the loss variable and gradients in the grads dictionary. Compute data #
        # loss using softmax, and make sure that grads[k] holds the gradients for  #
        # self.params[k]. Don't forget to add L2 regularization!                   #
        #                                                                          #
        # NOTE: To ensure that your implementation matches ours and you pass the   #
        # automated tests, make sure that your L2 regularization includes a factor #
        # of 0.5 to simplify the expression for the gradient.                      #
        ############################################################################
        pass
        ############################################################################
        #                             END OF YOUR CODE                             #
        ############################################################################

        l1, w2gradient = softmax_loss_vectorized(W21, Layer1, y, self.reg)
        derror = error_class(Layer1ub, y, W2,
                             b2)  #obtain error in outermost layer
        dhidden = np.dot(derror, W2.T)  #backprop error to hidden layer
        dhidden[Layer1ub <= 0] = 0  #apply ReLu
        w1grads = np.dot(Xfeat.T, dhidden)  #compute grad
        #print "grad:",w1grads.shape,W1.shape
        grads = {}

        #print "W1:",w1grads.shape,W1.shape
        grads['W2'] = w2gradient[1:]  #.reshape(#np.dot(Layer1,gradient)
        grads['W1'] = w1grads  #+self.reg*W1#w1Lgradient[1:]+reg*W1
        grads['b2'] = w2gradient[0]  #np.sum(scoresub, axis=0)#
        grads['b1'] = np.sum(dhidden, axis=0)  #w1Lgradient[0]

        #print "grads over"
        return loss, grads