def train_loss(*args):
    """Run the fully-connected forward pass over a flat argument list.

    ``args[0]`` is the input batch and ``args[1]`` the labels; all remaining
    entries are fetched through the index helpers on ``self`` (``w_idx``,
    ``b_idx``, ``bn_ga_idx``, ``bn_bt_idx``).  ``self`` and ``mode`` are not
    parameters: they are resolved from the surrounding scope.

    Returns the raw scores when ``mode == 'test'``; otherwise the value of
    ``svm_loss(scores, y)``.
    """
    X, y = args[0], args[1]
    last_layer = self.num_layers - 1

    activation = X
    for layer in xrange(self.num_layers):
        activation = affine_forward(activation, args[self.w_idx(layer)],
                                    args[self.b_idx(layer)])

        # The final affine layer produces the scores directly: no
        # normalization, non-linearity or dropout after it.
        if layer >= last_layer:
            continue

        if self.use_batchnorm:
            activation = batchnorm_forward(activation,
                                           args[self.bn_ga_idx(layer)],
                                           args[self.bn_bt_idx(layer)],
                                           self.bn_params[layer])
        activation = relu_forward(activation)
        if self.use_dropout:
            activation = dropout_forward(activation, self.dropout_param)

    if mode == 'test':
        return activation

    # Softmax alternative kept for reference:
    # loss, _ = softmax_loss(scores, y)
    return svm_loss(activation, y)
Ejemplo n.º 2
0
    def loss(self, X, y=None):
        """
        Evaluate the two-layer network on a minibatch.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        With y set to None only the forward pass is run and the result is:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        With labels supplied, a forward and a backward pass are run and the
        result is a tuple of:
        - loss: Scalar softmax loss plus the L2 penalty on W1 and W2
        - grads: Dictionary keyed 'W1', 'b1', 'W2', 'b2' holding the gradient
          of the loss with respect to the parameter of the same name.
        """
        params = self.params
        W1, W2 = params['W1'], params['W2']
        b1, b2 = params['b1'], params['b2']

        # Forward: affine + ReLU, then the affine output layer.
        hidden, hidden_cache = affine_relu_forward(X, W1, b1)
        scores, out_cache = affine_forward(hidden, W2, b2)
        if y is None:
            return scores

        # Data loss plus 0.5 * reg * ||W||^2 over both weight matrices.
        loss, dscores = softmax_loss(scores, y)
        loss += .5 * self.reg * (np.sum(W1**2) + np.sum(W2**2))

        # Backward, output layer first.
        dhidden, dW2, db2 = affine_backward(dscores, out_cache)
        _, dW1, db1 = affine_relu_backward(dhidden, hidden_cache)

        # The regularization gradient is accumulated in place on the arrays
        # returned by the backward helpers.
        dW2 += self.reg * W2
        dW1 += self.reg * W1

        grads = {}
        grads['W2'] = dW2
        grads['b2'] = db2
        grads['W1'] = dW1
        grads['b1'] = db1
        return loss, grads
Ejemplo n.º 3
0
        def train_loss(*args):
            """Run the fully-connected forward pass over a flat argument list.

            ``args[0]`` is the input batch and ``args[1]`` the labels; all
            remaining entries are fetched through the index helpers on
            ``self`` (``w_idx``, ``b_idx``, ``bn_ga_idx``, ``bn_bt_idx``).
            ``self`` and ``mode`` come from the enclosing scope.

            Returns the raw scores when ``mode == 'test'``; otherwise the
            value of ``svm_loss(scores, y)``.
            """
            X, y = args[0], args[1]
            last_layer = self.num_layers - 1

            activation = X
            for layer in xrange(self.num_layers):
                activation = affine_forward(activation,
                                            args[self.w_idx(layer)],
                                            args[self.b_idx(layer)])

                # The final affine layer produces the scores directly.
                if layer >= last_layer:
                    continue

                if self.use_batchnorm:
                    activation = batchnorm_forward(
                        activation, args[self.bn_ga_idx(layer)],
                        args[self.bn_bt_idx(layer)], self.bn_params[layer])
                activation = relu_forward(activation)
                if self.use_dropout:
                    activation = dropout_forward(activation,
                                                 self.dropout_param)

            if mode == 'test':
                return activation

            # Softmax alternative kept for reference:
            # loss, _ = softmax_loss(scores, y)
            return svm_loss(activation, y)
Ejemplo n.º 4
0
    def loss(self, X, y=None, reg=1e-5):
        """Compute scores, or loss and gradients, for the three-layer conv net.

        Forward pipeline: conv - relu - 2x2 max pool (``W1``/``b1``), affine -
        relu (``W2``/``b2``), affine (``W3``/``b3``), followed by a softmax
        loss. Progress messages are printed at every stage.

        Inputs:
        - X: Input batch handed to ``layers.conv_relu_pool_forward``.
        - y: Labels for X, or None to run the forward pass only.
        - reg: Unused; the regularization strength is read from ``self.reg``.
          Kept so existing callers that pass it continue to work.

        Returns:
        - scores if y is None.
        - Otherwise a tuple (loss, grads): the softmax data loss plus
          0.5 * self.reg * sum(W ** 2) over W1, W2, W3, and a dictionary
          keyed 'W1', 'b1', 'W2', 'b2', 'W3', 'b3' with the gradients.
        """
        print('start computing loss and grad.............')
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']

        # pass conv_param to the forward pass for the convolutional layer.
        # Floor division keeps the padding an integer even when true division
        # is in effect (Python 3 or ``from __future__ import division``).
        filter_size = W1.shape[2]
        conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}

        # pass pool_param to the forward pass for the max-pooling layer
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

        # compute the forward pass
        print('compute the forward pass......')
        print('compute the w1 conv_relu_pool_forward forward pass......')
        a1, cache1 = layers.conv_relu_pool_forward(X, W1, b1, conv_param,
                                                   pool_param)

        print('compute the w2 affine_relu_forward forward pass......')
        a2, cache2 = layers.affine_relu_forward(a1, W2, b2)

        print('compute the w3 affine_forward forward pass......')
        scores, cache3 = layers.affine_forward(a2, W3, b3)

        if y is None:
            return scores

        # compute the backward pass
        print('compute the backward pass......')
        print('compute the softmax_loss backward pass......')
        data_loss, dscores = layers.softmax_loss(scores, y)

        print('compute the dw3 affine_backward backward pass......')
        da2, dW3, db3 = layers.affine_backward(dscores, cache3)

        print('compute the dw2 affine_relu_backward backward pass......')
        da1, dW2, db2 = layers.affine_relu_backward(da2, cache2)

        print('compute the dw1 conv_relu_pool_backward backward pass......')
        dX, dW1, db1 = layers.conv_relu_pool_backward(da1, cache1)

        # Add regularization (gradients are updated in place)
        dW1 += self.reg * W1
        dW2 += self.reg * W2
        dW3 += self.reg * W3
        reg_loss = 0.5 * self.reg * sum(np.sum(W * W) for W in [W1, W2, W3])
        loss = data_loss + reg_loss
        grads = {
            'W1': dW1,
            'b1': db1,
            'W2': dW2,
            'b2': db2,
            'W3': dW3,
            'b3': db3
        }
        print(' computing loss and grad end !!!!!!!!!!!!!!!!!')
        # Single pre-formatted argument so the output is the same text on
        # Python 2 and Python 3.
        print('loss is : %s' % (loss,))
        return loss, grads
Ejemplo n.º 5
0
    def loss(self, X, y=None, reg=1e-5):
        """Compute scores, or loss and gradients, for the batch-normed conv net.

        Forward pipeline: conv - relu - 2x2 max pool (``W1``/``b1``), spatial
        batch normalization with scale 1 and shift 0, affine - relu
        (``W2``/``b2``), affine (``W3``/``b3``). The data loss and its
        gradient with respect to the scores come from
        ``NUS_loss_test.NUSDataTrain`` instead of a softmax layer.

        Inputs:
        - X: Input batch handed to ``layers.conv_relu_pool_forward``.
        - y: Labels for X, or None to run the forward pass only.
        - reg: Unused; the regularization strength is read from ``self.reg``.
          Kept so existing callers that pass it continue to work.

        Returns:
        - scores if y is None.
        - Otherwise a tuple (loss, grads): the data loss plus
          0.5 * self.reg * sum(W ** 2) over W1, W2, W3, and a dictionary
          keyed 'W1', 'b1', 'W2', 'b2', 'W3', 'b3' with the gradients.
        """

        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']

        # pass conv_param to the forward pass for the convolutional layer.
        # Floor division keeps the padding an integer even when true division
        # is in effect (Python 3 or ``from __future__ import division``).
        filter_size = W1.shape[2]
        conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}

        # pass pool_param to the forward pass for the max-pooling layer
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

        # compute the forward pass
        a1, cache1 = layers.conv_relu_pool_forward(X, W1, b1, conv_param,
                                                   pool_param)
        # NOTE(review): batch norm always runs in 'train' mode with a fresh
        # bn_param, including on the y is None path -- confirm this is
        # intended.
        norm_out, norm_cache = layers.spatial_batchnorm_forward(
            a1, 1, 0, bn_param={'mode': 'train'})

        a2, cache2 = layers.affine_relu_forward(norm_out, W2, b2)
        scores, cache3 = layers.affine_forward(a2, W3, b3)

        if y is None:
            return scores

        # compute the backward pass
        data_loss = NUS_loss_test.NUSDataTrain().loss(scores, y)
        # Gradient of the loss w.r.t. the scores, estimated numerically.
        # This replaced layers.softmax_loss(scores, y); change here to swap
        # the loss.
        dscores = NUS_loss_test.NUSDataTrain().eval_numerical_gradient(
            NUS_loss_test.NUSDataTrain().grad_loss,
            scores)
        da2, dW3, db3 = layers.affine_backward(dscores, cache3)
        da1, dW2, db2 = layers.affine_relu_backward(da2, cache2)
        # dgamma / dbeta are discarded: scale and shift are the constants
        # 1 and 0 passed in the forward pass, not entries of self.params.
        dnorm_out, dgamma, dbeta = layers.spatial_batchnorm_backward(
            da1, norm_cache)
        dX, dW1, db1 = layers.conv_relu_pool_backward(dnorm_out, cache1)

        # Add regularization (gradients are updated in place)
        dW1 += self.reg * W1
        dW2 += self.reg * W2
        dW3 += self.reg * W3
        reg_loss = 0.5 * self.reg * sum(np.sum(W * W) for W in [W1, W2, W3])
        loss = data_loss + reg_loss
        grads = {
            'W1': dW1,
            'b1': db1,
            'W2': dW2,
            'b2': db2,
            'W3': dW3,
            'b3': db3
        }

        return loss, grads
    def train_loss(X, y, W1, W2, b1, b2):
        """Forward pass of the two-layer net as a function of its parameters.

        Returns the scores when ``y`` is None; otherwise the SVM loss plus an
        L2 penalty of ``0.5 * self.reg * sum(W ** 2)`` for each of W1 and W2.
        ``self`` is taken from the enclosing scope.
        """
        hidden = affine_relu_forward(X, W1, b1)
        scores = affine_forward(hidden, W2, b2)
        if y is None:
            return scores

        # TODO: softmax is not supported yet
        # loss, d_scores = softmax_loss(scores, y)
        data_loss = svm_loss(scores, y)
        w1_penalty = np.sum(W1 ** 2) * 0.5 * self.reg
        w2_penalty = np.sum(W2 ** 2) * 0.5 * self.reg
        return data_loss + w1_penalty + w2_penalty
Ejemplo n.º 7
0
def affine_relu_forward(x, w, b):
    """
    Convenience layer that performs an affine transform followed by a ReLU.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns:
    - out: The result of relu_forward applied to the affine output. No cache
      is returned by this variant.
    """
    return relu_forward(affine_forward(x, w, b))
def affine_relu_forward(x, w, b):
    """
    Chain an affine transform and a ReLU in a single call.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights for the affine layer

    Returns:
    - out: The value produced by relu_forward for the affine output. This
      variant does not return a cache.
    """
    pre_activation = affine_forward(x, w, b)
    return relu_forward(pre_activation)
Ejemplo n.º 9
0
        def train_loss(X, y, W1, W2, b1, b2):
            """Forward pass of the two-layer net as a function of its parameters.

            Returns the scores when ``y`` is None; otherwise the SVM loss plus
            an L2 penalty of ``0.5 * self.reg * sum(W ** 2)`` for each of W1
            and W2.  ``self`` is taken from the enclosing scope.
            """
            hidden = affine_relu_forward(X, W1, b1)
            scores = affine_forward(hidden, W2, b2)
            if y is None:
                return scores

            # TODO: softmax is not supported yet
            # loss, d_scores = softmax_loss(scores, y)
            data_loss = svm_loss(scores, y)
            w1_penalty = np.sum(W1**2) * 0.5 * self.reg
            w2_penalty = np.sum(W2**2) * 0.5 * self.reg
            return data_loss + w1_penalty + w2_penalty
Ejemplo n.º 10
0
def affine_relu_forward(x, w, b):
    """
    Convenience layer that performs an affine transform followed by a ReLU.

    Inputs:
    - x: input to the affine layer
    - w: weights
    - b: bias

    Returns a tuple of:
    - out: output from the ReLU
    - cache: pair (affine cache, ReLU cache) to give to the backward pass
    """
    pre_activation, affine_cache = layers.affine_forward(x, w, b)
    out, relu_cache = layers.relu_forward(pre_activation)
    return out, (affine_cache, relu_cache)
Ejemplo n.º 11
0
    def loss(self, X, y=None):
        """
        Evaluate the two-layer network on a minibatch.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        With y set to None only the forward pass is run and the result is:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        With labels supplied, a forward and a backward pass are run and the
        result is a tuple of:
        - loss: Scalar softmax loss plus the L2 penalty on W1 and W2
        - grads: Dictionary keyed 'W1', 'b1', 'W2', 'b2' holding the gradient
          of the loss with respect to the parameter of the same name.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']

        # Forward: affine + ReLU, then a plain affine output layer (no ReLU
        # on the last layer).
        hidden, hidden_cache = layer_utilities.affine_relu_forward(X, W1, b1)
        scores, out_cache = layers.affine_forward(hidden, W2, b2)
        if y is None:
            return scores

        # Data loss plus 0.5 * reg * ||W||^2 over both weight matrices.
        loss, d_scores = layers.softmax_loss(scores, y)
        loss = loss + 0.5 * self.reg * (np.sum(W1 * W1) + np.sum(W2 * W2))

        # Backward, output layer first.
        d_hidden, dW2, db2 = layers.affine_backward(d_scores, out_cache)
        _, dW1, db1 = layer_utilities.affine_relu_backward(d_hidden,
                                                           hidden_cache)

        grads = {}
        grads['W2'] = dW2 + self.reg * W2
        grads['b2'] = db2
        grads['W1'] = dW1 + self.reg * W1
        grads['b1'] = db1
        return loss, grads
Ejemplo n.º 12
0
    def loss(self,X,y=None):
        """
        Evaluate the two-layer network on a minibatch.

        Inputs:
        - X: Array of input data of shape (N, d_1, ..., d_k)
        - y: Array of labels, of shape (N,). y[i] gives the label for X[i].

        Returns:
        With y set to None only the forward pass is run and the result is:
        - scores: Array of shape (N, C) giving classification scores, where
          scores[i, c] is the classification score for X[i] and class c.

        With labels supplied, a forward and a backward pass are run and the
        result is a tuple of:
        - loss: Scalar softmax loss plus the L2 penalty on W1 and W2
        - grads: Dictionary keyed 'W1', 'b1', 'W2', 'b2' holding the gradient
          of the loss with respect to the parameter of the same name.
        """
        params = self.params
        W1, b1 = params['W1'], params['b1']
        W2, b2 = params['W2'], params['b2']

        # Forward: affine + ReLU, then the affine output layer.
        hidden, hidden_cache = affine_relu_forward(X, W1, b1)
        scores, out_cache = affine_forward(hidden, W2, b2)
        if y is None:
            return scores

        # Data loss plus one 0.5 * reg * ||W||^2 term per weight matrix.
        loss, dscores = softmax_loss(scores, y)
        loss = loss + 0.5 * self.reg * np.sum(W1 * W1) + 0.5 * self.reg * np.sum(W2 * W2)

        grads = {}

        # Output layer.
        dhidden, dW2, db2 = affine_backward(dscores, out_cache)
        grads['W2'] = dW2 + self.reg * W2
        grads['b2'] = db2

        # Hidden layer.
        _, dW1, db1 = affine_relu_backward(dhidden, hidden_cache)
        grads['W1'] = dW1 + self.reg * W1
        grads['b1'] = db1

        return loss, grads
Ejemplo n.º 13
0
    def loss(self, x, y=None):
        """
            Four-layer network evaluated with an MSE loss.

            Three affine + ReLU layers (W1..W3) feed a final affine layer (W4).
            Returns the scores when y is None; otherwise a tuple (loss, grads)
            where grads is keyed 'W1'..'W4' and 'b1'..'b4'. No regularization
            term is added.
        """
        params = self.params

        # Forward through the three hidden layers, remembering each cache.
        activation = x
        hidden_caches = []
        for idx in (1, 2, 3):
            activation, cache = affine_relu_forward(
                activation, params['W%d' % idx], params['b%d' % idx])
            hidden_caches.append(cache)
        scores, out_cache = affine_forward(activation, params['W4'], params['b4'])

        if y is None:
            return scores

        loss = mse_loss_forward(scores, y)

        # Backward: output layer first, then the hidden layers in reverse.
        grads = {}
        upstream = mse_loss_backward(scores, y)
        upstream, grads['W4'], grads['b4'] = affine_backward(upstream, out_cache)
        for idx in (3, 2, 1):
            upstream, grads['W%d' % idx], grads['b%d' % idx] = affine_relu_backward(
                upstream, hidden_caches[idx - 1])

        return loss, grads
Ejemplo n.º 14
0
    def predict(self, X):
        """
            Predict a class label for every input with the three-layer conv net.

            Runs conv - relu - 2x2 max pool (W1/b1), affine - relu (W2/b2) and
            affine (W3/b3), then takes the arg-max of the scores along axis 1.

            Inputs:
                - X: Batch of inputs handed to layers.conv_relu_pool_forward.
                     NOTE(review): the previous docstring described X as
                     (N, D), but the first layer is convolutional, so this is
                     presumably (N, C, H, W) -- confirm.
            Returns:
                - y_pred: A numpy array giving, for each row of the scores,
                          the index of the highest score, i.e. the predicted
                          class c with 0 <= c < C.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']

        # pass conv_param to the forward pass for the convolutional layer.
        # Floor division keeps the padding an integer even when true division
        # is in effect (Python 3 or ``from __future__ import division``).
        filter_size = W1.shape[2]
        conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}

        # pass pool_param to the forward pass for the max-pooling layer
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

        # Forward pass only; the caches are not needed for prediction.
        a1, _ = layers.conv_relu_pool_forward(X, W1, b1, conv_param,
                                              pool_param)
        a2, _ = layers.affine_relu_forward(a1, W2, b2)
        scores, _ = layers.affine_forward(a2, W3, b3)

        return np.argmax(scores, axis=1)
Ejemplo n.º 15
0
import layers
import numpy as np

# Fixture for exercising layers.affine_forward: 2 inputs, each of shape
# (4, 5, 6), mapped to 3 outputs.
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

# Total element counts of the input batch and of the weight matrix.
input_size = num_inputs * np.prod(input_shape)
weight_size = output_dim * np.prod(input_shape)

# Evenly spaced values, so the fixture is deterministic.
x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape),
                                                    output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim)

# Only the first element of the returned pair is kept.
out, _ = layers.affine_forward(x, w, b)
def affine_relu_forward(x, w, b):
    """Apply an affine transform followed by a ReLU.

    Inputs:
    - x: Input to the affine layer
    - w, b: Weights and bias for the affine layer

    Returns a tuple of:
    - out: Output from the ReLU
    - cache: Pair (affine cache, ReLU cache) as returned by the two helpers
    """
    pre_activation, affine_cache = affine_forward(x, w, b)
    out, relu_cache = relu_forward(pre_activation)
    return out, (affine_cache, relu_cache)
Ejemplo n.º 17
0
###################################################################################

# Test the affine_forward function

# Fixture: 2 inputs, each of shape (4, 5, 6), mapped to 3 outputs.
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

# Total element counts of the input batch and of the weight matrix.
input_size = num_inputs * np.prod(input_shape)
theta_size = output_dim * np.prod(input_shape)

# Evenly spaced values, so the fixture is deterministic.
x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
theta = np.linspace(-0.2, 0.3, num=theta_size).reshape(np.prod(input_shape), output_dim)
theta_0 = np.linspace(-0.3, 0.1, num=output_dim)

# Only the first element of the returned pair is kept.
out, _ = layers.affine_forward(x, theta, theta_0)
# Reference output for the fixture above.
correct_out = np.array([[ 1.49834967,  1.70660132,  1.91485297],
                        [ 3.25553199,  3.5141327,   3.77273342]])

# Compare your output with ours. The error should be around 1e-9.
# The report is skipped when every entry of out is zero/falsy.
if out.any():
  print 'Testing affine_forward function:'
  print 'difference (should be around 1e-9): ', rel_error(out, correct_out)

# Problem 3.1.2
###################################################################################
#   Affine layer: backward.                                                       #
###################################################################################
#   In the file layers.py implement the affine_backward function.                 #
#   Once you are done you can test your implementation using numeric gradient.    #
###################################################################################
Ejemplo n.º 18
0
        def loss(self, X, y=None):
            """
            Compute scores, or loss and gradients, for the fully-connected net.

            Hidden layers are affine - [batchnorm] - relu - [dropout]; the
            last layer is a plain affine layer scored with a softmax loss.

            Returns the scores when y is None. Otherwise returns a tuple
            (loss, grads), where loss includes 0.5 * self.reg * sum(W ** 2)
            for every weight matrix and grads maps each 'W%d' / 'b%d' (and,
            with batchnorm, 'gamma%d' / 'beta%d') to its gradient.
            """
            X = X.astype(self.dtype)
            mode = 'train' if y is not None else 'test'

            # Batchnorm and dropout behave differently in train and test
            # mode, so the mode is pushed into their parameter dicts.
            if self.dropout_param is not None:
                self.dropout_param['mode'] = mode
            if self.use_batchnorm:
                for bn_param in self.bn_params:
                    bn_param['mode'] = mode

            num_hidden = self.num_layers - 1
            top = self.num_layers

            # ---- forward pass ----
            # Hidden-layer caches are keyed by the 0-based layer index; the
            # output layer's cache is stored under key self.num_layers.
            activation = X
            layer_caches = {}
            dropout_caches = {}
            for i in xrange(num_hidden):
                n = i + 1
                W, b = self.params['W%d' % n], self.params['b%d' % n]
                if self.use_batchnorm:
                    activation, layer_caches[i] = affine_bn_relu_forward(
                        activation, W, b,
                        self.params['gamma%d' % n],
                        self.params['beta%d' % n],
                        self.bn_params[i])
                else:
                    activation, layer_caches[i] = affine_relu_forward(
                        activation, W, b)

                if self.use_dropout:
                    activation, dropout_caches[i] = dropout_forward(
                        activation, self.dropout_param)

            W_top = self.params['W%d' % top]
            scores, layer_caches[top] = affine_forward(
                activation, W_top, self.params['b%d' % top])

            # If test mode return early
            if mode == 'test':
                return scores

            # ---- backward pass ----
            grads = {}
            loss, upstream = softmax_loss(scores, y)

            # Output layer: regularization term, then gradients.
            loss = loss + 0.5 * self.reg * np.sum(W_top * W_top)
            upstream, dw, db = affine_backward(upstream, layer_caches[top])
            grads['W%d' % top] = dw + self.reg * W_top
            grads['b%d' % top] = db

            # Hidden layers, from the last one down to the first.
            for i in xrange(num_hidden - 1, -1, -1):
                n = i + 1
                W = self.params['W%d' % n]
                loss = loss + 0.5 * self.reg * np.sum(W * W)

                if self.use_dropout:
                    upstream = dropout_backward(upstream, dropout_caches[i])

                if self.use_batchnorm:
                    dx, dw, db, dgamma, dbeta = affine_bn_relu_backward(
                        upstream, layer_caches[i])
                else:
                    dx, dw, db = affine_relu_backward(upstream,
                                                      layer_caches[i])

                grads['W%d' % n] = dw + self.reg * W
                grads['b%d' % n] = db
                # Scale and shift parameters are not regularized.
                if self.use_batchnorm:
                    grads['gamma%d' % n] = dgamma
                    grads['beta%d' % n] = dbeta
                upstream = dx

            return loss, grads
Ejemplo n.º 19
0
    def loss(self,X,y=None):
        """
        Compute scores, or loss and gradients, for the fully-connected net.

        The input is flattened to (N, -1). Hidden layers are
        affine - [batchnorm] - relu - [dropout]; the last layer is a plain
        affine layer scored with a softmax loss.

        Returns the scores when y is None. Otherwise returns a tuple
        (loss, grads), where loss includes 0.5 * self.reg * sum(W ** 2) for
        every weight matrix and grads maps each 'W<i>' / 'b<i>' (and, with
        batchnorm, 'gamma<i>' / 'beta<i>') to its gradient.
        """
        X = X.astype(self.dtype)
        mode = 'train' if y is not None else 'test'

        # Dropout and batchnorm read their mode from their parameter dicts.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        num_hidden = self.num_layers - 1
        top = str(self.num_layers)

        # Flatten every sample into one row.
        activation = np.reshape(X, [X.shape[0], -1])

        # ---- forward pass ----
        # One (affine, batchnorm, relu, dropout) cache tuple per hidden layer;
        # entries for disabled stages stay None.
        hidden_caches = []
        for i in range(num_hidden):
            n = str(i + 1)
            pre_relu, fc_cache = affine_forward(
                activation, self.params['W' + n], self.params['b' + n])

            bn_cache = None
            if self.use_batchnorm:
                pre_relu, bn_cache = batchnorm_forward(
                    pre_relu, self.params['gamma' + n],
                    self.params['beta' + n], self.bn_params[i])

            relu_act, relu_cache = relu_forward(pre_relu)

            dropout_cache = None
            if self.use_dropout:
                relu_act, dropout_cache = dropout_forward(relu_act,
                                                          self.dropout_param)

            hidden_caches.append((fc_cache, bn_cache, relu_cache,
                                  dropout_cache))
            activation = relu_act.copy()

        # Last layer
        scores, final_cache = affine_forward(
            activation, self.params['W' + top], self.params['b' + top])

        if mode == 'test':
            return scores

        # ---- backward pass ----
        grads = {}
        loss, dscores = softmax_loss(scores, y)
        loss += 0.5 * self.reg * (np.sum(np.square(self.params['W' + top])))

        # Backpropagation through the last layer
        upstream, dw, db = affine_backward(dscores, final_cache)
        grads['W' + top] = dw + self.reg * self.params['W' + top]
        grads['b' + top] = db

        # Hidden layers, from the last one down to the first (1-based i).
        for i in range(num_hidden, 0, -1):
            n = str(i)
            fc_cache, bn_cache, relu_cache, dropout_cache = hidden_caches[i - 1]

            if self.use_dropout:
                upstream = dropout_backward(upstream, dropout_cache)

            d_pre_relu = relu_backward(upstream, relu_cache)
            if self.use_batchnorm:
                d_affine_out, dgamma, dbeta = batchnorm_backward(d_pre_relu,
                                                                 bn_cache)
                upstream, dw, db = affine_backward(d_affine_out, fc_cache)
                grads['beta' + n] = dbeta
                grads['gamma' + n] = dgamma
            else:
                upstream, dw, db = affine_backward(d_pre_relu, fc_cache)

            grads['W' + n] = dw + self.reg * self.params['W' + n]
            grads['b' + n] = db

            loss += 0.5 * self.reg * (np.sum(np.square(self.params['W' + n])))

        return loss,grads
Ejemplo n.º 20
0
#######################################################################################
from layers import affine_forward

# Fixture: 2 inputs, each of shape (4, 5, 6), mapped to 3 outputs.
num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

# Total element counts of the input batch and of the weight matrix.
input_size = num_inputs * np.prod(input_shape)
weight_size = output_dim * np.prod(input_shape)

# Evenly spaced values, so the fixture is deterministic.
x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
w = np.linspace(-0.2, 0.3, num=weight_size).reshape(np.prod(input_shape),
                                                    output_dim)
b = np.linspace(-0.3, 0.1, num=output_dim)

# Only the first element of the returned pair is kept.
out, _ = affine_forward(x, w, b)
# Reference output for the fixture above.
correct_out = np.array([[1.49834967, 1.70660132, 1.91485297],
                        [3.25553199, 3.5141327, 3.77273342]])

# The error should be around 1e-9.
print('Testing affine_forward function:')
print('difference: ', rel_error(out, correct_out))
#######################################################################################

#######################################################################################
# Test the affine_backward function
#######################################################################################
from layers import affine_backward

# Seed NumPy's global RNG so the random test data below is reproducible.
np.random.seed(231)
# Random input drawn from a standard normal distribution, shape (10, 2, 3).
x = np.random.randn(10, 2, 3)
Ejemplo n.º 21
0
# Test the affine_forward function against precomputed reference values.

num_inputs = 2
input_shape = (4, 5, 6)
output_dim = 3

# Total number of scalars in the input batch and in the weight matrix.
input_size = num_inputs * np.prod(input_shape)
theta_size = output_dim * np.prod(input_shape)

# Evenly spaced (deterministic) values so the expected output is fixed.
x = np.linspace(-0.1, 0.5, num=input_size).reshape(num_inputs, *input_shape)
theta = np.linspace(-0.2, 0.3,
                    num=theta_size).reshape(np.prod(input_shape), output_dim)
theta_0 = np.linspace(-0.3, 0.1, num=output_dim)

# Only the layer output is checked; the returned cache is discarded.
out, _ = layers.affine_forward(x, theta, theta_0)
correct_out = np.array([[1.49834967,  1.70660132,  1.91485297],
                        [3.25553199,  3.5141327,   3.77273342]])

# Compare your output with ours. The error should be around 1e-9.
# Each print() call takes a single pre-formatted string, so the text emitted
# is the same whether print is a statement (Python 2) or a function
# (Python 3). The doubled space reproduces the separator that the original
# two-item print statement inserted.
print('Testing affine_forward function:')
print('difference (should be around 1e-9):  %s'
      % (rel_error(out, correct_out),))

# Problem 3.1.2
##########################################################################
#   Affine layer: backward.                                                       #
##########################################################################
#   In the file layers.py implement the affine_backward function.                 #
#   Once you are done you can test your implementation using numeric gradient.    #
##########################################################################
Ejemplo n.º 22
0
    def loss(self, X, y=None):
        """
        Compute loss and gradient for the fully-connected net.

        The first num_layers - 1 layers are each
        affine - [batchnorm] - relu - [dropout]; the last layer is a plain
        affine layer that produces the class scores.

        Inputs:
        - X: Array of input data; it is cast to self.dtype before use.
        - y: Array of labels, or None to run a test-time forward pass.

        Returns:
        If y is None, the scores produced by the last affine layer.

        If y is not None, a tuple of:
        - loss: Softmax loss plus 0.5 * reg * sum(W**2) over every weight
          matrix 'W<k>'.
        - grads: Dictionary mapping each parameter name used here ('W<k>',
          'b<k>' and, with batchnorm, 'gamma<k>' / 'beta<k>') to its gradient.
          Only the 'W<k>' gradients include the regularization term.
        """
        X = X.astype(self.dtype)
        mode = 'test' if y is None else 'train'

        # Set train/test mode for batchnorm params and dropout param since they
        # behave differently during training and testing.
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params:
                bn_param['mode'] = mode

        num_hidden = self.num_layers - 1
        cache = self.num_layers * [None]
        dropout_cache = num_hidden * [None]

        # Thread the activation through the layers starting from X itself, so
        # a network without hidden layers (num_layers == 1) feeds X -- not
        # None -- into the final affine layer.
        out = X
        for i in range(num_hidden):
            idx = i + 1  # parameter names are 1-based
            W = self.params['W%d' % idx]
            b = self.params['b%d' % idx]
            if self.use_batchnorm:
                # bn_params is 0-based: bn_params[i] belongs to layer i + 1.
                out, cache[i] = affine_bn_relu_forward(
                    out, W, b,
                    self.params['gamma%d' % idx],
                    self.params['beta%d' % idx], self.bn_params[i])
            else:
                out, cache[i] = affine_relu_forward(out, W, b)
            if self.use_dropout:
                out, dropout_cache[i] = dropout_forward(
                    out, self.dropout_param)

        last = self.num_layers
        scores, cache[num_hidden] = affine_forward(
            out, self.params['W%d' % last], self.params['b%d' % last])

        # If test mode return early
        if mode == 'test':
            return scores

        grads = {}
        loss, dout = softmax_loss(scores, y)
        dout, grads['W%d' % last], grads['b%d' % last] = affine_backward(
            dout, cache[num_hidden])

        # Walk the hidden layers in reverse, undoing dropout first because it
        # was the last operation applied in the forward pass of each layer.
        for i in reversed(range(num_hidden)):
            idx = i + 1
            if self.use_dropout:
                dout = dropout_backward(dout, dropout_cache[i])
            if self.use_batchnorm:
                (dout, grads['W%d' % idx], grads['b%d' % idx],
                 grads['gamma%d' % idx],
                 grads['beta%d' % idx]) = affine_bn_relu_backward(
                     dout, cache[i])
            else:
                dout, grads['W%d' % idx], grads['b%d' % idx] = \
                    affine_relu_backward(dout, cache[i])

        # L2 regularization on the weight matrices only; biases and the
        # batchnorm scale/shift parameters are not regularized.
        for idx in range(1, self.num_layers + 1):
            W = self.params['W%d' % idx]
            loss += .5 * self.reg * np.sum(np.square(W))
            grads['W%d' % idx] += self.reg * W

        return loss, grads
Ejemplo n.º 23
0
    def build_sampler(self, max_len=20):
        """
        Build the ops that decode a caption greedily (argmax at every step).

        Input:
        - max_len: number of decoding steps, i.e. max length of a generated
          caption
        Place Holder:
        - features: input image features of shape (N, L, D)

        Returns a tuple (alphas, sampled_captions):
        - alphas: per-step attention weights, stacked and transposed to
          (N, max_len, L)
        - sampled_captions: per-step sampled word indices, stacked and
          transposed to (N, max_len)
        """
        feats = self.features
        weights = self.params

        # Sizes handed to the step functions.
        dims = {
            'batch_size': self.N,
            'spacial_size': self.L,
            'dim_feature': self.D,
            'n_time_step': self.T,
            'dim_hidden': self.H,
            'vocab_size': self.V
        }

        # Initial hidden and cell states come from the features averaged over
        # axis 1.
        feat_mean = tf.reduce_mean(feats, 1)
        h_state = affine_tanh_forward(
            feat_mean, weights['W_init_h'], weights['b_init_h'])  # (N, H)
        c_state = affine_tanh_forward(
            feat_mean, weights['W_init_c'], weights['b_init_c'])  # (N, H)

        words_per_step = []
        alphas_per_step = []
        word_idx = None  # word chosen at the previous step; unused at step 0

        for step in range(max_len):
            if step > 0:
                # Feed back the embedding of the previously sampled word.
                word_vec = word_embedding_forward(
                    word_idx, weights['W_embed'])  # (N, M)
            else:
                # No previous word yet: start from an all-zero input vector.
                word_vec = tf.zeros([self.N, self.M])

            # One recurrent step with attention; only the LSTM variant
            # carries a cell state forward.
            if self.cell_type != 'rnn':
                h_state, c_state, attn = lstm_step_forward_with_attention(
                    word_vec, feats, h_state, c_state, weights,
                    dims)  # (N, H), (N, H), (N, L)
            else:
                h_state, attn = rnn_step_forward_with_attention(
                    word_vec, feats, h_state, weights,
                    dims)  # (N, H), (N, L)

            alphas_per_step.append(attn)

            # Vocabulary scores from the current hidden state, then take the
            # highest-scoring word index in [0, V).
            step_logits = affine_forward(
                h_state, weights['W_vocab'], weights['b_vocab'])  # (N, V)
            word_idx = tf.argmax(step_logits, 1)  # (N, )
            # Collected in a list because tensors do not support item
            # assignment.
            words_per_step.append(word_idx)

        # Stack along a new leading (time) axis, then move batch to the front.
        stacked_alphas = tf.pack(alphas_per_step)
        alphas = tf.transpose(stacked_alphas, (1, 0, 2))  # (N, max_len, L)
        stacked_words = tf.pack(words_per_step)
        sampled_captions = tf.transpose(stacked_words, (1, 0))  # (N, max_len)

        return alphas, sampled_captions