Example 1
    def test_affine_layer_backward(self):
        print("\n======== TestLayers.test_affine_layer_backward:")

        x = np.random.randn(10, 2, 3)
        w = np.random.randn(6, 5)
        b = np.random.randn(5)
        dout = np.random.randn(10, 5)

        dx_num = check_gradient.eval_numerical_gradient_array(
            lambda x: layers.affine_forward(x, w, b)[0], x, dout)
        dw_num = check_gradient.eval_numerical_gradient_array(
            lambda w: layers.affine_forward(x, w, b)[0], w, dout)
        db_num = check_gradient.eval_numerical_gradient_array(
            lambda b: layers.affine_forward(x, w, b)[0], b, dout)

        _, cache = layers.affine_forward(x, w, b)
        dx, dw, db = layers.affine_backward(dout, cache)

        dx_diff = error.rel_error(dx_num, dx)
        dw_diff = error.rel_error(dw_num, dw)
        db_diff = error.rel_error(db_num, db)

        print("dx error : %.9f" % dx_diff)
        print("dw error : %.9f" % dw_diff)
        print("db error : %.9f" % db_diff)

        # NOTE : occasionally we may randomly get a value greater than self.eps
        # here. It isn't worth re-writing this test so that it passes every
        # time; fixing the random seed (or simply re-running) may be better.
        self.assertLessEqual(dx_diff, self.eps)
        self.assertLessEqual(dw_diff, self.eps)
        self.assertLessEqual(db_diff, self.eps)

        print("======== TestLayers.test_affine_layer_backward: <END> ")
Example 2
    def test_gradient(self):
        x = np.random.randn(10, 2, 3)
        w = np.random.randn(6, 5)
        b = np.random.randn(5)
        dout = np.random.randn(10, 5)

        dx_num = check_gradient.eval_numerical_gradient_array(
            lambda x: layers.affine_forward(x, w, b)[0], x, dout)
        dw_num = check_gradient.eval_numerical_gradient_array(
            lambda w: layers.affine_forward(x, w, b)[0], w, dout)
        db_num = check_gradient.eval_numerical_gradient_array(
            lambda b: layers.affine_forward(x, w, b)[0], b, dout)

        _, cache = layers.affine_forward(x, w, b)
        dx, dw, db = layers.affine_backward(dout, cache)

        print("dx error : %.6f " % error.rel_error(dx_num, dx))
        print("dw error : %.6f " % error.rel_error(dw_num, dw))
        print("db error : %.6f " % error.rel_error(db_num, db))
Example 3
    def backward(self, dout: np.ndarray,
                 cache: Tuple[Any, Any, Any]
                 ) -> Tuple[np.ndarray, Dict[str, np.ndarray]]:
        """
        Backward pass through the layers from start to end, popping one cache
        per block and returning the input gradient dX along with a dict of
        parameter gradients.
        """
        start, end, layer_caches = cache
        dnext_a = dout
        grads = {}

        for i in reversed(range(start, end + 1)):
            i1 = i + 1
            if i == len(self.conv_params) + 1:
                # Last affine layer
                dprev_a, dw, db = layers.affine_backward(
                    dnext_a, layer_caches.pop())
                grads['W%d' % i1] = dw
                grads['b%d' % i1] = db
            elif i == len(self.conv_params):
                # Affine hidden layer
                dprev_a, dw, db, dgamma, dbeta = layers.affine_norm_relu_backward(
                    dnext_a, layer_caches.pop())
                grads['W%d' % i1] = dw
                grads['b%d' % i1] = db
                grads['gamma%d' % i1] = dgamma
                grads['beta%d' % i1] = dbeta
            elif 0 <= i < len(self.conv_params):
                dprev_a, dw, db, dgamma, dbeta = layers.conv_bn_relu_backward(
                    dnext_a, layer_caches.pop())
                grads['W%d' % i1] = dw
                grads['b%d' % i1] = db
                grads['gamma%d' % i1] = dgamma
                grads['beta%d' % i1] = dbeta
            else:
                raise ValueError('Invalid layer index %d' % i)
            dnext_a = dprev_a

        dX = dnext_a

        return dX, grads
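The composite backward functions used above (layers.affine_norm_relu_backward, layers.conv_bn_relu_backward) are not shown in this example. As a sketch of the assumed pattern, the backward pass of an affine -> batchnorm -> ReLU block simply unrolls the three sub-layer backward calls in reverse order; the cache layout and the names layers.relu_backward / layers.batchnorm_backward are assumptions here:

def affine_norm_relu_backward(dout, cache):
    # Assumed cache layout: (fc_cache, bn_cache, relu_cache), saved by the
    # matching affine -> batchnorm -> ReLU forward pass.
    fc_cache, bn_cache, relu_cache = cache
    da = layers.relu_backward(dout, relu_cache)
    da, dgamma, dbeta = layers.batchnorm_backward(da, bn_cache)
    dx, dw, db = layers.affine_backward(da, fc_cache)
    return dx, dw, db, dgamma, dbeta
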
Example 4
    def loss(self, X, y=None):
        """
        Evaluate loss and gradient for the three-layer convnet
        """

        X = X.astype(self.dtype)  # convert datatype
        if y is None:
            mode = 'test'
        else:
            mode = 'train'

        # TODO: Batchnorm here

        N = X.shape[0]
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']

        # TODO : more batchnorm stuff here

        fsize = W1.shape[2]
        conv_param = {'stride': 1, 'pad': int((fsize - 1) / 2)}
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
        scores = None

        # ===============================
        # FORWARD PASS
        # ===============================
        x = X
        w = W1
        b = b1
        # Forward into the conv layer
        # TODO : batchnorm
        conv_layer, cache_conv_layer = conv_layers.conv_relu_pool_forward(
            x, w, b, conv_param, pool_param)

        N, F, Hp, Wp = conv_layer.shape  # Shape of output

        # Forward into the hidden layer
        x = conv_layer.reshape((N, F, Hp * Wp))
        w = W2
        b = b2
        hidden_layer, cache_hidden_layer = layers.affine_relu_forward(x, w, b)
        N, Hh = hidden_layer.shape

        # Forward into linear output layer
        x = hidden_layer
        w = W3
        b = b3
        scores, cache_scores = layers.affine_forward(x, w, b)

        if mode == 'test':
            return scores

        loss = 0
        grads = {}
        # ===============================
        # BACKWARD PASS
        # ===============================
        data_loss, dscores = layers.softmax_loss(scores, y)
        reg_loss = 0.5 * self.reg * np.sum(W1**2)
        reg_loss += 0.5 * self.reg * np.sum(W2**2)
        reg_loss += 0.5 * self.reg * np.sum(W3**2)
        loss = data_loss + reg_loss

        # backprop into output layer
        dx3, dW3, db3 = layers.affine_backward(dscores, cache_scores)
        dW3 += self.reg * W3

        # backprop into first fc layer
        dx2, dW2, db2 = layers.affine_relu_backward(dx3, cache_hidden_layer)
        dW2 += self.reg * W2

        # Backprop into conv layer
        # Reshape back to the conv layer output shape before the conv backward pass
        dx2 = dx2.reshape(N, F, Hp, Wp)
        dx, dW1, db1 = conv_layers.conv_relu_pool_backward(
            dx2, cache_conv_layer)
        dW1 += self.reg * W1

        grads.update({
            'W1': dW1,
            'W2': dW2,
            'W3': dW3,
            'b1': db1,
            'b2': db2,
            'b3': db3
        })

        return loss, grads
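The loss above (and in the following examples) comes from layers.softmax_loss. A minimal sketch of what that function is assumed to compute, i.e. a numerically stable softmax cross-entropy loss averaged over the batch together with its gradient on the scores:

def softmax_loss(scores, y):
    # Shift scores for numerical stability before exponentiating.
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    probs = np.exp(log_probs)
    N = scores.shape[0]
    loss = -np.sum(log_probs[np.arange(N), y]) / N
    # Gradient on the scores: (softmax - one_hot(y)) / N
    dscores = probs.copy()
    dscores[np.arange(N), y] -= 1
    dscores /= N
    return loss, dscores
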
Example 5
    def loss(self, X: np.ndarray, y: Union[None, np.ndarray] = None) -> Any:
        """
        Evaluate loss and gradient for the convnet
        """

        X = X.astype(self.dtype)
        N = X.shape[0]
        if y is None:
            mode = 'test'
        else:
            mode = 'train'

        # Layer parameters
        conv_param = {'stride': 1, 'pad': int((self.filter_size - 1) / 2)}
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
        if self.use_batchnorm:
            for k, bn in self.bn_params.items():
                bn['mode'] = mode

        scores = None
        blocks = {}
        blocks['h0'] = X
        # ===============================
        # FORWARD PASS
        # ===============================

        # Forward into conv block
        for l in range(self.L):
            idx = l + 1
            W = self.params['W' + str(idx)]
            b = self.params['b' + str(idx)]
            h = blocks['h' + str(idx - 1)]

            if self.use_batchnorm:
                beta = self.params['beta' + str(idx)]
                gamma = self.params['gamma' + str(idx)]
                bn_param = self.bn_params['bn_param' + str(idx)]
                h, cache_h = conv_layers.conv_norm_relu_pool_forward(
                    h, W, b, conv_param, pool_param, gamma, beta, bn_param)
            else:
                h, cache_h = conv_layers.conv_relu_pool_forward(
                    h, W, b, conv_param, pool_param)
            blocks['h' + str(idx)] = h
            blocks['cache_h' + str(idx)] = cache_h

        # Forward into linear blocks
        for l in range(self.M):
            idx = self.L + l + 1
            h = blocks['h' + str(idx - 1)]
            if l == 0:
                h = h.reshape(N, np.prod(h.shape[1:]))

            W = self.params['W' + str(idx)]
            b = self.params['b' + str(idx)]

            if self.use_batchnorm:
                beta = self.params['beta' + str(idx)]
                gamma = self.params['gamma' + str(idx)]
                bn_param = self.bn_params['bn_param' + str(idx)]
                h, cache_h = layers.affine_norm_relu_forward(
                    h, W, b, gamma, beta, bn_param)
            else:
                h, cache_h = layers.affine_relu_forward(h, W, b)
            blocks['h' + str(idx)] = h
            blocks['cache_h' + str(idx)] = cache_h

        # Forward into the score
        idx = self.L + self.M + 1
        W = self.params['W' + str(idx)]
        b = self.params['b' + str(idx)]
        h = blocks['h' + str(idx - 1)]
        h, cache_h = layers.affine_forward(h, W, b)
        blocks['h' + str(idx)] = h
        blocks['cache_h' + str(idx)] = cache_h

        scores = blocks['h' + str(idx)]

        if y is None:
            return scores

        loss = 0.0
        grads: Dict[str, Any] = {}
        # Compute the loss
        data_loss, dscores = layers.softmax_loss(scores, y)
        reg_loss = 0.0
        for k, w in self.params.items():
            if k[0] == 'W':
                reg_loss += 0.5 * self.reg * np.sum(w * w)
        loss = data_loss + reg_loss

        # ===============================
        # BACKWARD PASS
        # ===============================
        idx = self.L + self.M + 1
        dh = dscores
        h_cache = blocks['cache_h' + str(idx)]
        dh, dW, db = layers.affine_backward(dh, h_cache)
        blocks['dh' + str(idx - 1)] = dh
        blocks['dW' + str(idx)] = dW
        blocks['db' + str(idx)] = db

        # Backprop into the linear layers
        for l in reversed(range(self.M)):
            idx = self.L + l + 1
            dh = blocks['dh' + str(idx)]
            h_cache = blocks['cache_h' + str(idx)]
            if self.use_batchnorm:
                dh, dW, db, dgamma, dbeta = layers.affine_norm_relu_backward(
                    dh, h_cache)
                blocks['dgamma' + str(idx)] = dgamma
                blocks['dbeta' + str(idx)] = dbeta
            else:
                dh, dW, db = layers.affine_relu_backward(dh, h_cache)
            blocks['dh' + str(idx - 1)] = dh
            blocks['dW' + str(idx)] = dW
            blocks['db' + str(idx)] = db

        # Backprop into conv blocks
        for l in reversed(range(self.L)):
            idx = l + 1
            dh = blocks['dh' + str(idx)]
            h_cache = blocks['cache_h' + str(idx)]
            if l == self.L - 1:
                # Undo the flattening applied before the first linear block
                dh = dh.reshape(*blocks['h' + str(idx)].shape)

            if self.use_batchnorm:
                dh, dW, db, dgamma, dbeta = conv_layers.conv_norm_relu_pool_backward(
                    dh, h_cache)
                blocks['dgamma' + str(idx)] = dgamma
                blocks['dbeta' + str(idx)] = dbeta
            else:
                dh, dW, db = conv_layers.conv_relu_pool_backward(dh, h_cache)
            blocks['dh' + str(idx - 1)] = dh
            blocks['dW' + str(idx)] = dW
            blocks['db' + str(idx)] = db

        # Add reg term to W gradients
        dw_list = {}
        for key, val in blocks.items():
            if key[:2] == 'dW':
                dw_list[key[1:]] = val + self.reg * self.params[key[1:]]

        db_list = {}
        for key, val in blocks.items():
            if key[:2] == 'db':
                db_list[key[1:]] = val

        # TODO : This is a hack
        dgamma_list = {}
        for key, val in blocks.items():
            if key[:6] == 'dgamma':
                dgamma_list[key[1:]] = val

        # TODO : This is a hack
        dbeta_list = {}
        for key, val in blocks.items():
            if key[:5] == 'dbeta':
                dbeta_list[key[1:]] = val

        grads = {}
        grads.update(dw_list)
        grads.update(db_list)
        grads.update(dgamma_list)
        grads.update(dbeta_list)

        return loss, grads
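A loss/gradient pair like the one returned above is easiest to validate with a finite-difference check on individual parameters. A minimal sketch, assuming only the model.params dict and the (loss, grads) return value shown in these examples (constructing the model itself is left out):

def numeric_grad_of_loss(model, X, y, name, h=1e-6):
    # Centered-difference gradient of the scalar loss w.r.t. model.params[name].
    p = model.params[name]
    grad = np.zeros_like(p)
    it = np.nditer(p, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        ix = it.multi_index
        old = p[ix]
        p[ix] = old + h
        fplus = model.loss(X, y)[0]
        p[ix] = old - h
        fminus = model.loss(X, y)[0]
        p[ix] = old
        grad[ix] = (fplus - fminus) / (2 * h)
        it.iternext()
    return grad

# Example use (model, X, y constructed elsewhere; 'W1' assumed to exist):
#   loss, grads = model.loss(X, y)
#   num = numeric_grad_of_loss(model, X, y, 'W1')
#   print('W1 relative error: %e' % error.rel_error(num, grads['W1']))
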
Example 6
    def loss(self,
             X: np.ndarray,
             y: Union[np.ndarray, None] = None) -> Union[np.ndarray, Any]:
        """
        LOSS
        Compute loss and gradients for the fully connected network
        """

        X = X.astype(self.dtype)
        if y is None:
            mode = 'test'
        else:
            mode = 'train'

        # Set the dropout and batchnorm params based on whether this is a
        # training or a test run
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for k, bn_param in self.bn_params.items():
                bn_param['mode'] = mode

        # ===============================
        # FORWARD PASS
        # ===============================
        hidden = {}
        hidden['h0'] = X.reshape(X.shape[0],
                                 np.prod(X.shape[1:]))  # TODO : check this...

        if self.use_dropout:
            hdrop, cache_hdrop = layers.dropout_forward(
                hidden['h0'], self.dropout_param)
            hidden['hdrop0'] = hdrop
            hidden['cache_hdrop0'] = cache_hdrop

        # Iterate over layers
        for l in range(self.num_layers):
            idx = l + 1
            w = self.params['W' + str(idx)]
            b = self.params['b' + str(idx)]

            if self.use_dropout:
                h = hidden['hdrop' + str(idx - 1)]
            else:
                h = hidden['h' + str(idx - 1)]

            if self.use_batchnorm and idx != self.num_layers:
                gamma = self.params['gamma' + str(idx)]
                beta = self.params['beta' + str(idx)]
                bn_param = self.bn_params['bn_param' + str(idx)]

            # Compute the forward pass
            # output layer is a special case
            if idx == self.num_layers:
                h, cache_h = layers.affine_forward(h, w, b)
                hidden['h' + str(idx)] = h
                hidden['cache_h' + str(idx)] = cache_h
            else:
                if self.use_batchnorm:
                    h, cache_h = layers.affine_norm_relu_forward(
                        h, w, b, gamma, beta, bn_param)
                    hidden['h' + str(idx)] = h
                    hidden['cache_h' + str(idx)] = cache_h
                else:
                    h, cache_h = layers.affine_relu_forward(h, w, b)
                    hidden['h' + str(idx)] = h
                    hidden['cache_h' + str(idx)] = cache_h

                if self.use_dropout:
                    h = hidden['h' + str(idx)]
                    hdrop, cache_hdrop = layers.dropout_forward(
                        h, self.dropout_param)
                    hidden['hdrop' + str(idx)] = hdrop
                    hidden['cache_hdrop' + str(idx)] = cache_hdrop

        scores = hidden['h' + str(self.num_layers)]

        if mode == 'test':
            return scores

        loss = 0.0
        grads: Dict[str, Any] = {}
        # Compute loss
        data_loss, dscores = layers.softmax_loss(scores, y)
        reg_loss = 0.0
        for k, w in self.params.items():
            if k[0] == 'W':
                reg_loss += 0.5 * self.reg * np.sum(w * w)

        loss = data_loss + reg_loss
        # ===============================
        # BACKWARD PASS
        # ===============================
        hidden['dh' + str(self.num_layers)] = dscores
        for l in reversed(range(self.num_layers)):
            idx = l + 1
            dh = hidden['dh' + str(idx)]
            h_cache = hidden['cache_h' + str(idx)]

            if idx == self.num_layers:
                dh, dw, db = layers.affine_backward(dh, h_cache)
                hidden['dh' + str(idx - 1)] = dh
                hidden['dW' + str(idx)] = dw
                hidden['db' + str(idx)] = db
            else:
                if self.use_dropout:
                    cache_hdrop = hidden['cache_hdrop' + str(idx)]
                    dh = layers.dropout_backward(dh, cache_hdrop)
                if self.use_batchnorm:
                    dh, dw, db, dgamma, dbeta = layers.affine_norm_relu_backward(
                        dh, h_cache)
                    hidden['dh' + str(idx - 1)] = dh
                    hidden['dW' + str(idx)] = dw
                    hidden['db' + str(idx)] = db
                    hidden['dgamma' + str(idx)] = dgamma
                    hidden['dbeta' + str(idx)] = dbeta
                else:
                    dh, dw, db = layers.affine_relu_backward(
                        dh, h_cache)  # TODO This layer definition
                    hidden['dh' + str(idx - 1)] = dh
                    hidden['dW' + str(idx)] = dw
                    hidden['db' + str(idx)] = db

        # W gradients, with the regularization term added
        # TODO : Tidy this up
        dw_list = {}
        for key, val in hidden.items():
            if key[:2] == 'dW':
                dw_list[key[1:]] = val + self.reg * self.params[key[1:]]

        db_list = {}
        for key, val in hidden.items():
            if key[:2] == 'db':
                db_list[key[1:]] = val

        # TODO : This is a hack
        dgamma_list = {}
        for key, val in hidden.items():
            if key[:6] == 'dgamma':
                dgamma_list[key[1:]] = val

        # TODO : This is a hack
        dbeta_list = {}
        for key, val in hidden.items():
            if key[:5] == 'dbeta':
                dbeta_list[key[1:]] = val

        grads = {}
        grads.update(dw_list)
        grads.update(db_list)
        grads.update(dgamma_list)
        grads.update(dbeta_list)

        return loss, grads
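Example 6 also calls layers.dropout_forward and layers.dropout_backward, which are not shown here. A minimal sketch in the usual inverted-dropout convention, assuming dropout_param carries a keep probability 'p' and the 'mode' key set in loss() above (the project's actual convention, e.g. whether 'p' is the keep or drop probability, may differ):

def dropout_forward(x, dropout_param):
    # Inverted dropout: scale the kept activations at train time so that
    # test time is a plain pass-through.
    p, mode = dropout_param['p'], dropout_param['mode']
    if 'seed' in dropout_param:
        np.random.seed(dropout_param['seed'])
    mask = None
    if mode == 'train':
        mask = (np.random.rand(*x.shape) < p) / p
        out = x * mask
    else:
        out = x
    cache = (dropout_param, mask)
    return out, cache

def dropout_backward(dout, cache):
    # Route gradients only through the units that were kept at train time.
    dropout_param, mask = cache
    if dropout_param['mode'] == 'train':
        return dout * mask
    return dout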