Example #1
    def loss(
        self,
        X: np.ndarray,
        y: Union[None, np.ndarray] = None
    ) -> Union[np.ndarray, Tuple[np.ndarray, np.ndarray]]:
        """
        Classification loss used to train the network.

        Inputs:
            - X : Array of data of shape (N, 3, 64, 64)
            - y : Array of labels of shape (N,), or None to run in test mode

        Returns:
            - Class scores if y is None, otherwise a (loss, grads) tuple
        """

        if y is None:
            mode = 'test'
        else:
            mode = 'train'

        scores, cache = self.forward(X, mode=mode)
        if mode == 'test':
            return scores

        loss, dscores = layers.softmax_loss(scores, y)
        dx, grads = self.backward(dscores, cache)

        return loss, grads
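A minimal usage sketch for this interface (the model class name, shapes, and data below are illustrative assumptions, not from the source):

    import numpy as np

    model = SomeConvNet()                        # hypothetical model exposing this loss() method
    X_test = np.random.randn(2, 3, 64, 64)
    scores = model.loss(X_test)                  # y omitted -> test mode, returns scores only
    y_train = np.random.randint(0, 10, size=2)
    loss, grads = model.loss(X_test, y_train)    # train mode -> (loss, gradient dict)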
Example #2
    def loss(self, X, y=None):
        """
        Evaluate loss and gradient for the three-layer convnet
        """

        X = X.astype(self.dtype)  # convert datatype
        if y is None:
            mode = 'test'
        else:
            mode = 'train'

        # TODO: Batchnorm here

        N = X.shape[0]
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']

        # TODO : more batchnorm stuff here

        fsize = W1.shape[2]
        conv_param = {'stride': 1, 'pad': int((fsize - 1) / 2)}
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
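        # With stride 1 and pad = (fsize - 1) / 2 (fsize odd), the conv layer keeps
        # the input's spatial size; the 2x2 / stride-2 pool then halves H and W.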
        scores = None

        # ===============================
        # FORWARD PASS
        # ===============================
        x = X
        w = W1
        b = b1
        # Forward into the conv layer
        # TODO : batchnorm
        conv_layer, cache_conv_layer = conv_layers.conv_relu_pool_forward(
            x, w, b, conv_param, pool_param)

        N, F, Hp, Wp = conv_layer.shape  # Shape of output

        # Forward into the hidden layer
        x = conv_layer.reshape((N, F, Hp * Wp))
        w = W2
        b = b2
        hidden_layer, cache_hidden_layer = layers.affine_relu_forward(x, w, b)
        N, Hh = hidden_layer.shape

        # Forward into linear output layer
        x = hidden_layer
        w = W3
        b = b3
        scores, cache_scores = layers.affine_forward(x, w, b)

        if mode == 'test':
            return scores

        loss = 0
        grads = {}
        # ===============================
        # BACKWARD PASS
        # ===============================
        data_loss, dscores = layers.softmax_loss(scores, y)
        reg_loss = 0.5 * self.reg * np.sum(W1**2)
        reg_loss += 0.5 * self.reg * np.sum(W2**2)
        reg_loss += 0.5 * self.reg * np.sum(W3**2)
        loss = data_loss + reg_loss

        # backprop into output layer
        dx3, dW3, db3 = layers.affine_backward(dscores, cache_scores)
        dW3 += self.reg * W3
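        # self.reg * W is the gradient of the 0.5 * self.reg * sum(W**2) penalty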

        # backprop into first fc layer
        dx2, dW2, db2 = layers.affine_relu_backward(dx3, cache_hidden_layer)
        dW2 += self.reg * W2

        # Backprop into conv layer
        # Reshape back to the conv block's output shape before the conv backward pass
        dx2 = dx2.reshape(N, F, Hp, Wp)
        dx, dW1, db1 = conv_layers.conv_relu_pool_backward(
            dx2, cache_conv_layer)
        dW1 += self.reg * W1

        grads.update({
            'W1': dW1,
            'W2': dW2,
            'W3': dW3,
            'b1': db1,
            'b2': db2,
            'b3': db3
        })

        return loss, grads
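In train mode the method returns (loss, grads); a quick way to sanity-check the analytic gradients is a central-difference comparison. The helper below is a sketch that assumes a model object exposing params and this loss() method (the function name and defaults are illustrative, not from the source):

    import numpy as np

    def check_grad(model, X, y, param_name, h=1e-5, num_checks=3):
        # Compare a few entries of the analytic gradient against a
        # central-difference numerical estimate.
        w = model.params[param_name]
        _, grads = model.loss(X, y)
        for _ in range(num_checks):
            ix = tuple(np.random.randint(d) for d in w.shape)
            old_val = w[ix]
            w[ix] = old_val + h
            loss_plus, _ = model.loss(X, y)
            w[ix] = old_val - h
            loss_minus, _ = model.loss(X, y)
            w[ix] = old_val                      # restore the original value
            grad_numeric = (loss_plus - loss_minus) / (2 * h)
            print(param_name, ix, 'numeric:', grad_numeric,
                  'analytic:', grads[param_name][ix])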
Example #3
    def loss(self, X, y=None):
        X = X.astype(self.dtype)
        if y is None:
            mode = 'test'
        else:
            mode = 'train'

        # TODO : worry about dropout, batchnorm, later
        # Set batchnorm params based on whether this is a training or a test
        # run
        #self.dropout_param['mode'] = mode
        #if self.use_batchnorm:
        #    for k, bn_param in self.bn_params.items():
        #        bn_param[mode] = mode

        # ===============================
        # FORWARD PASS
        # ===============================
        #hidden['h0'] = X.reshape(X.shape[0], np.prod(X.shape[1:]))   # TODO ; Check this...
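        # Flatten each sample to a row vector so the first fully-connected layer sees (N, D)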
        h = X.reshape(X.shape[0], np.prod(X.shape[1:]))
        for l in range(self.num_layers):
            idx = l + 1
            if self.verbose:
                print("Layer %d forward pass" % idx)
                print(self.layers[l])
            # TODO : Dropout, batchnorm, etc
            h = self.layers[l].forward(h)
            if self.verbose:
                print(h.shape)

        #scores = hidden['h' + str(self.num_layers)]
        scores = h

        if mode == 'test':
            return scores

        loss = 0.0
        #grads = {}

        # Compute loss
        data_loss, dscores = layers.softmax_loss(scores, y)
        reg_loss = 0
        for l in range(self.num_layers):
            reg_loss += 0.5 * self.reg * np.sum(
                self.layers[l].W * self.layers[l].W)

        loss = data_loss + reg_loss

        # ===============================
        # BACKWARD PASS
        # ===============================
        #hidden['dh' + str(self.num_layers)] = dscores
        dh = dscores
        for l in range(self.num_layers)[::-1]:
            idx = l + 1

            # TODO : Need to create a structure in which to put all the
            # various derivatives for parameter update later
            lgrads = self.layers[l].backward(dh)
            dh = lgrads[0]

            #dh = hidden['dh' + str(idx)]
            #h_cache = hidden['cache_h' + str(idx)]

            #if idx == self.num_layers:
            #    dh, dw, db = layers.affine_backward(dh, h_cache)
            #    hidden['dh' + str(idx-1)] = dh
            #    hidden['dW' + str(idx)] = dw
            #    hidden['db' + str(idx)] = db
            #else:
            #    if self.use_dropout:
            #        cache_hdrop = hidden['cache_hdrop' + str(idx)]
            #        dh = layers.dropout_backward(dh, cache_hdrop)
            #    if self.use_batchnorm:
            #        dh, dw, db, dgamma, dbeta = layers.affine_norm_relu_backward(dh, h_cache)
            #        hidden['dh' + str(idx-1)] = dh
            #        hidden['dW' + str(idx)] = dw
            #        hidden['db' + str(idx)] = db
            #        hidden['dgamma' + str(idx)] = dgamma
            #        hidden['dbeta' + str(idx)] = dbeta
            #    else:
            #        dh, dw, db = layers.affine_relu_backward(dh, h_cache)         # TODO This layer definition
            #        hidden['dh' + str(idx-1)] = dh
            #        hidden['dW' + str(idx)] = dw
            #        hidden['db' + str(idx)] = db

        return loss
Example #4
    def loss(self, X: np.ndarray, y: Union[None, np.ndarray] = None) -> Any:
        """
        Evaluate loss and gradient for the convnet
        """

        X = X.astype(self.dtype)
        N = X.shape[0]
        if y is None:
            mode = 'test'
        else:
            mode = 'train'

        # Layer parameters
        conv_param = {'stride': 1, 'pad': int((self.filter_size - 1) / 2)}
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
        if self.use_batchnorm:
            for bn in self.bn_params.values():
                bn['mode'] = mode

        scores = None
        blocks = {}
        blocks['h0'] = X
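        # blocks holds activations and caches for each stage: L conv-relu-pool blocks,
        # then M affine(-norm)-relu blocks, then a final affine layer for the scores.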
        # ===============================
        # FORWARD PASS
        # ===============================

        # Forward into conv block
        for l in range(self.L):
            idx = l + 1
            W = self.params['W' + str(idx)]
            b = self.params['b' + str(idx)]
            h = blocks['h' + str(idx - 1)]

            if self.use_batchnorm:
                beta = self.params['beta' + str(idx)]
                gamma = self.params['gamma' + str(idx)]
                bn_param = self.bn_params['bn_param' + str(idx)]
                h, cache_h = conv_layers.conv_norm_relu_pool_forward(
                    h, W, b, conv_param, pool_param, gamma, beta, bn_param)
            else:
                h, cache_h = conv_layers.conv_relu_pool_forward(
                    h, W, b, conv_param, pool_param)
            blocks['h' + str(idx)] = h
            blocks['cache_h' + str(idx)] = cache_h

        # Forward into linear blocks
        for l in range(self.M):
            idx = self.L + l + 1
            h = blocks['h' + str(idx - 1)]
            if l == 0:
                h = h.reshape(N, np.prod(h.shape[1:]))

            W = self.params['W' + str(idx)]
            b = self.params['b' + str(idx)]

            if self.use_batchnorm:
                beta = self.params['beta' + str(idx)]
                gamma = self.params['gamma' + str(idx)]
                bn_param = self.bn_params['bn_param' + str(idx)]
                h, cache_h = layers.affine_norm_relu_forward(
                    h, W, b, gamma, beta, bn_param)
            else:
                h, cache_h = layers.affine_relu_forward(h, W, b)
            blocks['h' + str(idx)] = h
            blocks['cache_h' + str(idx)] = cache_h

        # Forward into the score
        idx = self.L + self.M + 1
        W = self.params['W' + str(idx)]
        b = self.params['b' + str(idx)]
        h = blocks['h' + str(idx - 1)]
        h, cache_h = layers.affine_forward(h, W, b)
        blocks['h' + str(idx)] = h
        blocks['cache_h' + str(idx)] = cache_h

        scores = blocks['h' + str(idx)]

        if y is None:
            return scores

        loss = 0.0
        grads: Dict[str, Any] = {}
        # Compute the loss
        data_loss, dscores = layers.softmax_loss(scores, y)
        reg_loss = 0.0
        for k in self.params.keys():
            if k[0] == 'W':
                reg_loss += 0.5 * self.reg * np.sum(self.params[k] ** 2)
        loss = data_loss + reg_loss

        # ===============================
        # BACKWARD PASS
        # ===============================
        idx = self.L + self.M + 1
        dh = dscores
        h_cache = blocks['cache_h' + str(idx)]
        dh, dW, db = layers.affine_backward(dh, h_cache)
        blocks['dh' + str(idx - 1)] = dh
        blocks['dW' + str(idx)] = dW
        blocks['db' + str(idx)] = db

        # Backprop into the linear layers
        for l in range(self.M)[::-1]:
            idx = self.L + l + 1
            dh = blocks['dh' + str(idx)]
            h_cache = blocks['cache_h' + str(idx)]
            if self.use_batchnorm:
                dh, dW, db, dgamma, dbeta = layers.affine_norm_relu_backward(
                    dh, h_cache)
                blocks['dgamma' + str(idx)] = dgamma
                blocks['dbeta' + str(idx)] = dbeta
            else:
                dh, dW, db = layers.affine_relu_backward(dh, h_cache)
            blocks['dh' + str(idx - 1)] = dh
            blocks['dW' + str(idx)] = dW
            blocks['db' + str(idx)] = db

        # Backprop into conv blocks
        for l in range(self.L)[::-1]:
            idx = l + 1
            dh = blocks['dh' + str(idx)]
            h_cache = blocks['cache_h' + str(idx)]
            if l == self.L - 1:
                # Un-flatten the gradient coming back from the first affine layer
                dh = dh.reshape(*blocks['h' + str(idx)].shape)

            if self.use_batchnorm:
                dh, dW, db, dgamma, dbeta = conv_layers.conv_norm_relu_pool_backward(
                    dh, h_cache)
                blocks['dgamma' + str(idx)] = dgamma
                blocks['dbeta' + str(idx)] = dbeta
            else:
                dh, dW, db = conv_layers.conv_relu_pool_backward(dh, h_cache)
            blocks['dh' + str(idx - 1)] = dh
            blocks['dW' + str(idx)] = dW
            blocks['db' + str(idx)] = db

        # Add reg term to W gradients
        dw_list = {}
        for key, val in blocks.items():
            if key[:2] == 'dW':
                dw_list[key[1:]] = val + self.reg * self.params[key[1:]]

        db_list = {}
        for key, val in blocks.items():
            if key[:2] == 'db':
                db_list[key[1:]] = val

        # TODO : This is a hack
        dgamma_list = {}
        for key, val in blocks.items():
            if key[:6] == 'dgamma':
                dgamma_list[key[1:]] = val

        # TODO : This is a hack
        dbeta_list = {}
        for key, val in blocks.items():
            if key[:5] == 'dbeta':
                dbeta_list[key[1:]] = val

        grads = {}
        grads.update(dw_list)
        grads.update(db_list)
        grads.update(dgamma_list)
        grads.update(dbeta_list)

        return loss, grads
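Because the keys of grads mirror those of self.params (W*, b*, and gamma*/beta* when batchnorm is used), a parameter update is a direct lookup. A minimal vanilla-SGD step might look like this (model, X_batch, y_batch, and learning_rate are assumed to exist; illustrative, not from the source):

    learning_rate = 1e-3
    loss, grads = model.loss(X_batch, y_batch)
    for name in model.params:
        model.params[name] -= learning_rate * grads[name]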
Example #5
    def loss(self,
             X: np.ndarray,
             y: Union[np.ndarray, None] = None) -> Union[np.ndarray, Any]:
        """
        LOSS
        Compute loss and gradients for the fully connected network
        """

        X = X.astype(self.dtype)
        if y is None:
            mode = 'test'
        else:
            mode = 'train'

        # Set dropout and batchnorm params based on whether this is a training
        # or a test run
        if self.use_dropout:
            self.dropout_param['mode'] = mode
        if self.use_batchnorm:
            for bn_param in self.bn_params.values():
                bn_param['mode'] = mode

        # ===============================
        # FORWARD PASS
        # ===============================
        hidden = {}
        hidden['h0'] = X.reshape(X.shape[0],
                                 np.prod(X.shape[1:]))  # TODO ; Check this...

        if self.use_dropout:
            hdrop, cache_hdrop = layers.dropout_forward(
                hidden['h0'], self.dropout_param)
            hidden['hdrop0'] = hdrop
            hidden['cache_hdrop0'] = cache_hdrop

        # Iterate over layers
        for l in range(self.num_layers):
            idx = l + 1
            w = self.params['W' + str(idx)]
            b = self.params['b' + str(idx)]

            if self.use_dropout:
                h = hidden['hdrop' + str(idx - 1)]
            else:
                h = hidden['h' + str(idx - 1)]

            if self.use_batchnorm and idx != self.num_layers:
                gamma = self.params['gamma' + str(idx)]
                beta = self.params['beta' + str(idx)]
                bn_param = self.bn_params['bn_param' + str(idx)]

            # Compute the forward pass
            # output layer is a special case
            if idx == self.num_layers:
                h, cache_h = layers.affine_forward(h, w, b)
                hidden['h' + str(idx)] = h
                hidden['cache_h' + str(idx)] = cache_h
            else:
                if self.use_batchnorm:
                    h, cache_h = layers.affine_norm_relu_forward(
                        h, w, b, gamma, beta, bn_param)
                    hidden['h' + str(idx)] = h
                    hidden['cache_h' + str(idx)] = cache_h
                else:
                    h, cache_h = layers.affine_relu_forward(h, w, b)
                    hidden['h' + str(idx)] = h
                    hidden['cache_h' + str(idx)] = cache_h

                if self.use_dropout:
                    h = hidden['h' + str(idx)]
                    hdrop, cache_hdrop = layers.dropout_forward(
                        h, self.dropout_param)
                    hidden['hdrop' + str(idx)] = hdrop
                    hidden['cache_hdrop' + str(idx)] = cache_hdrop

        scores = hidden['h' + str(self.num_layers)]
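        # The output layer is a plain affine transform: no ReLU, batchnorm, or
        # dropout is applied when computing the class scores.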

        if mode == 'test':
            return scores

        loss = 0.0
        grads: Dict[str, Any] = {}
        # Compute loss
        data_loss, dscores = layers.softmax_loss(scores, y)
        reg_loss = 0.0
        for k in self.params.keys():
            if k[0] == 'W':
                reg_loss += 0.5 * self.reg * np.sum(self.params[k] ** 2)

        loss = data_loss + reg_loss
        # ===============================
        # BACKWARD PASS
        # ===============================
        hidden['dh' + str(self.num_layers)] = dscores
        for l in range(self.num_layers)[::-1]:
            idx = l + 1
            dh = hidden['dh' + str(idx)]
            h_cache = hidden['cache_h' + str(idx)]

            if idx == self.num_layers:
                dh, dw, db = layers.affine_backward(dh, h_cache)
                hidden['dh' + str(idx - 1)] = dh
                hidden['dW' + str(idx)] = dw
                hidden['db' + str(idx)] = db
            else:
                if self.use_dropout:
                    cache_hdrop = hidden['cache_hdrop' + str(idx)]
                    dh = layers.dropout_backward(dh, cache_hdrop)
                if self.use_batchnorm:
                    dh, dw, db, dgamma, dbeta = layers.affine_norm_relu_backward(
                        dh, h_cache)
                    hidden['dh' + str(idx - 1)] = dh
                    hidden['dW' + str(idx)] = dw
                    hidden['db' + str(idx)] = db
                    hidden['dgamma' + str(idx)] = dgamma
                    hidden['dbeta' + str(idx)] = dbeta
                else:
                    dh, dw, db = layers.affine_relu_backward(
                        dh, h_cache)  # TODO This layer definition
                    hidden['dh' + str(idx - 1)] = dh
                    hidden['dW' + str(idx)] = dw
                    hidden['db' + str(idx)] = db

        # w gradients where we add the regularization term
        # TODO : Tidy this up
        dw_list = {}
        for key, val in hidden.items():
            if key[:2] == 'dW':
                dw_list[key[1:]] = val + self.reg * self.params[key[1:]]

        db_list = {}
        for key, val in hidden.items():
            if key[:2] == 'db':
                db_list[key[1:]] = val

        # TODO : This is a hack
        dgamma_list = {}
        for key, val in hidden.items():
            if key[:6] == 'dgamma':
                dgamma_list[key[1:]] = val

        # TODO : This is a hack
        dbeta_list = {}
        for key, val in hidden.items():
            if key[:5] == 'dbeta':
                dbeta_list[key[1:]] = val

        grads = {}
        grads.update(dw_list)
        grads.update(db_list)
        grads.update(dgamma_list)
        grads.update(dbeta_list)

        return loss, grads
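From the way this method reads them, the bookkeeping attributes would be initialized roughly as below. This is a sketch inferred from the usage above, not the source's constructor; the 'p' field and its default are assumptions:

    # One bn_param dict per hidden layer, keyed 'bn_param1', 'bn_param2', ...
    self.bn_params = {'bn_param' + str(i + 1): {'mode': 'train'}
                      for i in range(self.num_layers - 1)}
    # Single dropout_param dict shared by every dropout layer
    self.dropout_param = {'mode': 'train', 'p': 0.5}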