Example #1
    def loss(self, X, y=None):
        """
        Evaluate loss and gradient for the six-layer convolutional network.

        Input / output: Same API as TwoLayerNet in fc_net.py.
        """
        W1, b1 = self.params['W1'], self.params['b1']
        W2, b2 = self.params['W2'], self.params['b2']
        W3, b3 = self.params['W3'], self.params['b3']
        W4, b4 = self.params['W4'], self.params['b4']
        gamma1, beta1 = self.params['gamma1'], self.params['beta1']
        gamma2, beta2 = self.params['gamma2'], self.params['beta2']

        filter_size = W1.shape[2]
        conv_param = {'stride': 1, 'pad': (filter_size - 1) // 2}
        pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}
        # NOTE: both batchnorm layers are hard-coded to 'train' mode here; a full
        # implementation would typically persist bn_params across calls and switch
        # to 'test' mode when y is None so running statistics are used at test time.
        bn_param = [{'mode': 'train'}, {'mode': 'train'}]

        scores = None
        a1, cache1 = conv_bn_relu_forward(X, W1, b1, gamma1, beta1, conv_param,
                                          bn_param[0])
        a2, cache2 = max_pool_forward_fast(a1, pool_param)
        a3, cache3 = conv_bn_relu_forward(a2, W2, b2, gamma2, beta2,
                                          conv_param, bn_param[1])
        a4, cache4 = max_pool_forward_fast(a3, pool_param)
        a5, cache5 = affine_relu_forward(a4, W3, b3)
        scores, cache6 = affine_forward(a5, W4, b4)

        if y is None:
            return scores

        loss, grads = 0, {}
        loss, dscore = softmax_loss(scores, y)

        da5, grads['W4'], grads['b4'] = affine_backward(dscore, cache6)
        da4, grads['W3'], grads['b3'] = affine_relu_backward(da5, cache5)
        da3 = max_pool_backward_fast(da4, cache4)
        da2, grads['W2'], grads['b2'], grads['gamma2'], grads['beta2'] = \
            conv_bn_relu_backward(da3, cache3)
        da1 = max_pool_backward_fast(da2, cache2)
        _, grads['W1'], grads['b1'], grads['gamma1'], grads['beta1'] = \
            conv_bn_relu_backward(da1, cache1)

        grads['W1'] += self.reg * W1
        grads['W2'] += self.reg * W2
        grads['W3'] += self.reg * W3
        grads['W4'] += self.reg * W4
        loss += 0.5 * self.reg * sum([np.sum(W**2) for W in [W1, W2, W3, W4]])

        return loss, grads
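# A minimal usage sketch of the method above. The class name SixLayerConvNet, its
# constructor arguments, and the batch shapes below are illustrative assumptions,
# not part of the original code: calling loss() without labels returns test-time
# scores, while passing labels returns the regularized loss and a gradient dict
# keyed like self.params.
import numpy as np

model = SixLayerConvNet(reg=1e-3)              # hypothetical wrapper exposing loss()
X_batch = np.random.randn(16, 3, 32, 32)       # (N, C, H, W) CIFAR-10-sized input
y_batch = np.random.randint(10, size=16)

scores = model.loss(X_batch)                   # y omitted -> class scores, shape (N, num_classes)
loss, grads = model.loss(X_batch, y_batch)     # y given   -> scalar loss plus gradients
for name, grad in grads.items():
    assert grad.shape == model.params[name].shape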
Example #2
def conv_relu_pool_backward(dout, cache):
    """
    Backward pass for the conv-relu-pool convenience layer
    """
    conv_cache, relu_cache, pool_cache = cache
    ds = max_pool_backward_fast(dout, pool_cache)
    da = relu_backward(ds, relu_cache)
    dx, dw, db = conv_backward_fast(da, conv_cache)
    return dx, dw, db
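# The cache unpacked above is built by the matching forward convenience function.
# A minimal sketch of that forward, assuming conv_forward_fast, relu_forward and
# max_pool_forward_fast from the same codebase (it mirrors the CS231n convention):
def conv_relu_pool_forward(x, w, b, conv_param, pool_param):
    """
    Convenience layer: convolution followed by ReLU and max pooling.
    Returns the output and the cache tuple expected by conv_relu_pool_backward.
    """
    a, conv_cache = conv_forward_fast(x, w, b, conv_param)
    s, relu_cache = relu_forward(a)
    out, pool_cache = max_pool_forward_fast(s, pool_param)
    return out, (conv_cache, relu_cache, pool_cache)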
Example #4
t0 = time()
out_naive, cache_naive = max_pool_forward_naive(x, pool_param)
t1 = time()
out_fast, cache_fast = max_pool_forward_fast(x, pool_param)
t2 = time()

print('Testing pool_forward_fast:')
print('Naive: %fs' % (t1 - t0))
print('fast: %fs' % (t2 - t1))
print('speedup: %fx' % ((t1 - t0) / (t2 - t1)))
print('difference: ', rel_error(out_naive, out_fast))

t0 = time()
dx_naive = max_pool_backward_naive(dout, cache_naive)
t1 = time()
dx_fast = max_pool_backward_fast(dout, cache_fast)
t2 = time()

print('\nTesting pool_backward_fast:')
print('Naive: %fs' % (t1 - t0))
print('fast: %fs' % (t2 - t1))
print('speedup: %fx' % ((t1 - t0) / (t2 - t1)))
print('dx difference: ', rel_error(dx_naive, dx_fast))
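# The rel_error helper used above is not shown in the snippet; the CS231n notebooks
# define it (roughly) as the maximum elementwise relative error:
import numpy as np

def rel_error(x, y):
    """Maximum relative error between two arrays (sketch of the notebook helper)."""
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))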

# Convolutional "sandwich" layers
# Previously we introduced the concept of "sandwich" layers that combine multiple
# operations into commonly used patterns. In the file cs231n/layer_utils.py you
# will find sandwich layers that implement a few commonly used patterns for
# convolutional networks.

from cs231n.layer_utils import conv_relu_pool_forward, conv_relu_pool_backward
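# A quick sanity check of the sandwich pair (a sketch; the shapes are illustrative,
# and eval_numerical_gradient_array is the numeric-gradient helper from
# cs231n.gradient_check):
import numpy as np
from cs231n.gradient_check import eval_numerical_gradient_array

x = np.random.randn(2, 3, 16, 16)
w = np.random.randn(3, 3, 3, 3)
b = np.random.randn(3,)
dout = np.random.randn(2, 3, 8, 8)
conv_param = {'stride': 1, 'pad': 1}
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

out, cache = conv_relu_pool_forward(x, w, b, conv_param, pool_param)
dx, dw, db = conv_relu_pool_backward(dout, cache)

# Compare the analytic dx against a numerically evaluated gradient.
dx_num = eval_numerical_gradient_array(
    lambda x: conv_relu_pool_forward(x, w, b, conv_param, pool_param)[0], x, dout)
print('dx relative error: ', rel_error(dx, dx_num))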
Example #5
    def _backward(self, x, cache):
        return max_pool_backward_fast(x, cache)

t0 = time()
out_naive, cache_naive = max_pool_forward_naive(x, pool_param)
t1 = time()
out_fast, cache_fast = max_pool_forward_fast(x, pool_param)
t2 = time()

print('Testing pool_forward_fast:')
print('Naive: %fs' % (t1 - t0))
print('fast: %fs' % (t2 - t1))
print('speedup: %fx' % ((t1 - t0) / (t2 - t1)))
print('difference: ', rel_error(out_naive, out_fast))

t0 = time()
dx_naive = max_pool_backward_naive(dout, cache_naive)
t1 = time()
dx_fast = max_pool_backward_fast(dout, cache_fast)
t2 = time()

print('\nTesting pool_backward_fast:')
print('Naive: %fs' % (t1 - t0))
print('fast: %fs' % (t2 - t1))
print('speedup: %fx' % ((t1 - t0) / (t2 - t1)))
print('dx difference: ', rel_error(dx_naive, dx_fast))


# # Convolutional "sandwich" layers
# Previously we introduced the concept of "sandwich" layers that combine multiple operations into commonly used patterns. In the file `cs231n/layer_utils.py` you will find sandwich layers that implement a few commonly used patterns for convolutional networks.

# In[11]:

from cs231n.layer_utils import conv_relu_pool_forward, conv_relu_pool_backward
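# For orientation, a forward pass through the sandwich layer with illustrative shapes:
# 'same' padding keeps the conv output at H x W, and the 2x2 / stride-2 pool then
# halves each spatial dimension (all sizes below are assumptions for the sketch).
import numpy as np

x = np.random.randn(4, 3, 32, 32)
w = np.random.randn(8, 3, 7, 7)
b = np.random.randn(8,)
conv_param = {'stride': 1, 'pad': 3}     # (7 - 1) // 2 = 3 preserves 32 x 32
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}

out, _ = conv_relu_pool_forward(x, w, b, conv_param, pool_param)
print(out.shape)   # expected: (4, 8, 16, 16)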