Example #1
def residual_layer(n_channels, n_out_channels=None, stride=None):
    n_out_channels = n_out_channels or n_channels
    stride = stride or 1

    # Residual branch: conv -> BN -> conv -> BN (3x3 kernels, pad 1).
    convs = Sequential()
    add = convs.add
    add(SpatialConvolution(
        n_channels, n_out_channels, 3, 3, stride, stride, 1, 1))
    add(SpatialBatchNormalization(n_out_channels))
    add(SpatialConvolution(n_out_channels, n_out_channels, 3, 3, 1, 1, 1, 1))
    add(SpatialBatchNormalization(n_out_channels))

    # Shortcut branch: when the block downsamples, average-pool to halve the
    # spatial size and zero-pad the channel dimension; otherwise pass through.
    if stride > 1:
        shortcut = Sequential()
        shortcut.add(SpatialAveragePooling(2, 2, stride, stride))
        shortcut.add(Padding(1, (n_out_channels - n_channels)//2, 3))
    else:
        shortcut = Identity()

    # Sum the residual and shortcut branches, then apply the non-linearity.
    res = Sequential()
    res.add(ConcatTable().add(convs).add(shortcut)).add(CAddTable())
    # https://github.com/szagoruyko/wide-residual-networks/blob/master/models/resnet-pre-act.lua

    res.add(ReLU(True))

    return res
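
A minimal usage sketch of the block above, assuming the Torch-style update_output forward call that appears in Example #4; the expected shapes follow the out_shape annotations in Example #3's docstring.

import numpy as np

# Identity-shaped block: channel count and spatial size are preserved.
block = residual_layer(16)
y = block.update_output(np.random.randn(2, 16, 32, 32))
print(y.shape)   # expected: (2, 16, 32, 32)

# Downsampling block: stride 2 halves H and W while the channels grow 16 -> 32.
down = residual_layer(16, 32, 2)
print(down.update_output(np.random.randn(2, 16, 32, 32)).shape)   # expected: (2, 32, 16, 16)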
Example #2
def lenet():
    model = Sequential()
    add = model.add
    add(SpatialConvolution(1, 6, 5, 5, 1, 1, 0, 0))
    add(ReLU())
    add(SpatialMaxPooling(2, 2, 2, 2))
    add(SpatialConvolution(6, 16, 5, 5, 1, 1, 0, 0))
    add(ReLU())
    add(SpatialMaxPooling(2, 2, 2, 2))
    add(Reshape(400))
    add(Linear(400, 120))
    add(ReLU())
    add(Linear(120, 84))
    add(ReLU())
    add(Linear(84, 10))
    add(LogSoftMax())
    return model
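
A minimal smoke test for lenet(), assuming the update_output call used in Example #4. The Reshape(400) implies single-channel 32x32 inputs: 32 -> 28 (conv) -> 14 (pool) -> 10 (conv) -> 5 (pool), and 16*5*5 = 400.

import numpy as np

model = lenet()
x = np.random.randn(2, 1, 32, 32)   # a batch of two 1x32x32 images
scores = model.update_output(x)     # log-probabilities from LogSoftMax
print(scores.shape)                 # expected: (2, 10)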
Example #3
def resnet(n_size, num_starting_filters, reg):
    '''
    Implementation of "Deep Residual Learning for Image Recognition" (Kaiming
    He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - http://arxiv.org/abs/1512.03385).

    Inspired by https://github.com/gcr/torch-residual-networks

    This network should model behaviour similar to gcr's implementation.
    Check https://github.com/gcr/torch-residual-networks for more information
    about the structure.

    The network operates on minibatches of data that have shape (N, C, H, W)
    consisting of N images, each with height H and width W and with C input
    channels.

    As in the reference paper, the network has (6*n)+2 layers (not counting
    the optional final affine layers), composed as below:

                                                (image_dim: 3, 32, 32; F=16)
                                                (input_dim: N, *image_dim)
         INPUT
            |
            v
       +-------------------+
       |conv[F, *image_dim]|                    (out_shape: N, 16, 32, 32)
       +-------------------+
            |
            v
       +-------------------------+
       |n * res_block[F, F, 3, 3]|              (out_shape: N, 16, 32, 32)
       +-------------------------+
            |
            v
       +-------------------------+
       |res_block[2*F, F, 3, 3]  |              (out_shape: N, 32, 16, 16)
       +-------------------------+
            |
            v
       +---------------------------------+
       |(n-1) * res_block[2*F, 2*F, 3, 3]|      (out_shape: N, 32, 16, 16)
       +---------------------------------+
            |
            v
       +-------------------------+
       |res_block[4*F, 2*F, 3, 3]|              (out_shape: N, 64, 8, 8)
       +-------------------------+
            |
            v
       +---------------------------------+
       |(n-1) * res_block[4*F, 4*F, 3, 3]|      (out_shape: N, 64, 8, 8)
       +---------------------------------+
            |
            v
       +-------------+
       |pool[1, 8, 8]|                          (out_shape: N, 64, 1, 1)
       +-------------+
            |
            v
       +- - - - - - - - -+
       |(opt) m * affine |                      (out_shape: N, 64, 1, 1)
       +- - - - - - - - -+
            |
            v
       +-------+
       |softmax|                                (out_shape: N, num_classes)
       +-------+
            |
            v
         OUTPUT

    Every convolution layer has pad=1 and stride=1, except for the dimension-
    enhancing layers, which have a stride of 2 to maintain the computational
    complexity.
    Optionally, m affine layers can be inserted immediately before the softmax
    layer by setting the hidden_dims parameter, which should be a list of
    integers giving the number of neurons for each affine layer.

    Each residual block is composed as below:

                  Input
                     |
             ,-------+-----.
       Downsampling      3x3 convolution+dimensionality reduction
            |               |
            v               v
       Zero-padding      3x3 convolution
            |               |
            `-----( Add )---'
                     |
                  Output

    After every layer, a batch normalization with momentum .1 is applied.

    Weight initialization (check also layers/init.py and layers/README.md):
    - Initialize the weights and biases for the affine layers in the same way
      as Torch's default mode by calling _init_affine_wb, which returns a
      tuple (w, b).
    - Initialize the weights for the conv layers in the same way as Torch's
      default mode by calling init_conv_w.
    - Initialize the weights for the conv layers with Kaiming's method by
      calling init_conv_w_kaiming
      (http://arxiv.org/abs/1502.01852 and
       http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization)
    - Initialize the batch normalization layers' weights like Torch's default
      by calling init_bn_w.
    - Initialize the batch normalization layers' weights like gcr's first
      resblock's bn by calling init_bn_w_gcr
      (https://github.com/gcr/torch-residual-networks/blob/master/residual-layers.lua#L57-L59).

    Inputs:
    - n_size: n for the residual network, as in the reference paper (the
      network has (6*n_size)+2 layers).
    - num_starting_filters: Number of filters for the first convolution
      layer; the filter count then doubles at each downsampling stage
      (e.g. 16 -> 32 -> 64).
    - reg: Scalar giving L2 regularization strength.
    '''

    nfs = num_starting_filters
    model = Sequential()
    add = model.add
    add(SpatialConvolution(3, nfs, 3, 3, 1, 1, 1, 1))
    add(SpatialBatchNormalization(nfs))
    add(ReLU())

    # First stage: n_size blocks at nfs filters (32x32 maps), then one
    # downsampling block that doubles the filters, matching the diagram above.
    for i in range(n_size):
        add(residual_layer(nfs))
    add(residual_layer(nfs, 2*nfs, 2))

    # Second stage: n_size-1 blocks at 2*nfs filters (16x16), then downsample.
    for i in range(n_size - 1):
        add(residual_layer(2*nfs))
    add(residual_layer(2*nfs, 4*nfs, 2))

    # Third stage: n_size-1 blocks at 4*nfs filters (8x8 maps).
    for i in range(n_size - 1):
        add(residual_layer(4*nfs))

    # Global 8x8 average pooling down to 1x1, followed by the classifier.
    add(SpatialAveragePooling(8, 8))
    add(Reshape(nfs*4))
    add(Linear(nfs*4, 10))
    add(LogSoftMax())
    return model
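
A minimal smoke test for resnet(), under the same update_output assumption: with n_size=3 the builder produces a (6*3)+2 = 20-layer network for 3x32x32 inputs; note that reg is accepted but not used inside this builder.

import numpy as np

model = resnet(n_size=3, num_starting_filters=16, reg=0.0)
x = np.random.randn(2, 3, 32, 32)   # a CIFAR-sized minibatch
scores = model.update_output(x)     # log-probabilities over 10 classes
print(scores.shape)                 # expected: (2, 10)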
Example #4
import numpy as np
# NOTE: the import path below is assumed; the original snippet was truncated
# and started mid-import at "LogSoftMax)".
from pyfunt import (Sequential, SpatialConvolution, SpatialAveragePooling,
                    SpatialBatchNormalization, ReLU, Reshape, Linear,
                    LogSoftMax)
from pyfunt.utils import eval_numerical_gradient_array


def rel_error(x, y):
    """ returns relative error """
    return np.max(np.abs(x - y) / (np.maximum(1e-8, np.abs(x) + np.abs(y))))


x = np.random.randn(3, 4, 8, 8)
# x = np.random.randn(3, 2, 8, 8)
dout = np.random.randn(3, 10)
pool_param = {'pool_height': 2, 'pool_width': 2, 'stride': 2}  # unused below

s = Sequential()
s.add(SpatialConvolution(4, 2, 1, 1, 1, 1))
s.add(SpatialAveragePooling(2, 2, 2, 2, 0, 0))
s.add(SpatialBatchNormalization(2))
s.add(ReLU())
s.add(Reshape(2 * 4 * 4))
s.add(Linear(2 * 4 * 4, 10))
s.add(LogSoftMax())

dx_num = eval_numerical_gradient_array(lambda x: s.update_output(x), x, dout)

out = s.update_output(x)
dx = s.update_grad_input(x, dout)
# Your error should be around 1e-8
print('Testing net backward function:')
print('dx error: ', rel_error(dx, dx_num))
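
The same numerical check can be applied to a single layer in isolation; this sketch reuses only the update_output / update_grad_input calls already used above.

relu = ReLU()
x_small = np.random.randn(4, 5)
dout_small = np.random.randn(4, 5)

dx_num_small = eval_numerical_gradient_array(
    lambda v: relu.update_output(v), x_small, dout_small)
relu.update_output(x_small)
dx_small = relu.update_grad_input(x_small, dout_small)

# Should again be on the order of 1e-8 or smaller.
print('Testing single-layer backward:')
print('dx error: ', rel_error(dx_small, dx_num_small))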