Example #1
    def __init__(self, depth=9):
        self.depth = depth

        # input shape (channels, height, width) for CIFAR-10
        train = (3, 32, 32)
        # feature-map depths 16, 32, 64; stride 2 wherever the depth steps up
        nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
        strides = [
            1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
        ]

        # Now construct the network
        layers = [Conv(**self.conv_params(3, 16))]
        layers.append(self.module_s1(nfms[0], True))

        for nfm, stride in zip(nfms[1:], strides):
            res_module = (self.module_s1(nfm) if stride == 1
                          else self.module_s2(nfm))
            layers.append(res_module)
        layers.append(BatchNorm())
        layers.append(Activation(Rectlin()))
        layers.append(Pooling('all', op='avg'))
        layers.append(
            Affine(10,
                   init=Kaiming(local=False),
                   batch_norm=True,
                   activation=Softmax()))
        self.layers = layers
        model = Model(layers=layers)
        cost = GeneralizedCost(costfunc=CrossEntropyMulti())
        model.initialize(train, cost=cost)
        self.model = model
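A minimal usage sketch for the constructor above, assuming it belongs to a class (the name ResidualNet below is hypothetical; the snippet does not show the enclosing class) and that a neon backend exists before the model is built:

from neon.backends import gen_backend

be = gen_backend(backend='cpu', batch_size=128)  # backend must exist first
net = ResidualNet(depth=9)  # hypothetical class wrapping the __init__ above
# net.model is now initialized against the (3, 32, 32) CIFAR-10 input shape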
Example #2
def deconv_layer(name,
                 n_feature,
                 ker_size=4,
                 strides=2,
                 padding=1,
                 activation=lrelu,
                 batch_norm=True,
                 bias=None):
    """
    Layer configuration for deep-convolutional (DC) discriminator

    Arguments:
        name (string): Layer name'
        n_feature (int): Number of output feature maps
        ker_size (int): Size of convolutional kernel (defaults to 4)
        strides (int): Stride of convolution (defaults to 2)
        padding (int): Padding of convolution (defaults to 1)
        activation (object): Activation function (defaults to leaky ReLu)
        batch_norm(bool): Enable batch normalization (defaults to True)
    """
    layers = []
    layers.append(
        Deconvolution(fshape=(ker_size, ker_size, n_feature),
                      strides=strides,
                      padding=padding,
                      dilation={},
                      init=init_w,
                      bsum=batch_norm,
                      name=name))
    if batch_norm:
        layers.append(BatchNorm(name=name + '_bnorm', **bn_prm))
    if bias is not None:
        layers.append(Bias(init=bias, name=name + '_bias'))
    layers.append(Activation(transform=activation, name=name + '_rectlin'))
    return layers
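deconv_layer relies on three module-level names it does not define: init_w, bn_prm, and lrelu. A hedged sketch of plausible definitions and of stacking the helper into a generator-style tail (the specific values are assumptions, not taken from the original source):

from neon.initializers import Gaussian
from neon.transforms import Rectlin, Tanh

init_w = Gaussian(scale=0.02)  # assumed weight initializer
bn_prm = dict(rho=0.99)        # assumed extra BatchNorm kwargs
lrelu = Rectlin(slope=0.1)     # leaky ReLU used as the default activation

layers = []
layers += deconv_layer('dec1', 128)  # each call doubles the spatial size
layers += deconv_layer('dec2', 64)
layers += deconv_layer('dec_out', 3, activation=Tanh(), batch_norm=False)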
Example #3
def create_network(stage_depth):
    # Structure of the deep residual part of the network:
    # stage_depth bottleneck modules (1x1, 3x3, 1x1 convs) per stage, at
    # feature-map depths of 16, 32, 64
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * stage_depth)]
    strides = [
        1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
    ]

    # Now construct the network
    layers = [Conv(**conv_params(3, 16))]
    layers.append(module_s1(nfms[0], True))

    for nfm, stride in zip(nfms[1:], strides):
        res_module = module_s1(nfm) if stride == 1 else module_s2(nfm)
        layers.append(res_module)
    layers.append(BatchNorm())
    layers.append(Activation(Rectlin()))
    layers.append(Pooling('all', op='avg'))
    layers.append(
        Affine(10,
               init=Kaiming(local=False),
               batch_norm=True,
               activation=Softmax()))

    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyMulti())
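A hedged sketch of driving create_network end to end, assuming a backend and a CIFAR-10 training iterator named train_set already exist:

from neon.callbacks.callbacks import Callbacks
from neon.optimizers import GradientDescentMomentum

model, cost = create_network(stage_depth=9)
opt = GradientDescentMomentum(0.1, momentum_coef=0.9, wdecay=0.0001)
model.fit(train_set, optimizer=opt, cost=cost, num_epochs=10,
          callbacks=Callbacks(model))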
Example #4
def module_s1(nfm, first=False):
    '''
    Non-strided (stride 1) pre-activation bottleneck residual module.
    '''
    sidepath = Conv(
        **conv_params(1, nfm * 4, 1, False, False)) if first else SkipNode()
    mainpath = [] if first else [BatchNorm(), Activation(Rectlin())]
    mainpath.append(Conv(**conv_params(1, nfm)))
    mainpath.append(Conv(**conv_params(3, nfm)))
    mainpath.append(
        Conv(**conv_params(1, nfm * 4, relu=False, batch_norm=False)))

    return MergeSum([sidepath, mainpath])
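Note the ordering: BatchNorm and Rectlin come before the convolutions, i.e. this is a pre-activation bottleneck. Both paths end at a depth of nfm * 4 (the 1x1 sidepath projection when first is True, otherwise an identity SkipNode, plus the final 1x1 expansion on the main path), which is what lets MergeSum add them elementwise.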
Example #5
def module_s2(nfm):
    '''
    Strided (stride 2) bottleneck residual module; halves the spatial size.
    '''
    module = [BatchNorm(), Activation(Rectlin())]
    mainpath = [
        Conv(**conv_params(1, nfm, stride=2)),
        Conv(**conv_params(3, nfm)),
        Conv(**conv_params(1, nfm * 4, relu=False, batch_norm=False))
    ]
    sidepath = [
        Conv(**conv_params(1, nfm * 4, stride=2, relu=False, batch_norm=False))
    ]
    module.append(MergeSum([sidepath, mainpath]))
    return module
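A small sketch of how the two module types compose, mirroring what create_network does (the shape annotations are derived from the code above):

modules = [module_s1(16, first=True)]  # e.g. 32x32 spatial, 64 maps out
modules.append(module_s2(32))          # downsamples to 16x16, 128 maps out
modules.append(module_s1(32))          # 16x16 preserved, 128 maps out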
Example #6
def mlp_layer(name, nout, activation=relu, batch_norm=False, bias=None):
    """
    Layer configuration for MLP generator/discriminator

    Arguments:
        name (string): Layer name
        nout (int): Number of output feature maps
        activation (object): Activation function (defaults to ReLu)
        batch_norm(bool): Enable batch normalization (defaults to False)
    """
    layers = []
    layers.append(Linear(nout=nout, init=init_w, bsum=batch_norm, name=name))
    if batch_norm:
        layers.append(BatchNorm(name=name + '_bnorm', **bn_prm))
    if bias is not None:
        layers.append(Bias(init=bias, name=name + '_bias'))
    layers.append(Activation(transform=activation, name=name + '_rectlin'))
    return layers
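mlp_layer follows the same pattern as deconv_layer above (core layer, optional BatchNorm, optional Bias, then Activation), with Linear in place of Deconvolution. Assuming the same init_w and bn_prm globals (plus relu = Rectlin() for the default activation), a small generator stack might be assembled as:

from neon.transforms import Logistic

layers = []
layers += mlp_layer('gen1', 256, batch_norm=True)
layers += mlp_layer('gen2', 512, batch_norm=True)
layers += mlp_layer('gen_out', 784, activation=Logistic())  # MNIST-sized output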
Example #7
def test_model_serialize(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    # two copies of the input, one per MergeConcat path below
    train_set = DataIterator([X_train, X_train],
                             y_train,
                             nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [
        Conv((5, 5, 16),
             init=init_norm,
             bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    path2 = [
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    layers = [
        MergeConcat([path1, path2]),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        BatchNorm(),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3
    # Train model for num_epochs epochs, breaking after a few batches each time
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)
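The test drives training by hand (fprop, get_errors, bprop, optimize) rather than through Model.fit so it can stop after a handful of batches; it then checks that outputs, parameters, and optimizer states all survive a serialize/load_weights round trip. The isinstance(..., list) branches exist because BatchNorm layers serialize their params and states as lists rather than single arrays.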
Example #8
be = gen_backend(backend=args.backend,
                 batch_size=batch_size,
                 rng_seed=args.rng_seed,
                 device_id=args.device_id,
                 default_dtype=args.datatype)

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = DataIterator(X_train, y_train, nclass=nclass)
valid_set = DataIterator(X_test, y_test, nclass=nclass)

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
layers = []
layers.append(Affine(nout=100, init=init_norm, activation=Rectlin()))
layers.append(BatchNorm())
layers.append(
    Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True)))
cost = GeneralizedCost(costfunc=CrossEntropyBinary())
mlp = Model(layers=layers)

# define stopping function
# it takes a tuple (State, val[t]) -- the cumulative validation state
# (generated by this function itself) and the validation error at time t --
# and returns a tuple (State', Bool): the new state and whether to stop


def stopFunc(s, v):
    # Stop if validation error ever increases from epoch to epoch
    if s is None:
        return (v, False)
    return (v, v > s)
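The snippet is cut off at this point. For completeness, a hedged sketch of how such a stopping function is typically attached in neon (assuming the Callbacks.add_early_stop_callback hook, as in neon's early-stopping example):

from neon.callbacks.callbacks import Callbacks
from neon.optimizers import GradientDescentMomentum

optimizer = GradientDescentMomentum(0.1, momentum_coef=0.9)
callbacks = Callbacks(mlp, eval_set=valid_set)
callbacks.add_early_stop_callback(stopFunc)  # consults stopFunc each epoch
mlp.fit(train_set, optimizer=optimizer, cost=cost, num_epochs=250,
        callbacks=callbacks)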
Example #9
init = Gaussian(scale=0.01)

# discriminator using convolutional layers
lrelu = Rectlin(slope=0.1)  # leaky relu for discriminator
# sigmoid = Logistic() # sigmoid activation function
conv1 = dict(init=init, batch_norm=False, activation=lrelu, bias=init)
conv2 = dict(init=init, batch_norm=False, activation=lrelu, padding=2, bias=init)
conv3 = dict(init=init, batch_norm=False, activation=lrelu, padding=1, bias=init)
b1 = BranchNode("b1")
b2 = BranchNode("b2")
branch1 = [
    b1,
    Conv((5, 5, 5, 32), **conv1),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv2),
    BatchNorm(),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv2),
    BatchNorm(),
    Dropout(keep=0.8),
    Conv((5, 5, 5, 8), **conv3),
    BatchNorm(),
    Dropout(keep=0.8),
    Pooling((2, 2, 2)),
    Affine(1024, init=init, activation=lrelu),
    BatchNorm(),
    Affine(1024, init=init, activation=lrelu),
    BatchNorm(),
    b2,
    Affine(nout=1, init=init, bias=init, activation=Logistic())
]  # real/fake output score
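The BranchNode markers b1 and b2 only pay off once a second sequence attaches to them. A hedged sketch of a hypothetical auxiliary head sharing the trunk up to b2, using neon's Tree container:

from neon.layers import Tree
from neon.transforms import Softmax

# Hypothetical 10-way class head reusing every branch1 layer up to b2
branch2 = [b2, Affine(nout=10, init=init, activation=Softmax())]
disc = Tree([branch1, branch2])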