Example #1
def run(be, fake_dilation, fsz, stride, pad, dilation):
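    # Builds a small conv net whose middle layer uses dilated convolution; when
    # fake_dilation is set, that layer is swapped for a regular conv with the
    # expanded (dilated) filter size and the saved weights are reloaded so the
    # two paths can be compared.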
    K = 8
    strides = stride
    padding = pad
    be.rng = be.gen_rng(be.rng_seed)

    in_shape = 16
    while out_shape(in_shape, fsz, stride, dilation, pad) < 3:
        in_shape *= 2
    train_shape = (1, in_shape, in_shape)

    inp = be.array(be.rng.randn(np.prod(train_shape), be.bsz))
    init = Gaussian()

    layers = [
        Conv((5, 5, K), init=init),
        Conv((fsz, fsz, K),
             strides=strides,
             padding=padding,
             init=init,
             dilation=dict(dil_d=1, dil_h=dilation, dil_w=dilation)),
        Conv((3, 3, K), init=init),
        Affine(nout=1, init=init)
    ]
    model = Model(layers=layers)
    cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    model.initialize(train_shape, cost)

    if fake_dilation:
        # Perform regular convolution with an expanded filter.
        weights = save(model)
        new_layers = layers
        # Replace the middle layer.
        new_fsz = dilated_fsz(fsz, dilation)
        new_layers[1] = Conv((new_fsz, new_fsz, K),
                             strides=strides,
                             padding=padding,
                             init=init)
        model = Model(layers=new_layers)
        cost = GeneralizedCost(costfunc=CrossEntropyBinary())
        model.initialize(train_shape, cost)
        load(weights, model, K, fsz, dilation)

    print(model)
    model.optimizer = GradientDescentMomentum(learning_rate=0.01,
                                              momentum_coef=0.9)
    outputs = fprop(model, inp)
    weights = bprop(model, outputs)
    model.optimizer.optimize(model.layers_to_optimize, epoch=0)
    return outputs.get(), weights.get()
Example #2
def test_concat_sequence_l1_l1(backend_default, allrand_args, deltas_buffer):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Different-sized input steps
    nin = 128
    steps = [32, 64]
    nout = 256
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for _ in (0, 1)]
    inputs = [
        be.array(dtypeu(np.random.random((nin, batch_size * step))))
        for step in steps
    ]
    merge = MergeMultistream(layers, merge="recurrent")
    assert (len(inputs) == len(layers))
    merge.configure(inputs)
    merge.allocate()

    merge.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    merge.set_deltas(deltas_buffer)

    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate(
        [np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)], axis=1)

    assert allclose_with_out(out, out_exp, atol=1e-3)

    err_lst = [
        dtypeu(np.random.random((nout, batch_size * step))) for step in steps
    ]
    err_concat = be.array(np.concatenate(err_lst, axis=1))
    merge.bprop(err_concat)
    dW_exp_lst = [
        np.dot(err,
               inp.get().T) for (err, inp) in zip(err_lst, inputs)
    ]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert allclose_with_out(layer.dW.get(), dW_exp)
    return
Example #3
def layers(self):
    init_uni = Uniform(low=-0.1, high=0.1)
    bn = False
    return [
        Conv((5, 5, 16),
             init=init_uni,
             activation=Rectlin(),
             batch_norm=bn),
        Pooling((2, 2)),
        Conv((5, 5, 32),
             init=init_uni,
             activation=Rectlin(),
             batch_norm=bn),
        Pooling((2, 2)),
        Affine(nout=500,
               init=init_uni,
               activation=Rectlin(),
               batch_norm=bn),
        Affine(nout=self.noutputs,
               init=init_uni,
               bias=Constant(0),
               activation=Softmax() if self.use_softmax else Logistic(
                   shortcut=True))
    ]
Example #4
def test_model_get_outputs(backend_default, data):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)
    train_set = ArrayIterator(X_train[:backend_default.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
Example #5
def run(args, train, test):
    init_uni = Uniform(low=-0.1, high=0.1)
    opt_gdm = GradientDescentMomentum(learning_rate=0.01,
                                      momentum_coef=0.9,
                                      stochastic_round=args.rounding)
    layers = [
        Conv((5, 5, 16), init=init_uni, activation=Rectlin(), batch_norm=True),
        Pooling((2, 2)),
        Conv((5, 5, 32), init=init_uni, activation=Rectlin(), batch_norm=True),
        Pooling((2, 2)),
        Affine(nout=500, init=init_uni, activation=Rectlin(), batch_norm=True),
        Affine(nout=10, init=init_uni, activation=Softmax())
    ]
    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    mlp = Model(layers=layers)
    callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)
    mlp.fit(train,
            optimizer=opt_gdm,
            num_epochs=args.epochs,
            cost=cost,
            callbacks=callbacks)
    err = mlp.eval(test, metric=Misclassification()) * 100
    print('Misclassification error = %.2f%%' % err)
    return err
Example #6
def test_model_get_outputs(backend_default, data):
    dataset = MNIST(path=data)
    train_set = dataset.train_iter

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output[:output.shape[0], :])

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
Example #7
def prepare_model(ninputs=9600, nclass=5):
    """
    Set up and compile the model architecture (Logistic regression)
    """
    layers = [
        Affine(nout=nclass,
               init=Gaussian(loc=0.0, scale=0.01),
               activation=Softmax())
    ]

    cost = GeneralizedCost(costfunc=CrossEntropyMulti())
    opt = Adam()
    lrmodel = Model(layers=layers)

    return lrmodel, opt, cost
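# A minimal usage sketch: fit the pieces returned by prepare_model on random
# placeholder data, mirroring the Model.fit calls in the other examples here.
# Assumes a backend already exists (gen_backend) and that ArrayIterator and
# Callbacks are imported from neon.data / neon.callbacks.callbacks.
import numpy as np

X = np.random.random((128, 9600))      # 128 samples, 9600 features
y = np.random.randint(5, size=128)     # 5 classes
train_iter = ArrayIterator(X, y, nclass=5)

lrmodel, opt, cost = prepare_model(ninputs=9600, nclass=5)
lrmodel.fit(train_iter, optimizer=opt, cost=cost, num_epochs=2,
            callbacks=Callbacks(lrmodel))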
Example #8
def gen_model(backend_type):
    # setup backend
    gen_backend(backend=backend_type,
                batch_size=batch_size,
                rng_seed=2,
                device_id=args.device_id,
                default_dtype=args.datatype)

    init_uni = Uniform(low=-0.1, high=0.1)

    # Set up the model layers
    layers = []
    layers.append(
        Conv((5, 5, 16), init=init_uni, bias=Constant(0),
             activation=Rectlin()))
    layers.append(Pooling(2))
    layers.append(Conv((5, 5, 32), init=init_uni, activation=Rectlin()))
    layers.append(Pooling(2))
    layers.append(Affine(nout=500, init=init_uni, activation=Rectlin()))
    layers.append(
        Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True)))

    mlp = Model(layers=layers)
    return mlp
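# A minimal usage sketch, assuming batch_size and args (device_id, datatype)
# are defined as module-level globals before gen_model is called; valid_set
# stands in for any neon dataset iterator (e.g. the MNIST iterators above).
mlp = gen_model('cpu')
outputs = mlp.get_outputs(valid_set)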
Example #9
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example #10
def resnet(depth, num_classes, s):
    # Structure of the deep residual part of the network:
    # `depth` modules of 2 convolutional layers each, at feature-map depths of 16, 32 and 64
    nfms = [2**(stage + 4) for stage in sorted(list(range(3)) * depth)]
    strides = [1] + [
        1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
    ]

    # Now construct the network
    layers = [Conv(**conv_params(3, 16))]
    for nfm, stride in zip(nfms, strides):
        layers.append(module_factory(nfm, stride))
    layers.append(Pooling(s, op='avg'))
    layers.append(
        Affine(nout=num_classes,
               init=Kaiming(local=False),
               batch_norm=True,
               activation=Softmax()))

    return layers
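# A minimal usage sketch, assuming conv_params and module_factory are helper
# functions defined alongside resnet; depth=3, 10 classes and an 8x8 average
# pooling window are placeholder values (e.g. for CIFAR-10 sized inputs).
model = Model(layers=resnet(depth=3, num_classes=10, s=8))
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
opt = GradientDescentMomentum(0.1, momentum_coef=0.9, wdecay=0.0001)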
Example #11
def test_model_get_outputs_rnn(backend_default, data):

    data_path = load_text('ptb-valid', path=data)

    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(data_set)

    assert output.shape == (
        data_set.ndata, data_set.seq_length, data_set.nclass)
Example #12
def test_concat_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Different-sized inputs and outputs
    nins = [128, 1024]
    nouts = [64, 2048]
    batch_size = 16
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for nout in nouts]
    inputs = [
        be.array(dtypeu(np.random.random((nin, batch_size)))) for nin in nins
    ]
    merge = MergeMultistream(layers, merge="stack")
    assert (len(inputs) == len(layers))
    merge.configure(inputs)
    merge.allocate()
    merge.set_deltas(None)
    out = merge.fprop(inputs).asnumpyarray()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.asnumpyarray() for layer in sublayers]
    out_exp = np.concatenate(
        [np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)])

    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size))) for nout in nouts]
    err_concat = np.concatenate(err_lst)
    merge.bprop(be.array(err_concat))
    dW_exp_lst = [
        np.dot(err,
               inp.asnumpyarray().T) for (err, inp) in zip(err_lst, inputs)
    ]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert np.allclose(layer.dW.asnumpyarray(), dW_exp)
    return
Example #13
def main_branch(branch_nodes):
    return [
        Conv((7, 7, 64), padding=3, strides=2, **common),
        Pooling(**pool3s2p1),
        Conv((1, 1, 64), **common),
        Conv((3, 3, 192), **commonp1),
        Pooling(**pool3s2p1),
        inception([(64, ), (96, 128), (16, 32), (32, )]),
        inception([(128, ), (128, 192), (32, 96), (64, )]),
        Pooling(**pool3s2p1),
        inception([(192, ), (96, 208), (16, 48), (64, )]), branch_nodes[0],
        inception([(160, ), (112, 224), (24, 64), (64, )]),
        inception([(128, ), (128, 256), (24, 64), (64, )]),
        inception([(112, ), (144, 288), (32, 64), (64, )]), branch_nodes[1],
        inception([(256, ), (160, 320), (32, 128), (128, )]),
        Pooling(**pool3s2p1),
        inception([(256, ), (160, 320), (32, 128), (128, )]),
        inception([(384, ), (192, 384), (48, 128), (128, )]),
        Pooling(fshape=7, strides=1, op="avg"),
        Affine(nout=1000, init=init1, activation=Softmax(), bias=Constant(0))
    ]
Example #14
def create_model(vocab_size, rlayer_type):
    """
    Create LSTM/GRU model for bAbI dataset.

    Args:
        vocab_size (int) : Size of the vocabulary (number of distinct tokens in the bAbI data).
        rlayer_type (string) : Type of recurrent layer to use (gru or lstm).

    Returns:
        Model : Model of the created network
    """
    # recurrent layer parameters (default gru)
    rlayer_obj = GRU if rlayer_type == 'gru' else LSTM
    rlayer_params = dict(output_size=100,
                         reset_cells=True,
                         init=GlorotUniform(),
                         init_inner=Orthonormal(0.5),
                         activation=Tanh(),
                         gate_activation=Logistic())

    # if using lstm, swap the activation functions
    if rlayer_type == 'lstm':
        rlayer_params.update(
            dict(activation=Logistic(), gate_activation=Tanh()))

    # lookup layer parameters
    lookup_params = dict(vocab_size=vocab_size,
                         embedding_dim=50,
                         init=Uniform(-0.05, 0.05))

    # Model construction
    story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
    query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

    layers = [
        MergeMultistream(layers=[story_path, query_path], merge="stack"),
        Affine(vocab_size, init=GlorotUniform(), activation=Softmax())
    ]

    return Model(layers=layers)
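# A minimal usage sketch, wiring the bAbI model to a cost and optimizer the way
# the other examples here do; babi_train and babi_valid stand in for the
# story/query dataset iterators and vocab_size=40 is a placeholder.
model = create_model(vocab_size=40, rlayer_type='gru')
cost = GeneralizedCost(costfunc=CrossEntropyMulti())
model.fit(babi_train, optimizer=Adam(), num_epochs=20, cost=cost,
          callbacks=Callbacks(model, eval_set=babi_valid))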
Example #15
def main_branch(branch_nodes):
    return [Conv((7, 7, 64), padding=3, strides=2, name='conv1/7x7_s2', **common),
            Pooling(name="pool1/3x3_s2", **pool3s2p1),
            Conv((1, 1, 64), name='conv2/3x3_reduce', **common),
            Conv((3, 3, 192), name="conv2/3x3",  **commonp1),
            Pooling(name="pool2/3x3_s2", **pool3s2p1),
            inception([(64, ), (96, 128), (16, 32), (32, )], name='inception_3a/'),
            inception([(128,), (128, 192), (32, 96), (64, )], name='inception_3b/'),
            Pooling(name='pool3/3x3_s2', **pool3s2p1),
            inception([(192,), (96, 208), (16, 48), (64, )], name='inception_4a/'),
            branch_nodes[0],
            inception([(160,), (112, 224), (24, 64), (64, )], name='inception_4b/'),
            inception([(128,), (128, 256), (24, 64), (64, )], name='inception_4c/'),
            inception([(112,), (144, 288), (32, 64), (64, )], name='inception_4d/'),
            branch_nodes[1],
            inception([(256,), (160, 320), (32, 128), (128,)], name='inception_4e/'),
            Pooling(name='pool4/3x3_s2', **pool3s2p1),
            inception([(256,), (160, 320), (32, 128), (128,)], name='inception_5a/'),
            inception([(384,), (192, 384), (48, 128), (128,)], name="inception_5b/"),
            Pooling(fshape=7, strides=1, op="avg", name='pool5/7x7_s1'),
            Affine(nout=1000, init=init1, activation=Softmax(),
                   bias=Constant(0), name='loss3/classifier')]
Example #16
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})

    map_list = opt.map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example #17
import numpy as np

from neon.backends import gen_backend
from neon.initializers import Uniform
from neon.models import Model
from neon.layers import Conv, Pooling, Affine, GeneralizedCost
from neon.transforms import Rectlin, Softmax, CrossEntropyMulti
from neon.optimizers import GradientDescentMomentum, RMSProp
from neon.callbacks.callbacks import Callbacks
from neon.data import ArrayIterator
from Readfile import DataSet, readfile
be = gen_backend(backend='cpu', batch_size=30)
init_uni = Uniform(low=-0.1, high=0.1)
layers = [
    Conv(fshape=(4, 4, 16), init=init_uni, activation=Rectlin()),
    Pooling(fshape=2, strides=2),
    Conv(fshape=(4, 4, 32), init=init_uni, activation=Rectlin()),
    Pooling(fshape=2, strides=2),
    Conv(fshape=(4, 4, 32), init=init_uni, activation=Rectlin()),
    Pooling(fshape=2, strides=2),
    Affine(nout=500, init=init_uni, activation=Rectlin()),
    Affine(nout=11, init=init_uni, activation=Softmax())
]

model = Model(layers)

model.load_params('model.pkl')
data = readfile('PreImage', 'label.csv')
X_test = data.test_data
test_set = ArrayIterator(X_test, None, nclass=11, lshape=(1, 200, 200))
true = data.test_label
out = model.get_outputs(test_set)
row = len(X_test)
pred = np.zeros((row, 1))
i = 0
while i < row:
Example #18
# download shakespeare text
data_path = load_shakespeare(path=args.data_dir)
train_path, valid_path = Text.create_valid_file(data_path)

# load data and parse on character-level
train_set = Text(time_steps, train_path)
valid_set = Text(time_steps, valid_path, vocab=train_set.vocab)

# weight initialization
init = Uniform(low=-0.08, high=0.08)

# model initialization
layers = [
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh()),
    Affine(len(train_set.vocab), init, bias=init, activation=Softmax())
]
model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))

optimizer = RMSProp(gradient_clip_value=gradient_clip_value,
                    stochastic_round=args.rounding)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# fit and validate
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
Example #19
tiny = dict(str_h=1, str_w=1)
small = dict(str_h=1, str_w=2)
big = dict(str_h=1, str_w=4)
common = dict(batch_norm=True, activation=Rectlin())
layers = {
    1: [
        Conv((3, 5, 64), init=gauss, strides=big, **common),
        Pooling(2, strides=2),
        Conv((3, 3, 128), init=gauss, strides=small, **common),
        Pooling(2, strides=2),
        Conv((3, 3, 256), init=gauss, strides=small, **common),
        Conv((2, 2, 512), init=gauss, strides=tiny, **common),
        Conv((2, 2, 128), init=gauss, strides=tiny, **common),
        DeepBiRNN(64, init=glorot, reset_cells=True, depth=3, **common),
        RecurrentMean(),
        Affine(nout=2, init=gauss, activation=Softmax())
    ],
    2: [
        Conv((3, 5, 64), init=gauss, strides=big, **common),
        Pooling(2, strides=2),
        Dropout(0.8),
        Conv((3, 3, 128), init=gauss, strides=small, **common),
        Pooling(2, strides=2),
        Dropout(0.4),
        Conv((3, 3, 256), init=gauss, strides=small, **common),
        Dropout(0.2),
        Conv((2, 2, 512), init=gauss, strides=tiny, **common),
        Conv((2, 2, 128), init=gauss, strides=tiny, **common),
        DeepBiRNN(64, init=glorot, reset_cells=True, depth=5, **common),
        RecurrentMean(),
        Affine(nout=2, init=gauss, activation=Softmax())
Example #20
from neon.util.argparser import NeonArgparser

# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

dataset = MNIST(path=args.data_dir)
train_set = dataset.train_iter
valid_set = dataset.valid_iter

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
layers = []
layers.append(Affine(nout=100, init=init_norm, bias=Constant(0),
                     activation=Rectlin()))
layers.append(Affine(nout=10, init=init_norm, bias=Constant(0),
                     activation=Logistic(shortcut=True),
                     name='special_linear'))

cost = GeneralizedCost(costfunc=CrossEntropyBinary())
mlp = Model(layers=layers)

# fit and validate
optimizer_one = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)
optimizer_two = RMSProp()

# All bias layers and the last linear layer will use optimizer_two;
# all other layers will use optimizer_one.
opt = MultiOptimizer({'default': optimizer_one,
                      'Bias': optimizer_two,
Example #21
    valid_set = DataIteratorSequence(time_series.test,
                                     seq_len,
                                     return_sequences=return_sequences)

    # define weights initialization
    init = GlorotUniform()  # Uniform(low=-0.08, high=0.08)

    # define model: model is different for the 2 strategies (sequence target or not)
    if return_sequences is True:
        layers = [
            LSTM(hidden,
                 init,
                 activation=Logistic(),
                 gate_activation=Tanh(),
                 reset_cells=False),
            Affine(train_set.nfeatures, init, bias=init, activation=Identity())
        ]
    else:
        layers = [
            LSTM(hidden,
                 init,
                 activation=Logistic(),
                 gate_activation=Tanh(),
                 reset_cells=True),
            RecurrentLast(),
            Affine(train_set.nfeatures, init, bias=init, activation=Identity())
        ]

    model = Model(layers=layers)
    cost = GeneralizedCost(MeanSquared())
    optimizer = RMSProp(stochastic_round=args.rounding)
Example #22
                       batch_norm=False,
                       bi_sum=False)
elif args.rlayer_type == 'bibnrnn':
    rlayer = DeepBiRNN(hidden_size,
                       g_uni,
                       activation=Tanh(),
                       depth=1,
                       reset_cells=True,
                       batch_norm=True)

layers = [
    LookupTable(vocab_size=vocab_size, embedding_dim=embedding_dim, init=uni),
    rlayer,
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(2, g_uni, bias=g_uni, activation=Softmax())
]

model = Model(layers=layers)

cost = GeneralizedCost(costfunc=CrossEntropyMulti(usebits=True))
optimizer = Adagrad(learning_rate=0.01,
                    gradient_clip_value=gradient_clip_value)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

# train model
model.fit(train_set,
          optimizer=optimizer,
          num_epochs=args.epochs,
Example #23
# setup backend
be = gen_backend(**extract_valid_args(args, gen_backend))

# download dataset
data_path = load_flickr8k(path=args.data_dir)  # Other setnames are flickr30k and coco

# load data
train_set = ImageCaption(path=data_path, max_images=-1)

# weight initialization
init = Uniform(low=-0.08, high=0.08)
init2 = Constant(val=train_set.be.array(train_set.bias_init))

# model initialization
image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))])
sent_path = Sequential([Affine(hidden_size, init, linear_name='sent')])

layers = [
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# configure callbacks
checkpoint_model_path = "~/image_caption2.pickle"
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = checkpoint_model_path
Example #24
def test_model_serialize(backend):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator([X_train, X_train],
                             y_train,
                             nclass=nclass,
                             lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = [
        Conv((5, 5, 16),
             init=init_norm,
             bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    path2 = [
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ]
    layers = [
        MergeConcat([path1, path2]),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        BatchNorm(),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    save_obj(mlp.serialize(keep_states=True), tmp_save)

    # Load model
    mlp = Model(layers=layers)
    mlp.load_weights(tmp_save)

    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            else:
                assert np.allclose(p, p_e)

    os.remove(tmp_save)
Example #25
(X_train, y_train), (X_test, y_test), nclass = mnist.load_data()
train_set = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(1, 28, 28))

# define model
nfilters = [20, 50, 500]
# nfilters = [24, 56, 500]
init_w = Gaussian(scale=0.01)
relu = Rectlin()
common_params = dict(init=init_w, activation=relu)
layers = [
    Conv((5, 5, nfilters[0]), bias=Constant(0.1), padding=0, **common_params),
    Pooling(2, strides=2, padding=0),
    Conv((5, 5, nfilters[1]), bias=Constant(0.1), padding=0, **common_params),
    Pooling(2, strides=2, padding=0),
    Affine(nout=nfilters[2], bias=Constant(0.1), **common_params),
    Affine(nout=10,
           bias=Constant(0.1),
           activation=Softmax(),
           init=Gaussian(scale=0.01))
]
model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.initialize(train_set, cost)
model.load_params('models/mnist/mnist_cnn.pkl', load_states=False)

# define optimizer
opt_w = GradientDescentMomentum(learning_rate=0.01,
                                momentum_coef=0.9,
                                wdecay=0.0005)
Example #26
init_emb = Uniform(low=-0.1 / embedding_dim, high=0.1 / embedding_dim)
nclass = 2
layers = [
    LookupTable(vocab_size=vocab_size,
                embedding_dim=embedding_dim,
                init=init_emb,
                pad_idx=0,
                update=True),
    LSTM(hidden_size,
         init_glorot,
         activation=Tanh(),
         gate_activation=Logistic(),
         reset_cells=True),
    RecurrentSum(),
    Dropout(keep=0.5),
    Affine(nclass, init_glorot, bias=init_glorot, activation=Softmax())
]

# load the weights
print("Initialized the models - ")
model_new = Model(layers=layers)
print("Loading the weights from {0}".format(args.model_weights))

model_new.load_params(args.model_weights)
model_new.initialize(dataset=(sentence_length, batch_size))

# setup buffers before accepting reviews
xdev = be.zeros((sentence_length, 1), dtype=np.int32)  # bsz is 1, feature size
xbuf = np.zeros((1, sentence_length), dtype=np.int32)
oov = 2
start = 1
Example #27
# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

# hyperparameters
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
          Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# fit and validate
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

# configure callbacks
Example #28
# parse the command line arguments
parser = NeonArgparser(__doc__)
args = parser.parse_args()

(X_train, y_train), (X_test, y_test), nclass = load_cifar10(path=args.data_dir)

train = ArrayIterator(X_train, y_train, nclass=nclass, lshape=(3, 32, 32))
test = ArrayIterator(X_test, y_test, nclass=nclass, lshape=(3, 32, 32))

init_uni = Uniform(low=-0.1, high=0.1)
opt_gdm = GradientDescentMomentum(learning_rate=0.01, momentum_coef=0.9)

# set up the model layers
layers = [
    Affine(nout=200, init=init_uni, activation=Rectlin()),
    Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True))
]

cost = GeneralizedCost(costfunc=CrossEntropyBinary())

mlp = Model(layers=layers)

# configure callbacks
callbacks = Callbacks(mlp, eval_set=test, **args.callback_args)

mlp.fit(train,
        optimizer=opt_gdm,
        num_epochs=args.epochs,
        cost=cost,
        callbacks=callbacks)
Example #29
# setup a validation data set iterator
valid_set = DataIterator(X_test, y_test, nclass=nclass)

# setup weight initialization function
init_norm = Gaussian(loc=0.0, scale=0.01)

normrelu = dict(init=init_norm, activation=Rectlin())
normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
normsoft = dict(init=init_norm, activation=Softmax())

# setup model layers
b1 = BranchNode(name="b1")
b2 = BranchNode(name="b2")


p1 = [Affine(nout=100, linear_name="m_l1", **normrelu),
      b1,
      Affine(nout=32, linear_name="m_l2", **normrelu),
      Affine(nout=16, linear_name="m_l3", **normrelu),
      b2,
      Affine(nout=10, linear_name="m_l4", **normsoft)]

p2 = [b1,
      Affine(nout=16, linear_name="b1_l1", **normrelu),
      Affine(nout=10, linear_name="b1_l2", **normsigm)]

p3 = [b2,
      Affine(nout=16, linear_name="b2_l1", **normrelu),
      Affine(nout=10, linear_name="b2_l2", **normsigm)]

Example #30
parser = NeonArgparser(__doc__)
args = parser.parse_args()

NervanaObject.be.enable_winograd = 4

# setup data provider
X_train = np.random.uniform(-1, 1, (128, 3*224*224))
y_train = np.random.uniform(-1, 1, (128, 1000))
train = ArrayIterator(X_train, y_train, nclass=1000, lshape=(3, 224, 224))

layers = [Conv((11, 11, 64), init=Gaussian(scale=0.01),
               activation=Rectlin(), padding=3, strides=4),
          Pooling(3, strides=2),
          Conv((5, 5, 192), init=Gaussian(scale=0.01), activation=Rectlin(), padding=2),
          Pooling(3, strides=2),
          Conv((3, 3, 384), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Conv((3, 3, 256), init=Gaussian(scale=0.03), activation=Rectlin(), padding=1),
          Pooling(3, strides=2),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=4096, init=Gaussian(scale=0.01), activation=Rectlin()),
          Affine(nout=1000, init=Gaussian(scale=0.01), activation=Softmax())]
model = Model(layers=layers)

weight_sched = Schedule([22, 44, 65], (1/250.)**(1/3.))
opt_gdm = GradientDescentMomentum(0.01, 0.0, wdecay=0.0005, schedule=weight_sched)
opt = MultiOptimizer({'default': opt_gdm})
cost = GeneralizedCost(costfunc=CrossEntropyMulti())

model.benchmark(train, cost=cost, optimizer=opt, niterations=10, nskip=5)