Example #1
def test_concat_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size inputs and outputs
    nins = [128, 1024]
    nouts = [64, 2048]
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for nout in nouts]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size)))) for nin in nins]
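    # merge="stack" concatenates the two branch outputs along the feature
    # axis (axis 0), matching the np.concatenate check on the expected output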
    merge = MergeMultistream(layers, merge="stack")
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()
    merge.set_deltas(None)
    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)])

    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size))) for nout in nouts]
    err_concat = np.concatenate(err_lst)
    merge.bprop(be.array(err_concat))
    dW_exp_lst = [np.dot(err, inp.get().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert np.allclose(layer.dW.get(), dW_exp)
    return
Example #2
    def __init__(self):
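        # one input shape per merged stream: presumably a 1024-dim image
        # feature vector and a (features, steps) sentence stream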
        self.in_shape = [1024, (2538, 38)]

        init = Constant(0)
        image_path = Sequential(
            [Affine(20, init, bias=init),
             Affine(10, init, bias=init)])
        sent_path = Sequential([Affine(30, init, bias=init), Affine(10, init)])

        layers = [
            MergeMultistream(layers=[image_path, sent_path],
                             merge="recurrent"),
            Dropout(keep=0.5),
            LSTM(4,
                 init,
                 activation=Logistic(),
                 gate_activation=Tanh(),
                 reset_cells=True),
            Affine(20, init, bias=init, activation=Softmax())
        ]
        self.layers = layers
        self.cost = GeneralizedCostMask(CrossEntropyMulti())

        self.model = Model(layers=layers)
        self.model.initialize(self.in_shape, cost=self.cost)
Example #3
def create_model(vocab_size, rlayer_type):
    """
    Create LSTM/GRU model for bAbI dataset.

    Args:
        vocab_size (int) : Size of the vocabulary of the bAbI data.
        rlayer_type (string) : Type of recurrent layer to use (gru or lstm).

    Returns:
        Model : Model of the created network
    """
    # recurrent layer parameters (default gru)
    rlayer_obj = GRU if rlayer_type == 'gru' else LSTM
    rlayer_params = dict(output_size=100, reset_cells=True,
                         init=GlorotUniform(), init_inner=Orthonormal(0.5),
                         activation=Tanh(), gate_activation=Logistic())

    # if using lstm, swap the activation functions
    if rlayer_type == 'lstm':
        rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh()))

    # lookup layer parameters
    lookup_params = dict(vocab_size=vocab_size, embedding_dim=50, init=Uniform(-0.05, 0.05))

    # Model construction
    story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
    query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

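    # the story and query streams are merged with "stack", i.e. their outputs
    # are concatenated along the feature axis before the softmax classifier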
    layers = [MergeMultistream(layers=[story_path, query_path], merge="stack"),
              Affine(vocab_size, init=GlorotUniform(), activation=Softmax())]

    return Model(layers=layers)
Example #4
    def input_layers(self, analytics_input, init, activation, gate):
        """
        Return the input layers. We currently support convolutional and LSTM inputs.
        :return: the configured input layer(s)
        """
        if self.recurrent:
            if analytics_input:
                # support analytics + content
                input_layers = MergeMultistream([[
                    LSTM(300,
                         init,
                         init_inner=Kaiming(),
                         activation=activation,
                         gate_activation=gate,
                         reset_cells=True),
                    RecurrentSum()
                ], [Affine(30, init, activation=activation)]], 'stack')
            else:
                # content only
                input_layers = [
                    LSTM(300,
                         init,
                         init_inner=Kaiming(),
                         activation=activation,
                         gate_activation=gate,
                         reset_cells=True),
                    RecurrentSum()
                ]
        else:
            if analytics_input:
                # support analytics + content
                input_layers = MergeMultistream([
                    self.conv_net(activation),
                    [Affine(30, init, activation=Logistic())]
                ], 'stack')
            else:
                # content only
                input_layers = self.conv_net(activation)

        return input_layers
Example #5
def test_concat_sequence_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size input steps
    nin = 128
    steps = [32, 64]
    nout = 256
    batch_size = 16
    NervanaObject.be.bsz = NervanaObject.be.bs = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for _ in range(2)]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size*step))))
              for step in steps]
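    # merge="recurrent" joins the branch outputs along the step/column axis
    # (axis 1), matching the axis=1 concatenation in the expected output below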
    merge = MergeMultistream(layers, merge="recurrent")
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()
    merge.set_deltas(None)
    out = merge.fprop(inputs).asnumpyarray()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.asnumpyarray() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.asnumpyarray()) for (w, inp) in zip(weights, inputs)], axis=1)

    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size*step))) for step in steps]
    err_concat = be.array(np.concatenate(err_lst, axis=1))
    merge.bprop(err_concat)
    dW_exp_lst = [np.dot(err, inp.asnumpyarray().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert np.allclose(layer.dW.asnumpyarray(), dW_exp)
    return
Example #6
# hyperparameters
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

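# both paths receive the same MNIST image (the ArrayIterator above is built
# with [X_train, X_train]); their outputs are stacked before the classifier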
layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
          Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# fit and validate
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=num_epochs, callbacks=callbacks)
Example #7
# download dataset
data_path = load_flickr8k(path=args.data_dir)  # Other setnames are flickr30k and coco

# load data
train_set = ImageCaption(path=data_path, max_images=-1)

# weight initialization
init = Uniform(low=-0.08, high=0.08)
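# init2 seeds the output-layer bias from the dataset-provided bias_init values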
init2 = Constant(val=train_set.be.array(train_set.bias_init))

# model initialization
image_path = Sequential([Affine(hidden_size, init, bias=Constant(val=0.0))])
sent_path = Sequential([Affine(hidden_size, init, linear_name='sent')])

layers = [
    MergeMultistream(layers=[image_path, sent_path], merge="recurrent"),
    Dropout(keep=0.5),
    LSTM(hidden_size, init, activation=Logistic(), gate_activation=Tanh(), reset_cells=True),
    Affine(train_set.vocab_size, init, bias=init2, activation=Softmax())
]

cost = GeneralizedCostMask(costfunc=CrossEntropyMulti(usebits=True))

# configure callbacks
checkpoint_model_path = "~/image_caption2.pickle"
if args.callback_args['save_path'] is None:
    args.callback_args['save_path'] = checkpoint_model_path

if args.callback_args['serialize'] is None:
    args.callback_args['serialize'] = 1
Example #8
def test_concat_sequence_l1_l1(backend_default, allrand_args):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size input steps
    nin = 128
    steps = [32, 64]
    nout = 256
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for _ in (0, 1)]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size * step))))
              for step in steps]
    merge = MergeMultistream(layers, merge="recurrent")
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()
    merge.set_deltas(None)
    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)], axis=1)

    assert np.allclose(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size * step))) for step in steps]
    err_concat = be.array(np.concatenate(err_lst, axis=1))
    merge.bprop(err_concat)
    dW_exp_lst = [np.dot(err, inp.get().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert np.allclose(layer.dW.get(), dW_exp)
    return
Example #9
def test_model_serialize(backend_default, data):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist(path=data)

    train_set = ArrayIterator([X_train, X_train],
                              y_train,
                              nclass=nclass,
                              lshape=(1, 28, 28))

    init_norm = Gaussian(loc=0.0, scale=0.01)

    # initialize model
    path1 = Sequential([
        Conv((5, 5, 16),
             init=init_norm,
             bias=Constant(0),
             activation=Rectlin()),
        Pooling(2),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    path2 = Sequential([
        Affine(nout=100,
               init=init_norm,
               bias=Constant(0),
               activation=Rectlin()),
        Dropout(keep=0.5),
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin())
    ])
    layers = [
        MergeMultistream(layers=[path1, path2], merge="stack"),
        Affine(nout=20, init=init_norm, batch_norm=True, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]

    tmp_save = 'test_model_serialize_tmp_save.pickle'
    mlp = Model(layers=layers)
    mlp.optimizer = GradientDescentMomentum(learning_rate=0.1,
                                            momentum_coef=0.9)
    mlp.cost = GeneralizedCost(costfunc=CrossEntropyBinary())
    mlp.initialize(train_set, cost=mlp.cost)
    n_test = 3
    num_epochs = 3
    # Train model for num_epochs and n_test batches
    for epoch in range(num_epochs):
        for i, (x, t) in enumerate(train_set):
            x = mlp.fprop(x)
            delta = mlp.cost.get_errors(x, t)
            mlp.bprop(delta)
            mlp.optimizer.optimize(mlp.layers_to_optimize, epoch=epoch)
            if i > n_test:
                break

    # Get expected outputs of n_test batches and states of all layers
    outputs_exp = []
    pdicts_exp = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs_exp.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Serialize model
    mlp.save_params(tmp_save, keep_states=True)

    # Load model
    mlp = Model(tmp_save)

    mlp.initialize(train_set)
    outputs = []
    pdicts = [l.get_params_serialize() for l in mlp.layers_to_optimize]
    for i, (x, t) in enumerate(train_set):
        outputs.append(mlp.fprop(x, inference=True))
        if i > n_test:
            break

    # Check outputs, states, and params are the same
    for output, output_exp in zip(outputs, outputs_exp):
        assert np.allclose(output.get(), output_exp.get())

    for pd, pd_exp in zip(pdicts, pdicts_exp):
        for s, s_e in zip(pd['states'], pd_exp['states']):
            if isinstance(s, list):  # this is the batch norm case
                for _s, _s_e in zip(s, s_e):
                    assert np.allclose(_s, _s_e)
            else:
                assert np.allclose(s, s_e)
        for p, p_e in zip(pd['params'], pd_exp['params']):
            assert type(p) == type(p_e)
            if isinstance(p, list):  # this is the batch norm case
                for _p, _p_e in zip(p, p_e):
                    assert np.allclose(_p, _p_e)
            elif isinstance(p, np.ndarray):
                assert np.allclose(p, p_e)
            else:
                assert p == p_e

    os.remove(tmp_save)
Example #10
# if using lstm, swap the activation functions
if args.rlayer_type == 'lstm':
    rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh()))

# lookup layer parameters
lookup_params = dict(vocab_size=babi.vocab_size,
                     embedding_dim=50,
                     init=Uniform(-0.05, 0.05))

# Model construction
story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

layers = [
    MergeMultistream(layers=[story_path, query_path], merge="stack"),
    Affine(babi.vocab_size, init=GlorotUniform(), activation=Softmax())
]

model = Model(layers=layers)

# setup callbacks
callbacks = Callbacks(model,
                      train_set,
                      eval_set=valid_set,
                      **args.callback_args)

# train model
model.fit(train_set,
          optimizer=Adam(),
          num_epochs=args.epochs,
          callbacks=callbacks)
Example #11
def test_concat_l1_l1(backend_default, allrand_args, deltas_buffer):
    # test two linear layers that are merged with concat
    dtypeu = np.float32
    w_rng, rngmax = allrand_args
    # Diff size inputs and outputs
    nins = [128, 1024]
    nouts = [64, 2048]
    batch_size = 16
    NervanaObject.be.bsz = batch_size
    be = NervanaObject.be

    init_unif = Uniform(low=w_rng[0], high=w_rng[1])
    layers = [Sequential(Affine(nout=nout, init=init_unif)) for nout in nouts]
    inputs = [be.array(dtypeu(np.random.random((nin, batch_size)))) for nin in nins]
    merge = MergeMultistream(layers, merge="stack")
    assert len(inputs) == len(layers)
    merge.configure(inputs)
    merge.allocate()

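    # this variant uses the shared delta-buffer API: pre-allocate the buffers,
    # then hand them to the merge layer (earlier examples pass set_deltas(None))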
    merge.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    merge.set_deltas(deltas_buffer)

    out = merge.fprop(inputs).get()

    sublayers = [s.layers[0] for s in layers]
    weights = [layer.W.get() for layer in sublayers]
    out_exp = np.concatenate([np.dot(w, inp.get()) for (w, inp) in zip(weights, inputs)])

    assert allclose_with_out(out, out_exp, atol=1e-3)

    err_lst = [dtypeu(np.random.random((nout, batch_size))) for nout in nouts]
    err_concat = np.concatenate(err_lst)
    merge.bprop(be.array(err_concat))
    dW_exp_lst = [np.dot(err, inp.get().T) for (err, inp) in zip(err_lst, inputs)]

    for layer, dW_exp in zip(sublayers, dW_exp_lst):
        assert allclose_with_out(layer.dW.get(), dW_exp)
    return
Example #12
    def __init__(self,
                 sentence_length,
                 token_vocab_size,
                 pos_vocab_size=None,
                 char_vocab_size=None,
                 max_char_word_length=20,
                 token_embedding_size=None,
                 pos_embedding_size=None,
                 char_embedding_size=None,
                 num_labels=None,
                 lstm_hidden_size=100,
                 num_lstm_layers=1,
                 use_external_embedding=None,
                 dropout=0.5):

        init = GlorotUniform()
        tokens = []
        if use_external_embedding is None:
            tokens.append(
                LookupTable(vocab_size=token_vocab_size,
                            embedding_dim=token_embedding_size,
                            init=init,
                            pad_idx=0))
        else:
            tokens.append(DataInput())
        tokens.append(Reshape((-1, sentence_length)))
        f_layers = [tokens]

        # add POS tag input
        if pos_vocab_size is not None and pos_embedding_size is not None:
            f_layers.append([
                LookupTable(vocab_size=pos_vocab_size,
                            embedding_dim=pos_embedding_size,
                            init=init,
                            pad_idx=0),
                Reshape((-1, sentence_length))
            ])

        # add Character RNN input
        if char_vocab_size is not None and char_embedding_size is not None:
            char_lut_layer = LookupTable(vocab_size=char_vocab_size,
                                         embedding_dim=char_embedding_size,
                                         init=init,
                                         pad_idx=0)
            char_nn = [
                char_lut_layer,
                TimeDistBiLSTM(char_embedding_size,
                               init,
                               activation=Logistic(),
                               gate_activation=Tanh(),
                               reset_cells=True,
                               reset_freq=max_char_word_length),
                TimeDistributedRecurrentLast(timesteps=max_char_word_length),
                Reshape((-1, sentence_length))
            ]

            f_layers.append(char_nn)

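        # a single feature stream needs no merge; multiple streams are stacked
        # and then reshaped to (-1, sentence_length)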
        layers = []
        if len(f_layers) == 1:
            layers.append(f_layers[0][0])
        else:
            layers.append(MergeMultistream(layers=f_layers, merge="stack"))
            layers.append(Reshape((-1, sentence_length)))
        layers += [
            DeepBiLSTM(lstm_hidden_size,
                       init,
                       activation=Logistic(),
                       gate_activation=Tanh(),
                       reset_cells=True,
                       depth=num_lstm_layers),
            Dropout(keep=dropout),
            Affine(num_labels, init, bias=init, activation=Softmax())
        ]
        self._model = Model(layers=layers)