Example 1
def create_model(vocab_size, rlayer_type):
    """
    Create LSTM/GRU model for bAbI dataset.

    Args:
        vocab_size (int) : Size of the vocabulary for the bAbI data.
        rlayer_type (string) : Type of recurrent layer to use (gru or lstm).

    Returns:
        Model : Model of the created network
    """
    # recurrent layer parameters (default gru)
    rlayer_obj = GRU if rlayer_type == 'gru' else LSTM
    rlayer_params = dict(output_size=100, reset_cells=True,
                         init=GlorotUniform(), init_inner=Orthonormal(0.5),
                         activation=Tanh(), gate_activation=Logistic())

    # if using lstm, swap the activation functions
    if rlayer_type == 'lstm':
        rlayer_params.update(dict(activation=Logistic(), gate_activation=Tanh()))

    # lookup layer parameters
    lookup_params = dict(vocab_size=vocab_size, embedding_dim=50, init=Uniform(-0.05, 0.05))

    # Model construction
    story_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]
    query_path = [LookupTable(**lookup_params), rlayer_obj(**rlayer_params)]

    layers = [MergeMultistream(layers=[story_path, query_path], merge="stack"),
              Affine(vocab_size, init=GlorotUniform(), activation=Softmax())]

    return Model(layers=layers)
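
A minimal usage sketch (illustrative values; a neon backend is assumed to be generated already, and vocab_size would normally come from the bAbI data loader):

model_gru = create_model(vocab_size=100, rlayer_type='gru')
model_lstm = create_model(vocab_size=100, rlayer_type='lstm')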
Example 2
    def __init__(self,
                 overlapping_classes=None,
                 exclusive_classes=None,
                 analytics_input=True,
                 network_type='conv_net',
                 num_words=60,
                 width=100,
                 lookup_size=0,
                 lookup_dim=0,
                 optimizer=Adam()):
        assert (overlapping_classes is not None) or (exclusive_classes
                                                     is not None)

        self.width = width
        self.num_words = num_words
        self.overlapping_classes = overlapping_classes
        self.exclusive_classes = exclusive_classes
        self.analytics_input = analytics_input
        self.recurrent = network_type == 'lstm'
        self.lookup_size = lookup_size
        self.lookup_dim = lookup_dim

        init = GlorotUniform()
        activation = Rectlin(slope=1E-05)
        gate = Logistic()

        input_layers = self.input_layers(analytics_input, init, activation,
                                         gate)

        if self.overlapping_classes is None:
            output_layers = [
                Affine(len(self.exclusive_classes), init, activation=Softmax())
            ]
        elif self.exclusive_classes is None:
            output_layers = [
                Affine(len(self.overlapping_classes),
                       init,
                       activation=Logistic())
            ]
        else:
            output_branch = BranchNode(name='exclusive_overlapping')
            output_layers = Tree([
                [SkipNode(),
                 output_branch,
                 Affine(len(self.exclusive_classes), init, activation=Softmax())],
                [output_branch,
                 Affine(len(self.overlapping_classes), init, activation=Logistic())]
            ])
        layers = [
            input_layers,
            # this is where inputs meet, and where we may want to add depth or
            # additional functionality
            Dropout(keep=0.8),
            output_layers
        ]
        super(ClassifierNetwork, self).__init__(layers, optimizer=optimizer)
Example 3
def test_multi_optimizer(backend_default):
    opt_gdm = GradientDescentMomentum(learning_rate=0.001,
                                      momentum_coef=0.9,
                                      wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64),
              strides=4,
              padding=3,
              init=init_one,
              bias=Constant(0),
              activation=Rectlin())
    l2 = Affine(nout=4096,
                init=init_one,
                bias=Constant(1),
                activation=Rectlin())
    l3 = LSTM(output_size=1000,
              init=init_one,
              activation=Logistic(),
              gate_activation=Tanh())
    l4 = GRU(output_size=100,
             init=init_one,
             activation=Logistic(),
             gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)

    opt = MultiOptimizer({
        'default': opt_gdm,
        'Bias': opt_ada,
        'Convolution': opt_adam,
        'Linear': opt_rms,
        'LSTM': opt_rms_1,
        'GRU': opt_rms_1
    })

    map_list = opt._map_optimizers(layer_list)
    assert map_list[opt_adam][0].__class__.__name__ == 'Convolution'
    assert map_list[opt_ada][0].__class__.__name__ == 'Bias'
    assert map_list[opt_rms][0].__class__.__name__ == 'Linear'
    assert map_list[opt_gdm][0].__class__.__name__ == 'Activation'
    assert map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example 4
def test_biRNN_bprop(backend_default, fargs):

    # basic sanity check with 0 weights random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiRNN(hidden_size, activation=Logistic(), init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()
    birnn.set_deltas([birnn.be.iobuf(birnn.in_shape)])

    # same weight for bi-rnn backward and rnn weights
    birnn.W_input_b[:] = birnn.W_input_f
    birnn.W_recur_b[:] = birnn.W_recur_f
    birnn.b_b[:] = birnn.b_f
    birnn.dW[:] = 0

    # same weight for bi-directional rnn
    init_glorot = GlorotUniform()
    rnn = Recurrent(hidden_size, activation=Logistic(), init=init_glorot)
    rnn.configure(in_shape)
    rnn.prev_layer = True
    rnn.allocate()
    rnn.set_deltas([rnn.be.iobuf(rnn.in_shape)])

    # inputs and views
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
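    # `con` is assumed to be numpy.concatenate imported under that alias by the
    # test module; `get_steps` is a test helper that splits the flat
    # (input_size, seq_len * batch_size) buffer into per-timestep views.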
    rl = con(lr_rev, axis=1)

    # allocate gpu buffers
    inp_lr = birnn.be.array(lr)
    inp_rl = birnn.be.array(rl)

    # outputs
    out_lr_g = birnn.fprop(inp_lr)
    del_lr = birnn.bprop(out_lr_g).get().copy()
    birnn.h_buffer[:] = 0
    out_rl_g = birnn.fprop(inp_rl)
    del_rl = birnn.bprop(out_rl_g).get().copy()

    del_lr_s = get_steps(del_lr, in_shape)
    del_rl_s = get_steps(del_rl, in_shape)
    for (x, y) in zip(del_lr_s, reversed(del_rl_s)):
        assert np.allclose(x, y, rtol=0.0, atol=1.0e-5)
Example 5
 def __init__(self, backend, dataset, subj):
     ad = {
         'type': 'adadelta',
         'lr_params': {
             'rho': 0.9,
             'epsilon': 0.000000001
         }
     }
     self.layers = []
     self.add(
         DataLayer(is_local=True,
                   nofm=dataset.nchannels,
                   ofmshape=[1, dataset.nsamples]))
     self.add(
         ConvLayer(nofm=64,
                   fshape=[1, 3],
                   activation=RectLin(),
                   lrule_init=ad))
     self.add(PoolingLayer(op='max', fshape=[1, 2], stride=2))
     if subj != 2:
         self.add(FCLayer(nout=128, activation=RectLin(), lrule_init=ad))
     self.add(
         FCLayer(nout=dataset.nclasses,
                 activation=Logistic(),
                 lrule_init=ad))
     self.add(CostLayer(cost=CrossEntropy()))
     self.model = MLP(num_epochs=1, batch_size=128, layers=self.layers)
     self.dataset = dataset
Example 6
def test_logistic_derivative(backend_default):
    # bprop is on the output
    inputs = np.array([0, 1, -2]).reshape((3, 1))
    inputs = 1.0 / (1.0 + np.exp(-inputs))
    outputs = inputs * (1.0 - inputs)
    compare_tensors(Logistic(shortcut=False),
                    inputs, outputs, deriv=True, tol=1e-7)
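
The test relies on the identity sigma'(z) = sigma(z) * (1 - sigma(z)). A standalone NumPy sketch of the same check against a central finite-difference estimate (independent of the neon backend; the epsilon and tolerance are illustrative):

import numpy as np

def sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

z = np.array([0.0, 1.0, -2.0])
eps = 1e-6
numeric = (sigmoid(z + eps) - sigmoid(z - eps)) / (2 * eps)  # central difference
analytic = sigmoid(z) * (1.0 - sigmoid(z))                   # sigma * (1 - sigma)
assert np.allclose(numeric, analytic, atol=1e-8)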
Example 7
def gradient_calc(seq_len, input_size, hidden_size, batch_size,
                  epsilon=None, rand_scale=None, inp_bl=None):
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    input_shape = (input_size, seq_len * batch_size)

    # generate input if one is not given
    if inp_bl is None:
        inp_bl = np.random.randn(*input_shape)

    # neon gru instance
    gru = GRU(hidden_size, init=Gaussian(), activation=Tanh(), gate_activation=Logistic())
    inpa = gru.be.array(np.copy(inp_bl))

    # run fprop on the baseline input
    gru.configure((input_size, seq_len))
    gru.prev_layer = True
    gru.allocate()
    gru.set_deltas([gru.be.iobuf(gru.in_shape)])
    out_bl = gru.fprop(inpa).get()

    # random scaling/hash to generate fake loss
    if rand_scale is None:
        rand_scale = np.random.random(out_bl.shape) * 2.0 - 1.0
    # loss function would be:
    # loss_bl = np.sum(rand_scale * out_bl)

    # run back prop with rand_scale as the errors
    # use copy to avoid any interactions
    deltas_neon = gru.bprop(gru.be.array(np.copy(rand_scale))).get()

    # add a perturbation to each input element
    grads_est = np.zeros(inpa.shape)
    inp_pert = inp_bl.copy()
    for pert_ind in range(inpa.size):
        save_val = inp_pert.flat[pert_ind]

        inp_pert.flat[pert_ind] = save_val + epsilon
        reset_gru(gru)
        gru.allocate()
        out_pos = gru.fprop(gru.be.array(inp_pert)).get()

        inp_pert.flat[pert_ind] = save_val - epsilon
        reset_gru(gru)
        gru.allocate()
        out_neg = gru.fprop(gru.be.array(inp_pert)).get()

        # calculate the loss with perturbations
        loss_pos = np.sum(rand_scale * out_pos)
        loss_neg = np.sum(rand_scale * out_neg)
        # compute the gradient estimate
        grad = 0.5 / float(epsilon) * (loss_pos - loss_neg)

        grads_est.flat[pert_ind] = grad

        # reset the perturbed input element
        inp_pert.flat[pert_ind] = save_val

    del gru
    return (grads_est, deltas_neon)
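
The returned pair is typically compared directly, checking the finite-difference estimate against the analytic deltas from neon's bprop. A sketch, assuming a neon backend has already been generated (the sizes, epsilon and tolerance are illustrative):

grads_est, deltas_neon = gradient_calc(seq_len=5, input_size=3, hidden_size=10,
                                       batch_size=4, epsilon=1e-5)
assert np.allclose(grads_est, deltas_neon, rtol=0.0, atol=1e-3)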
Example 8
    def __init__(self):
        self.in_shape = [1024, (2538, 38)]

        init = Constant(0)
        image_path = Sequential(
            [Affine(20, init, bias=init),
             Affine(10, init, bias=init)])
        sent_path = Sequential([Affine(30, init, bias=init), Affine(10, init)])

        layers = [
            MergeMultistream(layers=[image_path, sent_path],
                             merge="recurrent"),
            Dropout(keep=0.5),
            LSTM(4,
                 init,
                 activation=Logistic(),
                 gate_activation=Tanh(),
                 reset_cells=True),
            Affine(20, init, bias=init, activation=Softmax())
        ]
        self.layers = layers
        self.cost = GeneralizedCostMask(CrossEntropyMulti())

        self.model = Model(layers=layers)
        self.model.initialize(self.in_shape, cost=self.cost)
Example 9
 def layers(self):
     init_uni = Uniform(low=-0.1, high=0.1)
     bn = False
     return [
         DOG((5.0, 4.0, 3.0, 1.6), 1.8),
         Conv((5, 5, 16),
              init=init_uni,
              activation=Rectlin(),
              batch_norm=bn),
         Pooling((2, 2)),
         Conv((5, 5, 32),
              init=init_uni,
              activation=Rectlin(),
              batch_norm=bn),
         Pooling((2, 2)),
         Affine(nout=500,
                init=init_uni,
                activation=Rectlin(),
                batch_norm=bn),
         Affine(nout=self.noutputs,
                init=init_uni,
                bias=Constant(0),
                activation=Softmax() if self.use_softmax else Logistic(
                    shortcut=True))
     ]
Example 10
def test_model_get_outputs_rnn(backend_default, data):

    data_path = load_ptb_test(path=data)
    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(data_set)

    assert output.shape == (data_set.ndata, data_set.seq_length,
                            data_set.nclass)

    # since the weights are initialized to a constant and the model is untrained,
    # the values along the feature dim should all be the same
    assert np.allclose(output[0, 0], output[0, 0, 0], rtol=0, atol=1e-5)
    assert np.allclose(output[0, 1], output[0, 1, 0], rtol=0, atol=1e-5)

    # along the time dim, the values should be increasing:
    assert np.alltrue(output[0, 2] > output[0, 1])
    assert np.alltrue(output[0, 1] > output[0, 0])
Example 11
def main(args):
    # load up the mnist data set
    dataset = MNIST(path=args.data_dir)

    # initialize model object
    mlp = Model(layers=[
        Affine(nout=100,
               init=Gaussian(loc=0.0, scale=0.01),
               activation=Rectlin()),
        Affine(nout=10,
               init=Gaussian(loc=0.0, scale=0.01),
               activation=Logistic(shortcut=True))
    ])

    # setup optimizer
    optimizer = GradientDescentMomentum(0.1,
                                        momentum_coef=0.9,
                                        stochastic_round=args.rounding)

    # configure callbacks
    callbacks = Callbacks(mlp,
                          eval_set=dataset.valid_iter,
                          **args.callback_args)

    # run fit with a binary cross-entropy cost
    mlp.fit(dataset.train_iter,
            optimizer=optimizer,
            num_epochs=args.epochs,
            cost=GeneralizedCost(costfunc=CrossEntropyBinary()),
            callbacks=callbacks)
    error_rate = mlp.eval(dataset.valid_iter, metric=Misclassification())
    neon_logger.display('Classification accuracy = %.4f' % (1 - error_rate))
Example 12
    def build_model(self):
        # setup weight initialization function
        init_norm = Gaussian(loc=0.0, scale=0.01)

        # setup model layers
        layers = [
            Affine(nout=100,
                   init=init_norm,
                   bias=Uniform(),
                   activation=Rectlin()),
            Affine(nout=10,
                   init=init_norm,
                   bias=Uniform(),
                   activation=Logistic(shortcut=True))
        ]

        # setup cost function as CrossEntropy
        self.cost = GeneralizedCost(costfunc=CrossEntropyBinary())

        # setup optimizer
        self.optimizer = GradientDescentMomentum(
            0.1, momentum_coef=0.9, stochastic_round=self.args.rounding)

        # initialize model object
        self.model = ModelDist(layers=layers)
Example 13
def test_biLSTM_bprop(backend_default, fargs):

    # basic sanity check with 0 weights random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size, gate_activation=Logistic(),
                    activation=Tanh(), init=init_glorot, reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()
    bilstm.set_deltas([bilstm.be.iobuf(bilstm.in_shape)])

    # use the same weights for the forward and backward directions
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs and views
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = con(lr_rev, axis=1)

    # allocate gpu buffers
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs
    out_lr_g = bilstm.fprop(inp_lr)
    out_lr = out_lr_g.get().copy()
    del_lr = bilstm.bprop(out_lr_g).get().copy()
    bilstm.h_buffer[:] = 0
    out_rl_g = bilstm.fprop(inp_rl)
    out_rl = out_rl_g.get().copy()
    del_rl = bilstm.bprop(out_rl_g).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s,
                                  reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert np.allclose(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert np.allclose(x_b, y_f, rtol=0.0, atol=1.0e-5)

    del_lr_s = get_steps(del_lr, in_shape)
    del_rl_s = get_steps(del_rl, in_shape)

    for (x, y) in zip(del_lr_s, reversed(del_rl_s)):
        assert np.allclose(x, y, rtol=0.0, atol=1.0e-5)
Example 14
def gradient_calc(seq_len,
                  input_size,
                  hidden_size,
                  batch_size,
                  epsilon=None,
                  rand_scale=None,
                  inp_bl=None):
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    input_shape = (input_size, seq_len * batch_size)

    # generate input if one is not given
    if inp_bl is None:
        inp_bl = np.random.randn(*input_shape)

    # neon lstm instance
    lstm = LSTM(hidden_size, Gaussian(), Tanh(), Logistic())
    inpa = lstm.be.array(np.copy(inp_bl))

    # run fprop on the baseline input
    out_bl = lstm.fprop(inpa).get()

    # random scaling/hash to generate fake loss
    if rand_scale is None:
        rand_scale = np.random.random(out_bl.shape) * 2.0 - 1.0
    # loss function would be:
    # loss_bl = np.sum(rand_scale * out_bl)

    # run back prop with rand_scale as the errors
    # use copy to avoid any interactions
    deltas_neon = lstm.bprop(lstm.be.array(np.copy(rand_scale))).get()

    # add a perturbation to each input element
    grads_est = np.zeros(inpa.shape)
    inp_pert = inp_bl.copy()
    for pert_ind in range(inpa.size):
        save_val = inp_pert.flat[pert_ind]

        inp_pert.flat[pert_ind] = save_val + epsilon
        reset_lstm(lstm)
        out_pos = lstm.fprop(lstm.be.array(inp_pert)).get()

        inp_pert.flat[pert_ind] = save_val - epsilon
        reset_lstm(lstm)
        out_neg = lstm.fprop(lstm.be.array(inp_pert)).get()

        # calculate the loss with perturbations
        loss_pos = np.sum(rand_scale * out_pos)
        loss_neg = np.sum(rand_scale * out_neg)
        # compute the gradient estimate
        grad = 0.5 * (loss_pos - loss_neg) / epsilon

        grads_est.flat[pert_ind] = grad

        # reset the perturbed input element
        inp_pert.flat[pert_ind] = save_val

    del lstm
    return (grads_est, deltas_neon)
Example 15
def create_model(nin):
    layers = []
    layers.append(DataLayer(nout=nin))
    layers.append(FCLayer(nout=100, activation=RectLin()))
    layers.append(FCLayer(nout=10, activation=Logistic()))
    layers.append(CostLayer(cost=CrossEntropy()))
    model = MLP(num_epochs=10, batch_size=128, layers=layers)
    return model
Example 16
def test_multi_optimizer(backend_default_mkl):
    """
    A test for MultiOptimizer.
    """
    opt_gdm = GradientDescentMomentum(
        learning_rate=0.001, momentum_coef=0.9, wdecay=0.005)
    opt_ada = Adadelta()
    opt_adam = Adam()
    opt_rms = RMSProp()
    opt_rms_1 = RMSProp(gradient_clip_value=5)
    init_one = Gaussian(scale=0.01)

    l1 = Conv((11, 11, 64), strides=4, padding=3,
              init=init_one, bias=Constant(0), activation=Rectlin())
    l2 = Affine(nout=4096, init=init_one,
                bias=Constant(1), activation=Rectlin())
    l3 = LSTM(output_size=1000, init=init_one, activation=Logistic(), gate_activation=Tanh())
    l4 = GRU(output_size=100, init=init_one, activation=Logistic(), gate_activation=Tanh())
    layers = [l1, l2, l3, l4]
    layer_list = []
    for layer in layers:
        if isinstance(layer, list):
            layer_list.extend(layer)
        else:
            layer_list.append(layer)
    for l in layer_list:
        l.configure(in_obj=(16, 28, 28))
        l.allocate()
    # separate layer_list into two, the last two recurrent layers and the rest
    layer_list1, layer_list2 = layer_list[:-2], layer_list[-2:]
    opt = MultiOptimizer({'default': opt_gdm,
                          'Bias': opt_ada,
                          'Convolution': opt_adam,
                          'Convolution_bias': opt_adam,
                          'Linear': opt_rms,
                          'LSTM': opt_rms_1,
                          'GRU': opt_rms_1})
    layers_to_optimize1 = [l for l in layer_list1 if isinstance(l, ParameterLayer)]
    layers_to_optimize2 = [l for l in layer_list2 if isinstance(l, ParameterLayer)]
    opt.optimize(layers_to_optimize1, 0)
    assert opt.map_list[opt_adam][0].__class__.__name__ == 'Convolution_bias'
    assert opt.map_list[opt_rms][0].__class__.__name__ == 'Linear'
    opt.optimize(layers_to_optimize2, 0)
    assert opt.map_list[opt_rms_1][0].__class__.__name__ == 'LSTM'
    assert opt.map_list[opt_rms_1][1].__class__.__name__ == 'GRU'
Example 17
 def layers(self):
     return [
         Conv((7, 7, 96),
              init=Gaussian(scale=0.0001),
              bias=Constant(0),
              activation=Rectlin(),
              padding=3,
              strides=1),
         LRN(31, ascale=0.001, bpower=0.75),
         Pooling(3, strides=2, padding=1),
         Conv((5, 5, 256),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=2,
              strides=1),
         LRN(31, ascale=0.001, bpower=0.75),
         Pooling(3, strides=2, padding=1),
         Conv((3, 3, 384),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=1,
              strides=1),
         Conv((3, 3, 384),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=1,
              strides=1),
         Conv((3, 3, 256),
              init=Gaussian(scale=0.01),
              bias=Constant(0),
              activation=Rectlin(),
              padding=1,
              strides=1),
         Pooling(3, strides=2, padding=1),
         Affine(nout=4096,
                init=Gaussian(scale=0.01),
                bias=Constant(0),
                activation=Identity()),
         Dropout(keep=0.5),
         Affine(nout=4096,
                init=Gaussian(scale=0.01),
                bias=Constant(0),
                activation=Identity()),
         Dropout(keep=0.5),
         Affine(nout=self.noutputs,
                init=Gaussian(scale=0.01),
                bias=Constant(0),
                activation=Softmax() if self.use_softmax else Logistic(
                    shortcut=True))
     ]
Example 18
def test_biLSTM_fprop(backend_default, fargs):

    # basic sanity check with 0 weights random inputs
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # setup the bi-directional rnn
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size,
                    gate_activation=Logistic(),
                    init=init_glorot,
                    activation=Tanh(),
                    reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()

    # same weight
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = con(lr_rev, axis=1)
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs
    out_lr = bilstm.fprop(inp_lr).get().copy()

    bilstm.h_buffer[:] = 0
    out_rl = bilstm.fprop(inp_rl).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s, reversed(out_rl_f_s),
                                  reversed(out_rl_b_s)):
        assert allclose_with_out(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_b, y_f, rtol=0.0, atol=1.0e-5)
Example 19
def test_model_N_S_setter(backend_default):

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, activation=Logistic()),
        Affine(100, init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    model.set_batch_size(20)
    model.set_seq_len(10)
Example 20
    def __init__(self):
        self.in_shape = (1, 32, 32)

        init_norm = Gaussian(loc=0.0, scale=0.01)

        normrelu = dict(init=init_norm, activation=Rectlin())
        normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
        normsoft = dict(init=init_norm, activation=Softmax())

        # setup model layers
        b1 = BranchNode(name="b1")
        b2 = BranchNode(name="b2")

        p1 = [
            Affine(nout=100, name="main1", **normrelu),
            b1,
            Affine(nout=32, name="main2", **normrelu),
            Affine(nout=160, name="main3", **normrelu),
            b2,
            Affine(nout=32, name="main2", **normrelu),
            # make next layer big to check sizing
            Affine(nout=320, name="main2", **normrelu),
            Affine(nout=10, name="main4", **normsoft)
        ]

        p2 = [
            b1,
            Affine(nout=16, name="branch1_1", **normrelu),
            Affine(nout=10, name="branch1_2", **normsigm)
        ]

        p3 = [
            b2,
            Affine(nout=16, name="branch2_1", **normrelu),
            Affine(nout=10, name="branch2_2", **normsigm)
        ]

        self.cost = Multicost(
            costs=[
                GeneralizedCost(costfunc=CrossEntropyMulti()),
                GeneralizedCost(costfunc=CrossEntropyBinary()),
                GeneralizedCost(costfunc=CrossEntropyBinary())
            ],
            weights=[1, 0., 0.])

        self.layers = SingleOutputTree([p1, p2, p3], alphas=[1, .2, .2])
        self.model = Model(layers=self.layers)
        self.model.initialize(self.in_shape, cost=self.cost)
Example 21
def test_model_get_outputs(backend_default):
    (X_train, y_train), (X_test, y_test), nclass = load_mnist()
    train_set = DataIterator(X_train[:backend_default.bsz * 3])

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
              Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output)
Example 22
def test_model_get_outputs_rnn(backend_default, data):

    data_path = load_text('ptb-valid', path=data)

    data_set = Text(time_steps=50, path=data_path)

    # weight initialization
    init = Constant(0.08)

    # model initialization
    layers = [
        Recurrent(150, init, Logistic()),
        Affine(len(data_set.vocab), init, bias=init, activation=Rectlin())
    ]

    model = Model(layers=layers)
    output = model.get_outputs(data_set)

    assert output.shape == (
        data_set.ndata, data_set.seq_length, data_set.nclass)
Example 23
 def layers(self):
     bn = True
     return [
         Conv((7, 7, 96), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=3, strides=1)\
             if self.bn_first_layer else\
             Conv((7, 7, 96), init=Kaiming(), bias=Constant(0), activation=Explin(), padding=3, strides=1),
         Pooling(3, strides=2, padding=1),
         Conv((7, 7, 128), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=3, strides=1),
         Pooling(3, strides=2, padding=1),
         Conv((5, 5, 256), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=2, strides=1),
         Pooling(3, strides=2, padding=1),
         Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=1, strides=1),
         Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=1, strides=1),
         Conv((3, 3, 384), init=Kaiming(), activation=Explin(), batch_norm=bn, padding=1, strides=1),
         Pooling(3, strides=2, padding=1, op='avg'),
         Affine(nout=self.noutputs, init=Kaiming(), activation=Explin(), batch_norm=bn),
         Affine(nout=self.noutputs, init=Kaiming(), activation=Explin(), batch_norm=bn),
         Affine(nout=self.noutputs, init=Kaiming(), bias=Constant(0),
                activation=Softmax() if self.use_softmax else Logistic(shortcut=True))
     ]
Example 24
def gen_model(backend_type):
    # setup backend
    gen_backend(backend=backend_type,
                batch_size=batch_size,
                rng_seed=2,
                device_id=args.device_id,
                datatype=args.datatype)

    init_uni = Uniform(low=-0.1, high=0.1)

    # Set up the model layers
    layers = []
    layers.append(Conv((5, 5, 16), init=init_uni, bias=Constant(0), activation=Rectlin()))
    layers.append(Pooling(2))
    layers.append(Conv((5, 5, 32), init=init_uni, activation=Rectlin()))
    layers.append(Pooling(2))
    layers.append(Affine(nout=500, init=init_uni, activation=Rectlin()))
    layers.append(Affine(nout=10, init=init_uni, activation=Logistic(shortcut=True)))

    mlp = Model(layers=layers)
    return mlp
Example 25
    def input_layers(self, analytics_input, init, activation, gate):
        """
        return the input layers. we currently support convolutional and LSTM
        :return:
        """
        if self.recurrent:
            if analytics_input:
                # support analytics + content
                input_layers = MergeMultistream([[
                    LSTM(300,
                         init,
                         init_inner=Kaiming(),
                         activation=activation,
                         gate_activation=gate,
                         reset_cells=True),
                    RecurrentSum()
                ], [Affine(30, init, activation=activation)]], 'stack')
            else:
                # content only
                input_layers = [
                    LSTM(300,
                         init,
                         init_inner=Kaiming(),
                         activation=activation,
                         gate_activation=gate,
                         reset_cells=True),
                    RecurrentSum()
                ]
        else:
            if analytics_input:
                # support analytics + content
                input_layers = MergeMultistream([
                    self.conv_net(activation),
                    [Affine(30, init, activation=Logistic())]
                ], 'stack')
            else:
                # content only
                input_layers = self.conv_net(activation)

        return input_layers
Example 26
def create_network(stage_depth):
    if stage_depth in (18,):
        stages = (2, 2, 2, 2)
    elif stage_depth in (34, 50):
        stages = (3, 4, 6, 3)
    elif stage_depth in (68, 101):
        stages = (3, 4, 23, 3)
    elif stage_depth in (102, 152):
        stages = (3, 8, 36, 3)
    else:
        raise ValueError('Invalid stage_depth value {}'.format(stage_depth))

    bottleneck = False
    if stage_depth in (50, 101, 152):
        bottleneck = True

    layers = [Conv(**conv_params(7, 64, strides=2)), Pooling(3, strides=2)]

    # Structure of the deep residual part of the network:
    # stage_depth modules of 2 convolutional layers each at feature map depths
    # of 64, 128, 256, 512
    nfms = list(
        itt.chain.from_iterable(
            [itt.repeat(2**(x + 6), r) for x, r in enumerate(stages)]))
    strides = [-1] + [
        1 if cur == prev else 2 for cur, prev in zip(nfms[1:], nfms[:-1])
    ]
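    # Illustrative values (not part of the original source): with stage_depth=18,
    # stages = (2, 2, 2, 2), so
    #   nfms    = [64, 64, 128, 128, 256, 256, 512, 512]
    #   strides = [-1, 1, 2, 1, 2, 1, 2, 1]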

    for nfm, stride in zip(nfms, strides):
        layers.append(module_factory(nfm, bottleneck, stride))

    layers.append(Pooling('all', op='avg'))
    layers.append(
        Affine(10,
               init=Kaiming(local=False),
               batch_norm=True,
               activation=Rectlin()))
    layers.append(Affine(1, init=Kaiming(local=False), activation=Logistic()))
    return Model(layers=layers), GeneralizedCost(costfunc=CrossEntropyBinary())
Example 27
    def build(self):
        """
        Build the model's layers
        """
        first_layer_dens = 64
        second_layer_dens = 64
        output_layer_dens = 2
        # setup weight initialization function
        init_norm = Gaussian(scale=0.01)
        # setup model layers
        layers = [
            Affine(nout=first_layer_dens, init=init_norm,
                   activation=Rectlin()),
            Affine(nout=second_layer_dens,
                   init=init_norm,
                   activation=Rectlin()),
            Affine(nout=output_layer_dens,
                   init=init_norm,
                   activation=Logistic(shortcut=True))
        ]

        # initialize model object
        self.model = Model(layers=layers)
Example 28
def test_model_get_outputs(backend_default, data):
    dataset = MNIST(path=data)
    train_set = dataset.train_iter

    init_norm = Gaussian(loc=0.0, scale=0.1)

    layers = [
        Affine(nout=20, init=init_norm, bias=init_norm, activation=Rectlin()),
        Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))
    ]
    mlp = Model(layers=layers)
    out_list = []
    mlp.initialize(train_set)
    for x, t in train_set:
        x = mlp.fprop(x)
        out_list.append(x.get().T.copy())
    ref_output = np.vstack(out_list)

    train_set.reset()
    output = mlp.get_outputs(train_set)
    assert np.allclose(output, ref_output[:output.shape[0], :])

    # test model benchmark inference
    mlp.benchmark(train_set, inference=True, niterations=5)
Example 29
args = parser.parse_args()

# load up the mnist data set
# split into train and tests sets
(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)

# setup a training set iterator
train_set = DataIterator(X_train, y_train, nclass=nclass)
# setup a validation data set iterator
valid_set = DataIterator(X_test, y_test, nclass=nclass)

# setup weight initialization function
init_norm = Gaussian(loc=0.0, scale=0.01)

normrelu = dict(init=init_norm, activation=Rectlin())
normsigm = dict(init=init_norm, activation=Logistic(shortcut=True))
normsoft = dict(init=init_norm, activation=Softmax())

# setup model layers
b1 = BranchNode(name="b1")
b2 = BranchNode(name="b2")


p1 = [Affine(nout=100, linear_name="m_l1", **normrelu),
      b1,
      Affine(nout=32, linear_name="m_l2", **normrelu),
      Affine(nout=16, linear_name="m_l3", **normrelu),
      b2,
      Affine(nout=10, linear_name="m_l4", **normsoft)]

p2 = [b1,
Example 30
# hyperparameters
num_epochs = args.epochs

(X_train, y_train), (X_test, y_test), nclass = load_mnist(path=args.data_dir)
train_set = ArrayIterator([X_train, X_train], y_train, nclass=nclass, lshape=(1, 28, 28))
valid_set = ArrayIterator([X_test, X_test], y_test, nclass=nclass, lshape=(1, 28, 28))

# weight initialization
init_norm = Gaussian(loc=0.0, scale=0.01)

# initialize model
path1 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

path2 = Sequential(layers=[Affine(nout=100, init=init_norm, activation=Rectlin()),
                           Affine(nout=100, init=init_norm, activation=Rectlin())])

layers = [MergeMultistream(layers=[path1, path2], merge="stack"),
          Affine(nout=10, init=init_norm, activation=Logistic(shortcut=True))]

model = Model(layers=layers)
cost = GeneralizedCost(costfunc=CrossEntropyBinary())

# fit and validate
optimizer = GradientDescentMomentum(learning_rate=0.1, momentum_coef=0.9)

# configure callbacks
callbacks = Callbacks(model, eval_set=valid_set, **args.callback_args)

model.fit(train_set, cost=cost, optimizer=optimizer, num_epochs=num_epochs, callbacks=callbacks)
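
After fitting, evaluation can follow the same pattern as Example 11 (a sketch; it assumes Misclassification and neon_logger are imported alongside the other neon symbols used above):

error_rate = model.eval(valid_set, metric=Misclassification())
neon_logger.display('Classification accuracy = %.4f' % (1 - error_rate))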