Example #1
def define_recurrent_layers(out_axes=None,
                            celltype='RNN',
                            recurrent_units=[32],
                            init=GlorotInit(),
                            return_sequence=True):
    layers = []
    for e, i in enumerate(recurrent_units):
        layer_return_sequence = e < len(recurrent_units) - 1 or return_sequence
        if celltype == 'RNN':
            layers.append(
                Recurrent(nout=i,
                          init=init,
                          backward=False,
                          activation=Tanh(),
                          return_sequence=layer_return_sequence))
        elif celltype == 'LSTM':
            layers.append(
                LSTM(nout=i,
                     init=init,
                     backward=False,
                     activation=Tanh(),
                     gate_activation=Logistic(),
                     return_sequence=layer_return_sequence))
    if out_axes is not None:
        affine_layer = Affine(weight_init=init,
                              bias_init=init,
                              activation=Identity(),
                              axes=out_axes)
        layers.append(affine_layer)
    return layers
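
A minimal usage sketch for the helper above, assuming the same neon imports as in the other examples (Sequential, GlorotInit) and a hypothetical output axis ax.Y:

# stack two LSTM layers and project onto the output axes (illustrative values)
layers = define_recurrent_layers(out_axes=(ax.Y,),
                                 celltype='LSTM',
                                 recurrent_units=[64, 32],
                                 init=GlorotInit(),
                                 return_sequence=False)
model = Sequential(layers)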
Example #2
def test_inference_reuse_lstm(recurrent_input):

    layer = LSTM(10, dummy_init, activation=lambda x: x)
    layer(recurrent_input)
    train_params = (layer.W_input["f"], layer.W_recur["f"])
    with Layer.inference_mode_on():
        layer(recurrent_input)
        inference_params = (layer.W_input["f"], layer.W_recur["f"])

    for train_param, inference_param in zip(train_params, inference_params):
        assert train_param is inference_param
Example #3
# number of classes
ax.Y.length = time_steps

# create iterator and placeholders for training data
train_set = TSPSequentialArrayIterator(data_arrays=tsp_data['train'],
                                       nfeatures=num_features,
                                       batch_size=args.batch_size,
                                       time_steps=time_steps,
                                       total_iterations=args.num_iterations)
inputs = train_set.make_placeholders()

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# build computational graph
enc = LSTM(args.hs, init, activation=Tanh(), reset_cells=True,
           gate_activation=Logistic(), return_sequence=True)
dec = LSTM(args.hs, init, activation=Tanh(), reset_cells=True,
           gate_activation=Logistic(), return_sequence=True)

if args.emb is True:
    # encoder input embedding
    hidden_feature_axis = ng.make_axis(length=args.hs, name='hidden_feature_axis')
    feature_axis = ng.make_axis(length=num_features, name='feature_axis')

    W_emb = ng.variable(axes=[hidden_feature_axis, feature_axis], initial_value=init)
    emb_enc_inputs = ng.dot(W_emb, inputs['inp_txt'])

    # decoder input embedding
    emb_dec_input = []
    ax.N.length = args.batch_size
    for i in range(ax.N.length):
Example #4
inputs = train_set.make_placeholders()
ax.Y.length = len(tree_bank_data.vocab)


def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.layer_type == "lstm":
    rlayer1 = LSTM(hidden_size,
                   init,
                   activation=Tanh(),
                   gate_activation=Logistic(),
                   return_sequence=True)
    rlayer2 = LSTM(hidden_size,
                   init,
                   activation=Tanh(),
                   gate_activation=Logistic(),
                   return_sequence=True)

# model initialization
seq1 = Sequential([
    Preprocess(functor=expand_onehot), rlayer1, rlayer2,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp(gradient_clip_value=gradient_clip_value)
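
The snippet stops at the optimizer; below is a hedged sketch of how the model and optimizer might be wired into a training cost. The placeholder keys 'X' and 'y' and the use of ng.cross_entropy_multi, ng.sequential, and ng.mean are assumptions, not taken from the original example:

# forward pass through the stacked LSTM model (placeholder key 'X' is assumed)
train_prob = seq1(inputs['X'])

# multiclass cross-entropy against one-hot targets (placeholder key 'y' is assumed)
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['y'], axis=ax.Y),
                                    usebits=True)

# apply the optimizer updates, then report the mean batch cost
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])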
Example #5
def check_lstm(seq_len, input_size, hidden_size,
               batch_size, init_func, return_seq=True, backward=False,
               reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng = LSTM(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=reset_cells, return_sequence=return_seq,
                       backward=backward)

        out_ng = lstm_ng.train_outputs(inp_ng)

        fprop_neon_fun = ex.executor(out_ng, inp_ng)

        fprop_neon_list = []
        input_value_list = []

        for i in range(num_iter):
            # fprop on random inputs
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon = fprop_neon_fun(input_value).copy()

            if return_seq is True:
                fprop_neon = fprop_neon[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_list.append(fprop_neon)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon[:, -1].reshape(-1, 1),
                                           lstm_ng.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, we can get the W values. Get copies so
        # shared values don't confuse derivatives
        # concatenate the i, f, o, g gate weights together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon = [lstm_ng.W_input[k].value.get(None).copy().T for k in gates]
        Whh_neon = [lstm_ng.W_recur[k].value.get(None).copy().T for k in gates]
        bh_neon = [lstm_ng.b[k].value.get(None).copy() for k in gates]

        # reference numpy LSTM
        lstm_ref = RefLSTM()
        WLSTM = lstm_ref.init(input_size, hidden_size)

        # make ref weights and biases match the neon model
        WLSTM[0, :] = np.concatenate(bh_neon)
        WLSTM[1:input_size + 1, :] = np.concatenate(Wxh_neon, 1)
        WLSTM[input_size + 1:] = np.concatenate(Whh_neon, 1)

        # transpose input X and do fprop
        fprop_ref_list = []
        c0 = h0 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref, cprev, hprev, batch_cache) = lstm_ref.forward(inp_ref,
                                                                     WLSTM,
                                                                     c0, h0)
            if reset_cells is False:
                c0 = cprev
                h0 = hprev

            # the output needs transpose as well
            Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
            fprop_ref_list.append(Hout_ref)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_list[i],
                                       fprop_ref_list[i], rtol=rtol, atol=atol)
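
An illustrative call to the test helper above; the parameter values and the UniformInit initializer are assumptions, and the module-level rng, rtol, atol and RefLSTM used inside check_lstm are expected to be defined as in the original test file:

# compare a single LSTM against the numpy reference on small, random shapes
check_lstm(seq_len=5, input_size=3, hidden_size=4, batch_size=2,
           init_func=UniformInit(low=-0.08, high=0.08),
           return_seq=True, reset_cells=True, num_iter=2)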
Example #6
def check_stacked_lstm(seq_len, input_size, hidden_size,
                       batch_size, init_func, return_seq=True, backward=False,
                       reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng_1 = LSTM(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic(),
                         reset_cells=reset_cells, return_sequence=return_seq,
                         backward=backward)
        lstm_ng_2 = LSTM(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic(),
                         reset_cells=reset_cells, return_sequence=return_seq,
                         backward=backward)

        out_ng_1 = lstm_ng_1.train_outputs(inp_ng)
        out_ng_2 = lstm_ng_2.train_outputs(out_ng_1)

        fprop_neon_fun_2 = ex.executor(out_ng_2, inp_ng)

        # fprop on random inputs for multiple iterations
        fprop_neon_2_list = []
        input_value_list = []

        for i in range(num_iter):
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon_2 = fprop_neon_fun_2(input_value).copy()

            # comparing outputs
            if return_seq is True:
                fprop_neon_2 = fprop_neon_2[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_2_list.append(fprop_neon_2)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon_2[:, -1].reshape(-1, 1),
                                           lstm_ng_2.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, we can get the W values. Get copies so
        # shared values don't confuse derivatives
        # concatenate the i, f, o, g gate weights together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_1 = \
            np.concatenate([lstm_ng_1.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_1 = \
            np.concatenate([lstm_ng_1.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_1 =  \
            np.concatenate([lstm_ng_1.b[k].value.get(None).copy() for k in gates])
        Wxh_neon_2 = \
            np.concatenate([lstm_ng_2.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_2 = \
            np.concatenate([lstm_ng_2.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_2 = \
            np.concatenate([lstm_ng_2.b[k].value.get(None).copy() for k in gates])

        # reference numpy LSTM
        lstm_ref_1 = RefLSTM()
        lstm_ref_2 = RefLSTM()
        WLSTM_1 = lstm_ref_1.init(input_size, hidden_size)
        WLSTM_2 = lstm_ref_2.init(hidden_size, hidden_size)

        # make ref weights and biases the same as the neon model
        WLSTM_1[0, :] = bh_neon_1
        WLSTM_1[1:input_size + 1, :] = Wxh_neon_1
        WLSTM_1[input_size + 1:] = Whh_neon_1
        WLSTM_2[0, :] = bh_neon_2
        WLSTM_2[1:hidden_size + 1, :] = Wxh_neon_2
        WLSTM_2[hidden_size + 1:] = Whh_neon_2

        # transpose input X and do fprop
        fprop_ref_2_list = []
        c0_1 = h0_1 = None
        c0_2 = h0_2 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref_1, cprev_1, hprev_1, batch_cache) = lstm_ref_1.forward(inp_ref, WLSTM_1,
                                                                             c0_1, h0_1)
            (Hout_ref_2, cprev_2, hprev_2, batch_cache) = lstm_ref_2.forward(Hout_ref_1, WLSTM_2,
                                                                             c0_2, h0_2)

            if reset_cells is False:
                c0_1 = cprev_1
                h0_1 = hprev_1
                c0_2 = cprev_2
                h0_2 = hprev_2

            # the output needs transpose as well
            Hout_ref_2 = Hout_ref_2.reshape(seq_len * batch_size, hidden_size).T

            fprop_ref_2_list.append(Hout_ref_2)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_2_list[i],
                                       fprop_ref_2_list[i], rtol=rtol, atol=atol)
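
As with check_lstm, an illustrative example invocation; the values are assumptions, and reset_cells=False also exercises the branch that carries the hidden state over between iterations:

# compare two stacked LSTMs against the stacked numpy reference
check_stacked_lstm(seq_len=5, input_size=3, hidden_size=4, batch_size=2,
                   init_func=UniformInit(low=-0.08, high=0.08),
                   reset_cells=False, num_iter=2)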
Example #7
    out_axes = ng.make_axes([batch_axis, time_axis, out_axis])
else:
    out_axes = ng.make_axes([batch_axis, out_axis])

# Build placeholders for the created axes
inputs = {
    'X': ng.placeholder(in_axes),
    'y': ng.placeholder(out_axes),
    'iteration': ng.placeholder(axes=())
}

# Network Definition
seq1 = Sequential([
    LSTM(nout=recurrent_units,
         init=init_uni,
         backward=False,
         activation=Logistic(),
         gate_activation=Tanh(),
         return_sequence=predict_seq),
    Affine(weight_init=init_uni,
           bias_init=init_uni,
           activation=Identity(),
           axes=out_axis)
])

# Optimizer
# The following policy will set the initial learning rate to 0.05 (base_lr)
# At iteration (num_iterations // 5), the learning rate is multiplied by gamma (new lr = .005)
# At iteration (num_iterations // 2), it will be reduced by gamma again (new lr = .0005)
schedule = [num_iterations // 5, num_iterations // 2]
learning_rate_policy = {
    'name': 'schedule',
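    # (the snippet is truncated here; based on the comments above, the remaining
    #  keys would plausibly look like the following -- the key names 'schedule',
    #  'gamma' and 'base_lr' are an assumption about the neon learning-rate policy dict)
    'schedule': schedule,   # iterations at which to decay the learning rate
    'gamma': 0.1,           # 0.05 -> 0.005 -> 0.0005 at the scheduled iterations
    'base_lr': 0.05         # initial learning rate
}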
Example #8
# Build placeholders for the created axes
inputs = {
    'X': ng.placeholder(in_axes),
    'y': ng.placeholder(out_axes),
    'iteration': ng.placeholder(axes=())
}

# Network Definition
if (use_embedding is False):
    seq1 = Sequential([
        Preprocess(functor=expand_onehot),
        LSTM(nout=recurrent_units,
             init=init_uni,
             backward=False,
             reset_cells=True,
             activation=Logistic(),
             gate_activation=Tanh(),
             return_sequence=True),
        Affine(weight_init=init_uni,
               bias_init=init_uni,
               activation=Softmax(),
               axes=out_axis)
    ])
else:
    embedding_dim = 8
    seq1 = Sequential([
        LookupTable(len(shakes.vocab) + 1,
                    embedding_dim,
                    init_uni,
                    update=True),
Example #9
 def __init__(self):
     super(LSTMLayer, self).__init__()
     self.layer = LSTM(nout=16,
                       init=ConstantInit(0.0),
                       activation=Tanh(),
                       gate_activation=Tanh())
Example #10
# Use Array Iterator for training set
train_set = ArrayIterator(train,
                          batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])
# Use Array Iterator for validation set
valid_set = ArrayIterator(dev,
                          batch_size=params_dict['batch_size'],
                          total_iterations=params_dict['num_iterations'])

# Make placeholders for training
inputs = train_set.make_placeholders(include_iteration=True)

# Encoding Layer
rlayer_1 = LSTM(hidden_size,
                init,
                activation=Tanh(),
                reset_cells=True,
                gate_activation=Logistic(),
                return_sequence=True)

# Embedding Layer
embed_layer = LookupTable(params_dict['vocab_size'],
                          params_dict['embed_size'],
                          embeddings,
                          update=False,
                          pad_idx=params_dict['pad_idx'])

# Initializers for LSTM cells
input_placeholder, input_value = make_placeholder(2 * hidden_size, 1,
                                                  params_dict['batch_size'])
input_placeholder_a, input_value = make_placeholder(2 * hidden_size, 1,
                                                    params_dict['batch_size'])