Example #1
def test_stacked_birnn_construction(recurrent_input, output_size,
                                    weight_initializer, sum_outputs,
                                    concatenate_outputs):
    """
    Tests that BiRNNs can be stacked in all of their configurations. If they cannot, an error
    will be raised, so no assertions are needed.
    """

    # Generate ngraph RNN
    rnn1 = BiRNN(output_size,
                 init=weight_initializer,
                 activation=Tanh(),
                 reset_cells=True,
                 return_sequence=True,
                 sum_out=sum_outputs,
                 concat_out=concatenate_outputs)
    rnn2 = BiRNN(output_size,
                 init=weight_initializer,
                 activation=Tanh(),
                 reset_cells=True,
                 return_sequence=True,
                 sum_out=sum_outputs,
                 concat_out=concatenate_outputs)

    out = rnn1(recurrent_input)
    rnn2(out)
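The flag fixtures above (sum_outputs, concatenate_outputs) come from the surrounding test harness, which is not shown here. A minimal sketch of how those combinations might be generated with a pytest fixture follows; the fixture name and the specific parameter values are illustrative assumptions, not the project's actual conftest.

import pytest

# Hypothetical fixture: yields (sum_outputs, concatenate_outputs) combinations.
# Summing and concatenating at the same time is skipped, matching the three cases
# that the output-type test in Example #3 distinguishes.
@pytest.fixture(params=[(False, False), (True, False), (False, True)],
                ids=["tuple_out", "sum_out", "concat_out"])
def output_mode(request):
    return request.param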
Example #2
def test_change_recurrent_axis_length(recurrent_layer_cls, batch_size,
                                      sequence_length, input_size,
                                      hidden_size):
    """
    Tests that recurrent layers support changing the REC axis length
    (needed by seq2seq inference).
    """
    # create three identical recurrent layers with same weights
    W_input_val = np.random.normal(size=(hidden_size, input_size))
    W_recur_val = np.random.normal(size=(hidden_size, hidden_size))
    rec1 = recurrent_layer_cls(nout=hidden_size,
                               init=ConstantInit(W_input_val),
                               init_inner=ConstantInit(W_recur_val),
                               activation=Tanh())
    rec2 = recurrent_layer_cls(nout=hidden_size,
                               init=ConstantInit(W_input_val),
                               init_inner=ConstantInit(W_recur_val),
                               activation=Tanh())
    rec3 = recurrent_layer_cls(nout=hidden_size,
                               init=ConstantInit(W_input_val),
                               init_inner=ConstantInit(W_recur_val),
                               activation=Tanh())

    # create input placeholders and values
    # sequence length greater than 1
    N = ng.make_axis(length=batch_size, name='N')
    REC = ng.make_axis(length=sequence_length, name='REC')
    M = ng.make_axis(length=input_size, name='M')
    xn_axes = ng.make_axes([M, REC, N])
    xn = ng.placeholder(axes=xn_axes)
    xn_val = np.random.normal(size=(input_size, sequence_length, batch_size))
    # sequence length 1
    REC1 = ng.make_axis(length=1, name='REC')
    x1_axes = ng.make_axes([M, REC1, N])
    x1 = ng.placeholder(axes=x1_axes)
    x1_val = np.random.normal(size=(input_size, 1, batch_size))

    # check the results of switching the REC axis of a single layer's input
    # computations that switch the REC axis
    y1_n = rec1(xn)
    y1_1 = rec1(x1)

    # check against not switching
    y2_n = rec2(xn)
    y3_1 = rec3(x1)

    with ExecutorFactory() as ex:

        y1_n_comp = ex.executor(y1_n, xn)
        y1_1_comp = ex.executor(y1_1, x1)
        y2_n_comp = ex.executor(y2_n, xn)
        y3_1_comp = ex.executor(y3_1, x1)

        ng.testing.assert_allclose(y1_n_comp(xn_val), y2_n_comp(xn_val))
        ng.testing.assert_allclose(y1_1_comp(x1_val), y3_1_comp(x1_val))
Example #3
def test_birnn_output_types(recurrent_input, output_size, weight_initializer,
                            sum_outputs, concatenate_outputs):
    """
    Tests that BiRNNs output ops of the right type.
    """

    # Generate ngraph RNN
    rnn1 = BiRNN(output_size,
                 init=weight_initializer,
                 activation=Tanh(),
                 reset_cells=True,
                 return_sequence=True,
                 sum_out=sum_outputs,
                 concat_out=concatenate_outputs)
    out = rnn1(recurrent_input)

    if concatenate_outputs:
        assert isinstance(out, ng.ConcatOp), \
            "Output is of type {} instead of {}".format(type(out), ng.ConcatOp)
    elif sum_outputs:
        assert isinstance(out, ng.Add), \
            "Output is of type {} instead of {}".format(type(out), ng.Add)
    else:
        assert isinstance(out, tuple), \
            "Output is of type {} instead of {}".format(type(out), tuple)
Example #4
def test_birnn_fprop(sequence_length, input_size, hidden_size, batch_size,
                     return_sequence, weight_initializer, bias_initializer,
                     init_state, sum_out, concat_out):

    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size,
                                                      sequence_length,
                                                      batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer,
        init_state)

    # Compute reference numpy RNN
    rnn_ref = RefBidirectional(input_size,
                               hidden_size,
                               return_sequence=return_sequence,
                               sum_out=sum_out,
                               concat_out=concat_out)
    rnn_ref.set_weights(W_in, W_rec, b.reshape(rnn_ref.fwd_rnn.bh.shape))
    h_ref_list = rnn_ref.fprop(input_value.transpose([1, 0, 2]),
                               init_states=init_state_value)

    # Generate ngraph RNN
    rnn_ng = BiRNN(hidden_size,
                   init=W_in,
                   init_inner=W_rec,
                   activation=Tanh(),
                   reset_cells=True,
                   return_sequence=return_sequence,
                   sum_out=sum_out,
                   concat_out=concat_out)

    # fprop ngraph RNN
    out_ng = rnn_ng(input_placeholder, init_state=init_state)

    with ExecutorFactory() as ex:
        # Create computation and execute
        if init_state is not None:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder, init_state)
            fprop_neon = fprop_neon_fun(input_value, init_state_value)

        else:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder)
            fprop_neon = fprop_neon_fun(input_value)

        # Compare output with reference implementation
        if not isinstance(fprop_neon, tuple):
            fprop_neon = [fprop_neon]
            h_ref_list = [h_ref_list]
        for ii, output in enumerate(fprop_neon):
            if return_sequence is True:
                output = output[:, :, 0]
            ng.testing.assert_allclose(output,
                                       h_ref_list[ii],
                                       rtol=fprop_rtol,
                                       atol=fprop_atol)
Example #5
def test_rnn_fprop(sequence_length, input_size, hidden_size, batch_size,
                   return_sequence, weight_initializer, bias_initializer,
                   init_state, extra_axes, backward):

    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size,
                                                      sequence_length,
                                                      batch_size,
                                                      extra_axes=extra_axes)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer,
        init_state)

    # Compute reference numpy RNN
    rnn_ref = RefRecurrent(input_size,
                           hidden_size,
                           return_sequence=return_sequence)
    rnn_ref.set_weights(W_in.reshape(rnn_ref.Wxh.shape), W_rec,
                        b.reshape(rnn_ref.bh.shape))

    # Run fprop through the reference numpy RNN
    input_shape = (input_size, sequence_length, batch_size)
    h_ref_list = rnn_ref.fprop_only(input_value.reshape(input_shape).transpose(
        [1, 0, 2]),
                                    init_states=init_state_value,
                                    backward=backward)

    # Generate ngraph RNN
    rnn_ng = Recurrent(hidden_size,
                       init=W_in,
                       init_inner=W_rec,
                       activation=Tanh(),
                       reset_cells=True,
                       return_sequence=return_sequence,
                       backward=backward)

    # fprop ngraph RNN
    out_ng = rnn_ng(input_placeholder, init_state=init_state)

    with ExecutorFactory() as ex:
        # Create computation and execute
        if init_state is not None:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder, init_state)
            fprop_neon = fprop_neon_fun(input_value, init_state_value)

        else:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder)
            fprop_neon = fprop_neon_fun(input_value)

        # Compare output with reference implementation
        if return_sequence is True:
            fprop_neon = fprop_neon[:, :, 0]
        ng.testing.assert_allclose(fprop_neon,
                                   h_ref_list,
                                   rtol=fprop_rtol,
                                   atol=fprop_atol)
Example #6
def test_rnn_deriv_numerical(sequence_length, input_size, hidden_size,
                             batch_size, return_sequence, weight_initializer,
                             bias_initializer, backward, init_state):

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size,
                                                      sequence_length,
                                                      batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer,
        init_state)

    # Generate ngraph RNN
    rnn_ng = Recurrent(hidden_size,
                       init=W_in,
                       init_inner=W_rec,
                       activation=Tanh(),
                       reset_cells=True,
                       return_sequence=return_sequence,
                       backward=backward)

    # fprop ngraph RNN
    out_ng = rnn_ng(input_placeholder, init_state=init_state)

    params = [(rnn_ng.W_input, W_in), (rnn_ng.W_recur, W_rec), (rnn_ng.b, b)]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        for px, _ in params:
            if init_state is not None:
                update = (ex.derivative(out_ng, px, input_placeholder,
                                        init_state),
                          ex.numeric_derivative(out_ng, px, delta,
                                                input_placeholder, init_state))
            else:
                update = (ex.derivative(out_ng, px, input_placeholder),
                          ex.numeric_derivative(out_ng, px, delta,
                                                input_placeholder))
            param_updates.append(update)

        for (deriv_s, deriv_n), (_, val) in zip(param_updates, params):
            if init_state is not None:
                ng.testing.assert_allclose(deriv_s(val, input_value,
                                                   init_state_value),
                                           deriv_n(val, input_value,
                                                   init_state_value),
                                           rtol=num_rtol,
                                           atol=num_atol)
            else:
                ng.testing.assert_allclose(deriv_s(val, input_value),
                                           deriv_n(val, input_value),
                                           rtol=num_rtol,
                                           atol=num_atol)
Example #7
    def __init__(self, input_placeholder, output_size, RNN, bn_params):

        # Set up axes
        F, T, N = tuple(input_placeholder.axes)
        H = ng.make_axis(length=output_size, name="hidden")
        H2 = ng.make_axis(length=output_size, name="hidden_tmp")

        self.input_placeholder = input_placeholder

        # Make reference placeholder
        self.reference_input = ng.placeholder(axes=[H, T, N])

        # Create weight matrices
        w_rec_axes = ng.make_axes([H, H2])
        w_in_axes = ng.make_axes([H, F])
        self.W_rec = rng.uniform(-1, 1, w_rec_axes)
        self.W_in = rng.uniform(-1, 1, w_in_axes)
        self.W_id = np.eye(output_size).astype("float32")

        self.rnn_args = dict(nout=output_size,
                             init_inner=self.W_rec,
                             return_sequence=True,
                             activation=Tanh())

        self.reference_rnn = RNN(init=self.W_id, **self.rnn_args)
        self.rnn = RNN(init=self.W_in, batch_norm=True, **self.rnn_args)

        if self.has_gates:
            self.batch_norm_dict = self.rnn.batch_norm
        else:
            self.batch_norm_dict = {'gate': self.rnn.batch_norm}

        self.default_gate = list(self.batch_norm_dict.keys())[0]

        for bn in self.batch_norm_dict.values():
            bn.__dict__.update(bn_params)
Example #8
# Output is of size (vocab_size + 1, 1)
# +1 is for the unknown token
out_axis = ng.make_axis(length=len(shakes.vocab) + 1, name="out_feature_axis")
in_axes = ng.make_axes([batch_axis, time_axis])
out_axes = ng.make_axes([batch_axis, time_axis])

# Build placeholders for the created axes
inputs = {'X': ng.placeholder(in_axes), 'y': ng.placeholder(out_axes),
          'iteration': ng.placeholder(axes=())}

# Network Definition
if use_embedding is False:
    seq1 = Sequential([Preprocess(functor=expand_onehot),
                       LSTM(nout=recurrent_units, init=init_uni, backward=False, reset_cells=True,
                            activation=Logistic(), gate_activation=Tanh(),
                            return_sequence=True),
                       Affine(weight_init=init_uni, bias_init=init_uni,
                              activation=Softmax(), axes=out_axis)])
else:
    embedding_dim = 8
    seq1 = Sequential([LookupTable(len(shakes.vocab) + 1, embedding_dim, init_uni, update=True),
                       LSTM(nout=recurrent_units, init=init_uni, backward=False, reset_cells=True,
                            activation=Logistic(), gate_activation=Tanh(),
                            return_sequence=True),
                       Affine(weight_init=init_uni, bias_init=init_uni,
                              activation=Softmax(), axes=out_axis)])

# Optimizer
# Initial learning rate is 0.01 (base_lr)
# At iteration (num_iterations // 75), lr is multiplied by gamma (new lr = .95 * .01)
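The snippet above stops before the optimizer is actually constructed. Based on the comments and on the schedule-dict format shown in Example #11, the missing piece presumably looks roughly like the sketch below; the exact keys and the optimizer call are assumptions, not the original code.

# Hedged reconstruction of the truncated optimizer setup (dict shape assumed from Example #11)
learning_rate_policy = {'name': 'schedule',
                        'schedule': [num_iterations // 75],
                        'gamma': 0.95,
                        'base_lr': 0.01}
# The 'iteration' placeholder created above is presumably fed to the optimizer so the
# schedule can be evaluated at run time, e.g.:
# optimizer = RMSProp(learning_rate=learning_rate_policy, iteration=inputs['iteration'])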
Example #9
def check_stacked_lstm(seq_len,
                       input_size,
                       hidden_size,
                       batch_size,
                       init_func,
                       return_seq=True,
                       backward=False,
                       reset_cells=False,
                       num_iter=2):

    Cin = ng.make_axis(input_size, name='Feature')
    REC = ng.make_axis(seq_len, name='REC')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng_1 = LSTM(hidden_size,
                         init_func,
                         activation=Tanh(),
                         gate_activation=Logistic(),
                         reset_cells=reset_cells,
                         return_sequence=return_seq,
                         backward=backward)
        lstm_ng_2 = LSTM(hidden_size + 1,
                         init_func,
                         activation=Tanh(),
                         gate_activation=Logistic(),
                         reset_cells=reset_cells,
                         return_sequence=return_seq,
                         backward=backward)

        out_ng_1 = lstm_ng_1(inp_ng)
        out_ng_2 = lstm_ng_2(out_ng_1)

        fprop_neon_fun_2 = ex.executor(out_ng_2, inp_ng)

        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_1_fun = copier_T(
            ex.executor(list(lstm_ng_1.W_input[k] for k in gates)))
        Whh_neon_1_fun = copier_T(
            ex.executor(list(lstm_ng_1.W_recur[k] for k in gates)))
        bh_neon_1_fun = copier(ex.executor(list(lstm_ng_1.b[k]
                                                for k in gates)))
        Wxh_neon_2_fun = copier_T(
            ex.executor(list(lstm_ng_2.W_input[k] for k in gates)))
        Whh_neon_2_fun = copier_T(
            ex.executor(list(lstm_ng_2.W_recur[k] for k in gates)))
        bh_neon_2_fun = copier(ex.executor(list(lstm_ng_2.b[k]
                                                for k in gates)))

        h_init_fun = ex.executor(lstm_ng_2.h_init)

        # fprop on random inputs for multiple iterations
        fprop_neon_2_list = []
        input_value_list = []

        for i in range(num_iter):
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon_2 = fprop_neon_fun_2(input_value).copy()

            # comparing outputs
            if return_seq is True:
                fprop_neon_2 = fprop_neon_2[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_2_list.append(fprop_neon_2)

            if reset_cells is False:
                # look at the last hidden states
                h_init_neon = fprop_neon_2[:, -1].reshape(-1, 1)
                h_init_ng = h_init_fun()
                ng.testing.assert_allclose(h_init_neon,
                                           h_init_ng,
                                           rtol=rtol,
                                           atol=atol)

        # after the rnn graph has been executed, we can get the W values. Get copies so
        # shared values don't confuse derivatives
        # concatenate the weights for i, f, o, g together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_1 = np.concatenate(Wxh_neon_1_fun(), 1)
        Whh_neon_1 = np.concatenate(Whh_neon_1_fun(), 1)
        bh_neon_1 = np.concatenate(bh_neon_1_fun())
        Wxh_neon_2 = np.concatenate(Wxh_neon_2_fun(), 1)
        Whh_neon_2 = np.concatenate(Whh_neon_2_fun(), 1)
        bh_neon_2 = np.concatenate(bh_neon_2_fun())

        # reference numpy LSTM
        lstm_ref_1 = RefLSTM()
        lstm_ref_2 = RefLSTM()
        WLSTM_1 = lstm_ref_1.init(input_size, hidden_size)
        WLSTM_2 = lstm_ref_2.init(hidden_size, hidden_size + 1)

        # make ref weights and biases the same as the neon model
        WLSTM_1[0, :] = bh_neon_1
        WLSTM_1[1:input_size + 1, :] = Wxh_neon_1
        WLSTM_1[input_size + 1:] = Whh_neon_1
        WLSTM_2[0, :] = bh_neon_2
        WLSTM_2[1:hidden_size + 1, :] = Wxh_neon_2
        WLSTM_2[hidden_size + 1:] = Whh_neon_2

        # transpose input X and do fprop
        fprop_ref_2_list = []
        c0_1 = h0_1 = None
        c0_2 = h0_2 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref_1, cprev_1, hprev_1,
             batch_cache) = lstm_ref_1.forward(inp_ref, WLSTM_1, c0_1, h0_1)
            (Hout_ref_2, cprev_2, hprev_2,
             batch_cache) = lstm_ref_2.forward(Hout_ref_1, WLSTM_2, c0_2, h0_2)

            if reset_cells is False:
                c0_1 = cprev_1
                h0_1 = hprev_1
                c0_2 = cprev_2
                h0_2 = hprev_2

            # the output needs transpose as well
            Hout_ref_2 = Hout_ref_2.reshape(seq_len * batch_size,
                                            hidden_size + 1).T

            fprop_ref_2_list.append(Hout_ref_2)

        for i in range(num_iter):
            ng.testing.assert_allclose(fprop_neon_2_list[i],
                                       fprop_ref_2_list[i],
                                       rtol=rtol,
                                       atol=atol)
Example #10
def check_lstm(seq_len,
               input_size,
               hidden_size,
               batch_size,
               init_func,
               return_seq=True,
               backward=False,
               reset_cells=False,
               num_iter=2):

    Cin = ng.make_axis(input_size, name='Feature')
    REC = ng.make_axis(seq_len, name='REC')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng = LSTM(hidden_size,
                       init_func,
                       activation=Tanh(),
                       gate_activation=Logistic(),
                       reset_cells=reset_cells,
                       return_sequence=return_seq,
                       backward=backward)

        out_ng = lstm_ng(inp_ng)

        fprop_neon_fun = copier(ex.executor((out_ng, lstm_ng.h_init), inp_ng))

        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_fun = copier_T(
            ex.executor(list(lstm_ng.W_input[k] for k in gates)))
        Whh_neon_fun = copier_T(
            ex.executor(list(lstm_ng.W_recur[k] for k in gates)))
        bh_neon_fun = copier(ex.executor(list(lstm_ng.b[k] for k in gates)))

        fprop_neon_list = []
        input_value_list = []

        for i in range(num_iter):
            # fprop on random inputs
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon, h_init_neon = fprop_neon_fun(input_value)

            if return_seq is True:
                fprop_neon = fprop_neon[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_list.append(fprop_neon)

            if reset_cells is False:
                # look at the last hidden states
                ng.testing.assert_allclose(fprop_neon[:, -1].reshape(-1, 1),
                                           h_init_neon,
                                           rtol=rtol,
                                           atol=atol)

        # after the rnn graph has been executed, we can get the W values. Get copies so
        # shared values don't confuse derivatives
        # concatenate the weights for i, f, o, g together (in this order)
        Wxh_neon = Wxh_neon_fun()
        Whh_neon = Whh_neon_fun()
        bh_neon = bh_neon_fun()

        # reference numpy LSTM
        lstm_ref = RefLSTM()
        WLSTM = lstm_ref.init(input_size, hidden_size)

        # make ref weights and biases the same as the neon model
        WLSTM[0, :] = np.concatenate(bh_neon)
        WLSTM[1:input_size + 1, :] = np.concatenate(Wxh_neon, 1)
        WLSTM[input_size + 1:] = np.concatenate(Whh_neon, 1)

        # transpose input X and do fprop
        fprop_ref_list = []
        c0 = h0 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref, cprev, hprev,
             batch_cache) = lstm_ref.forward(inp_ref, WLSTM, c0, h0)
            if reset_cells is False:
                c0 = cprev
                h0 = hprev

            # the output needs transpose as well
            Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
            fprop_ref_list.append(Hout_ref)

        for i in range(num_iter):
            ng.testing.assert_allclose(fprop_neon_list[i],
                                       fprop_ref_list[i],
                                       rtol=rtol,
                                       atol=atol)
Example #11
out_axes = ng.make_axes([batch_axis, out_axis])

# Build placeholders for the created axes
inputs = {
    'X': ng.placeholder(in_axes),
    'y': ng.placeholder(out_axes),
    'iteration': ng.placeholder(axes=())
}

# Network Definition
seq1 = Sequential([
    LSTM(nout=recurrent_units,
         init=init_uni,
         backward=False,
         activation=Logistic(),
         gate_activation=Tanh(),
         return_sequence=predict_seq),
    Affine(weight_init=init_uni,
           bias_init=init_uni,
           activation=Identity(),
           axes=out_axis)
])

# Optimizer
# The following policy sets the initial learning rate to 0.05 (base_lr)
# At iteration (num_iterations // 5), the learning rate is multiplied by gamma (new lr = .005)
# At iteration (num_iterations // 2), it is reduced by gamma again (new lr = .0005)
schedule = [num_iterations // 5, num_iterations // 2]
learning_rate_policy = {
    'name': 'schedule',
    'schedule': schedule,
Example #12

def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.use_lut:
    layer_0 = LookupTable(50, 100, init, update=True, pad_idx=0)
else:
    layer_0 = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size,
                   init,
                   activation=Tanh(),
                   return_sequence=True,
                   sum_out=True)

# model initialization
seq1 = Sequential([
    layer_0, rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp()
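RMSProp() alone only defines the update rule; a loss and an update op still have to be wired into the graph. A hedged sketch of that wiring, following the placeholder pattern of Examples #8 and #11, is shown below; the 'inputs' dict and the choice of cross-entropy loss are assumptions here, not part of the original snippet.

# Hedged training-graph sketch (placeholders and loss choice assumed, not from the snippet)
train_prob = seq1(inputs['X'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['y'], axis=ax.Y))
# apply the optimizer updates and report the mean batch cost as a single op
batch_cost = ng.sequential([optimizer(train_loss),
                            ng.mean(train_loss, out_axes=())])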
Example #13
    return np.squeeze(np.array(tokens)).T


def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# model initialization
one_hot_enc = Preprocess(functor=expand_onehot)
enc = Recurrent(hidden_size,
                init,
                activation=Tanh(),
                reset_cells=True,
                return_sequence=False)
one_hot_dec = Preprocess(functor=expand_onehot)
dec = Recurrent(hidden_size,
                init,
                activation=Tanh(),
                reset_cells=True,
                return_sequence=True)
linear = Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))

optimizer = RMSProp(decay_rate=0.95,
                    learning_rate=2e-3,
                    epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)
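Because the encoder is built with return_sequence=False, it emits only its final hidden state, which can seed the decoder through init_state, the same pattern used in Example #15. A minimal wiring sketch follows; the placeholder names are illustrative assumptions.

# Hedged seq2seq wiring sketch (placeholder names assumed)
encoding = enc(one_hot_enc(inputs['inp_txt']))   # final encoder state only
decoded = dec(one_hot_dec(inputs['out_txt']), init_state=encoding)
train_prob = linear(decoded)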
Example #14
def test_rnn_deriv_ref(sequence_length, input_size, hidden_size, batch_size,
                       return_sequence, weight_initializer, bias_initializer,
                       init_state):

    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"
    assert return_sequence is True, "the reference rnn only supports sequences for deriv"

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size,
                                                      sequence_length,
                                                      batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer,
        init_state)

    # Compute reference numpy RNN
    rnn_ref = RefRecurrent(input_size,
                           hidden_size,
                           return_sequence=return_sequence)
    rnn_ref.set_weights(W_in, W_rec, b.reshape(rnn_ref.bh.shape))

    # Prepare deltas for gradient check
    output_shape = (hidden_size, sequence_length, batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    dW_in, dW_rec, db = rnn_ref.lossFun(input_value.transpose([1, 0, 2]),
                                        deltas.copy().transpose([1, 0, 2]),
                                        init_states=init_state_value)[:3]

    # Generate ngraph RNN
    rnn_ng = RNNCell(hidden_size,
                     init=W_in,
                     init_h2h=W_rec,
                     activation=Tanh(),
                     reset_cells=True)

    # fprop ngraph RNN
    num_steps = input_placeholder.axes.recurrent_axis().length
    init_states = {'h': init_state} if init_state is not None else init_state
    out_ng = unroll(rnn_ng,
                    num_steps,
                    input_placeholder,
                    init_states=init_states,
                    return_sequence=return_sequence)
    deltas_constant = ng.constant(deltas, axes=out_ng.axes)
    params = [(rnn_ng.i2h.linear.W, W_in), (rnn_ng.h2h.W, W_rec),
              (rnn_ng.i2h.bias.W, b)]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        for px, _ in params:
            update = ng.deriv(out_ng, px, error=deltas_constant)
            if init_state is not None:
                param_updates.append(
                    ex.executor(update, input_placeholder, init_state))
            else:
                param_updates.append(ex.executor(update, input_placeholder))

        for update_fun, ref_val in zip(param_updates, [dW_in, dW_rec, db]):
            if init_state is not None:
                grad_neon = update_fun(input_value, init_state_value)
            else:
                grad_neon = update_fun(input_value)
            ng.testing.assert_allclose(grad_neon,
                                       ref_val.squeeze(),
                                       rtol=bprop_rtol,
                                       atol=bprop_atol)
Example #15
def test_seq2seq_deriv_ref(batch_size, sequence_length_enc,
                           sequence_length_dec, input_size, hidden_size,
                           weight_initializer, bias_initializer):

    # TODO: are these assumptions true?
    assert batch_size == 1, "the seq2seq reference implementation only supports batch size 1"

    # Get input placeholders and numpy arrays
    input_placeholder_enc, input_value_enc = \
        make_placeholder(input_size, sequence_length_enc, batch_size)
    input_placeholder_dec, input_value_dec = \
        make_placeholder(input_size, sequence_length_dec, batch_size)

    # Construct encoder weights
    W_in_enc, W_rec_enc, b_enc, _, _ = make_weights(input_placeholder_enc,
                                                    hidden_size,
                                                    weight_initializer,
                                                    bias_initializer,
                                                    init_state=False)

    # Construct decoder weights
    W_in_dec, W_rec_dec, b_dec, _, _ = make_weights(input_placeholder_dec,
                                                    hidden_size,
                                                    weight_initializer,
                                                    bias_initializer,
                                                    init_state=False)

    # Reference numpy seq2seq
    seq2seq_ref = RefSeq2Seq(input_size,
                             hidden_size,
                             decoder_return_sequence=True)
    seq2seq_ref.set_weights(W_in_enc, W_rec_enc,
                            b_enc.reshape(seq2seq_ref.bh_enc.shape), W_in_dec,
                            W_rec_dec, b_dec.reshape(seq2seq_ref.bh_dec.shape))

    # Prepare deltas for gradient check
    output_shape = (hidden_size, sequence_length_dec, batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    dW_in_enc, dW_rec_enc, db_enc, dW_in_dec, dW_rec_dec, db_dec, encoding_ref, hs_return_dec = \
        seq2seq_ref.lossFun(input_value_enc.transpose([1, 0, 2]),
                            input_value_dec.transpose([1, 0, 2]),
                            deltas.copy().transpose([1, 0, 2]))

    # Generate ngraph Seq2Seq
    rnn_enc_ng = Recurrent(hidden_size,
                           init=W_in_enc,
                           init_inner=W_rec_enc,
                           activation=Tanh(),
                           reset_cells=True,
                           return_sequence=False)
    rnn_dec_ng = Recurrent(hidden_size,
                           init=W_in_dec,
                           init_inner=W_rec_dec,
                           activation=Tanh(),
                           reset_cells=True,
                           return_sequence=True)

    # ngraph fprop graph
    encoding_ng = rnn_enc_ng(input_placeholder_enc, init_state=None)
    output_ng = rnn_dec_ng(input_placeholder_dec, init_state=encoding_ng)

    deltas_constant = ng.constant(deltas, axes=output_ng.axes)
    params = [(rnn_dec_ng.b, db_dec), (rnn_dec_ng.W_input, dW_in_dec),
              (rnn_dec_ng.W_recur, dW_rec_dec), (rnn_enc_ng.b, db_enc),
              (rnn_enc_ng.W_input, dW_in_enc),
              (rnn_enc_ng.W_recur, dW_rec_enc)]

    with ExecutorFactory() as ex:

        # fprop computations
        fprop_fun = ex.executor([encoding_ng, output_ng],
                                input_placeholder_enc, input_placeholder_dec)

        # gradient computations
        update_funs = []
        for px, _ in params:
            update = ng.deriv(output_ng, px, error=deltas_constant)
            update_funs.append(
                ex.executor(update, input_placeholder_enc,
                            input_placeholder_dec))

        # check forward pass
        encoding, output = fprop_fun(input_value_enc, input_value_dec)
        ng.testing.assert_allclose(encoding, encoding_ref, rtol=1e-5, atol=1e-5,
                                   transformer_overwrite=False)
        ng.testing.assert_allclose(np.squeeze(output), np.squeeze(hs_return_dec),
                                   rtol=1e-5, atol=1e-5,
                                   transformer_overwrite=False)

        # check gradient computations
        for update_fun, (_, deriv_ref_val) in zip(update_funs, params):
            grad_neon = update_fun(input_value_enc, input_value_dec)
            ng.testing.assert_allclose(grad_neon,
                                       deriv_ref_val.squeeze(),
                                       rtol=1e-5,
                                       atol=1e-4)
Example #16
def test_birnn_deriv_numerical(sequence_length, input_size, hidden_size,
                               batch_size, return_sequence, weight_initializer,
                               bias_initializer, sum_out, concat_out):

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size,
                                                      sequence_length,
                                                      batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer)

    # Generate ngraph RNN
    rnn_ng = BiRNN(hidden_size,
                   init=W_in,
                   init_inner=W_rec,
                   activation=Tanh(),
                   reset_cells=True,
                   return_sequence=return_sequence,
                   sum_out=sum_out,
                   concat_out=concat_out)

    # fprop ngraph RNN
    out_ng = rnn_ng(input_placeholder)

    w_in_f = rnn_ng.fwd_rnn.W_input
    w_rec_f = rnn_ng.fwd_rnn.W_recur
    b_f = rnn_ng.fwd_rnn.b
    w_in_b = rnn_ng.bwd_rnn.W_input
    w_rec_b = rnn_ng.bwd_rnn.W_recur
    b_b = rnn_ng.bwd_rnn.b

    params_f = [(w_in_f, W_in), (w_rec_f, W_rec), (b_f, b)]

    params_b = [(w_in_b, W_in), (w_rec_b, W_rec), (b_b, b)]

    if sum_out or concat_out:
        out_ng = [out_ng]
        params_birnn = [params_f + params_b]
    else:
        # in this case out_ng will be a tuple of (fwd, bwd) outputs
        params_birnn = [params_f, params_b]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        dep_list = list()
        for output, dependents in zip(out_ng, params_birnn):
            for px, _ in dependents:
                update = (ex.derivative(output, px, input_placeholder),
                          ex.numeric_derivative(output, px, delta,
                                                input_placeholder))
                param_updates.append(update)
            dep_list += dependents

        for ii, ((deriv_s, deriv_n),
                 (_, val)) in enumerate(zip(param_updates, dep_list)):
            ng.testing.assert_allclose(deriv_s(val, input_value),
                                       deriv_n(val, input_value),
                                       rtol=num_rtol,
                                       atol=num_atol)