Example 1
def test_stacked_birnn_construction(recurrent_input, output_size,
                                    weight_initializer, sum_outputs,
                                    concatenate_outputs):
    """
    Tests that BiRNNs can be stacked in all of their configurations. If they cannot, an error will
    be raised, so no assertions are needed.
    """

    # Generate ngraph RNN
    rnn1 = BiRNN(output_size,
                 init=weight_initializer,
                 activation=Tanh(),
                 reset_cells=True,
                 return_sequence=True,
                 sum_out=sum_outputs,
                 concat_out=concatenate_outputs)
    rnn2 = BiRNN(output_size,
                 init=weight_initializer,
                 activation=Tanh(),
                 reset_cells=True,
                 return_sequence=True,
                 sum_out=sum_outputs,
                 concat_out=concatenate_outputs)

    out = rnn1(recurrent_input)
    rnn2(out)
Example 2
def define_recurrent_layers(out_axes=None,
                            celltype='RNN',
                            recurrent_units=[32],
                            init=GlorotInit(),
                            return_sequence=True):
    layers = []
    for e, i in enumerate(recurrent_units):
        layer_return_sequence = e < len(recurrent_units) - 1 or return_sequence
        if celltype == 'RNN':
            layers.append(
                Recurrent(nout=i,
                          init=init,
                          backward=False,
                          activation=Tanh(),
                          return_sequence=layer_return_sequence))
        elif celltype == 'LSTM':
            layers.append(
                LSTM(nout=i,
                     init=init,
                     backward=False,
                     activation=Tanh(),
                     gate_activation=Logistic(),
                     return_sequence=layer_return_sequence))
    if out_axes is not None:
        affine_layer = Affine(weight_init=init,
                              bias_init=init,
                              activation=Identity(),
                              axes=out_axes)
        layers.append(affine_layer)
    return layers
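
A minimal usage sketch (not part of the example above): the layer list returned by define_recurrent_layers is typically wrapped in a Sequential container, as later examples do. The cell type and unit counts below are illustrative assumptions, and the same imports as in the example are assumed.

# Hedged usage sketch: wrap the returned layers in a Sequential container.
# The two-layer LSTM configuration is an arbitrary illustrative choice.
layers = define_recurrent_layers(celltype='LSTM',
                                 recurrent_units=[32, 32],
                                 return_sequence=True)
model = Sequential(layers)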
Example 3
def test_change_recurrent_axis_length(recurrent_layer_cls, batch_size,
                                      sequence_length, input_size,
                                      hidden_size):
    """
    Tests that recurrent layers support changing the REC axis length
    (needed for seq2seq inference)
    """
    # create three identical recurrent layers with same weights
    W_input_val = np.random.normal(size=(hidden_size, input_size))
    W_recur_val = np.random.normal(size=(hidden_size, hidden_size))
    rec1 = recurrent_layer_cls(nout=hidden_size,
                               init=ConstantInit(W_input_val),
                               init_inner=ConstantInit(W_recur_val),
                               activation=Tanh())
    rec2 = recurrent_layer_cls(nout=hidden_size,
                               init=ConstantInit(W_input_val),
                               init_inner=ConstantInit(W_recur_val),
                               activation=Tanh())
    rec3 = recurrent_layer_cls(nout=hidden_size,
                               init=ConstantInit(W_input_val),
                               init_inner=ConstantInit(W_recur_val),
                               activation=Tanh())

    # create input placeholders and values
    # sequence length greater than 1
    N = ng.make_axis(length=batch_size, name='N')
    REC = ng.make_axis(length=sequence_length, name='REC')
    M = ng.make_axis(length=input_size, name='M')
    xn_axes = ng.make_axes([M, REC, N])
    xn = ng.placeholder(axes=xn_axes)
    xn_val = np.random.normal(size=(input_size, sequence_length, batch_size))
    # sequence length 1
    REC1 = ng.make_axis(length=1, name='REC')
    x1_axes = ng.make_axes([M, REC1, N])
    x1 = ng.placeholder(axes=x1_axes)
    x1_val = np.random.normal(size=(input_size, 1, batch_size))

    # check results of switching REC axis of a layer's input
    # computations switching REC axis
    y1_n = rec1(xn)
    y1_1 = rec1(x1)

    # check against not switching
    y2_n = rec2(xn)
    y3_1 = rec3(x1)

    with ExecutorFactory() as ex:

        y1_n_comp = ex.executor(y1_n, xn)
        y1_1_comp = ex.executor(y1_1, x1)
        y2_n_comp = ex.executor(y2_n, xn)
        y3_1_comp = ex.executor(y3_1, x1)

        ng.testing.assert_allclose(y1_n_comp(xn_val), y2_n_comp(xn_val))
        ng.testing.assert_allclose(y1_1_comp(x1_val), y3_1_comp(x1_val))
Example 4
def test_rnn_fprop(sequence_length, input_size, hidden_size, batch_size,
                   return_sequence, weight_initializer, bias_initializer,
                   init_state, extra_axes, backward, transformer_factory):

    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size,
                                                      sequence_length,
                                                      batch_size,
                                                      extra_axes=extra_axes)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer,
        init_state)

    # Compute reference numpy RNN
    rnn_ref = RefRecurrent(input_size,
                           hidden_size,
                           return_sequence=return_sequence)
    rnn_ref.set_weights(W_in.reshape(rnn_ref.Wxh.shape), W_rec,
                        b.reshape(rnn_ref.bh.shape))

    # Run fprop on the reference numpy RNN
    input_shape = (input_size, sequence_length, batch_size)
    h_ref_list = rnn_ref.fprop_only(input_value.reshape(input_shape).transpose(
        [1, 0, 2]),
                                    init_states=init_state_value,
                                    backward=backward)

    # Generate ngraph RNN
    rnn_ng = Recurrent(hidden_size,
                       init=W_in,
                       init_inner=W_rec,
                       activation=Tanh(),
                       reset_cells=True,
                       return_sequence=return_sequence,
                       backward=backward)

    # fprop ngraph RNN
    out_ng = rnn_ng(input_placeholder, init_state=init_state)

    with ExecutorFactory() as ex:
        # Create computation and execute
        if init_state is not None:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder, init_state)
            fprop_neon = fprop_neon_fun(input_value, init_state_value)

        else:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder)
            fprop_neon = fprop_neon_fun(input_value)

        # Compare output with reference implementation
        if return_sequence is True:
            fprop_neon = fprop_neon[:, :, 0]
        ng.testing.assert_allclose(fprop_neon,
                                   h_ref_list,
                                   rtol=fprop_rtol,
                                   atol=fprop_atol)
Example 5
def test_birnn_fprop(sequence_length, input_size, hidden_size, batch_size,
                     return_sequence, weight_initializer, bias_initializer,
                     init_state, sum_out, concat_out, transformer_factory):

    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size,
                                                      sequence_length,
                                                      batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer,
        init_state)

    # Compute reference numpy RNN
    rnn_ref = RefBidirectional(input_size,
                               hidden_size,
                               return_sequence=return_sequence,
                               sum_out=sum_out,
                               concat_out=concat_out)
    rnn_ref.set_weights(W_in, W_rec, b.reshape(rnn_ref.fwd_rnn.bh.shape))
    h_ref_list = rnn_ref.fprop(input_value.transpose([1, 0, 2]),
                               init_states=init_state_value)

    # Generate ngraph RNN
    rnn_ng = BiRNN(hidden_size,
                   init=W_in,
                   init_inner=W_rec,
                   activation=Tanh(),
                   reset_cells=True,
                   return_sequence=return_sequence,
                   sum_out=sum_out,
                   concat_out=concat_out)

    # fprop ngraph RNN
    out_ng = rnn_ng(input_placeholder, init_state=init_state)

    with ExecutorFactory() as ex:
        # Create computation and execute
        if init_state is not None:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder, init_state)
            fprop_neon = fprop_neon_fun(input_value, init_state_value)

        else:
            fprop_neon_fun = ex.executor(out_ng, input_placeholder)
            fprop_neon = fprop_neon_fun(input_value)

        # Compare output with reference implementation
        if not isinstance(fprop_neon, tuple):
            fprop_neon = [fprop_neon]
            h_ref_list = [h_ref_list]
        for ii, output in enumerate(fprop_neon):
            if return_sequence is True:
                output = output[:, :, 0]
            ng.testing.assert_allclose(output,
                                       h_ref_list[ii],
                                       rtol=fprop_rtol,
                                       atol=fprop_atol)
Example 6
def test_birnn_output_types(recurrent_input, output_size, weight_initializer,
                            sum_outputs, concatenate_outputs):
    """
    Tests that BiRNNs output ops of the correct type
    """

    # Generate ngraph RNN
    rnn1 = BiRNN(output_size,
                 init=weight_initializer,
                 activation=Tanh(),
                 reset_cells=True,
                 return_sequence=True,
                 sum_out=sum_outputs,
                 concat_out=concatenate_outputs)
    out = rnn1(recurrent_input)

    if concatenate_outputs:
        assert isinstance(out, ng.ConcatOp), \
            "Output is of type {} instead of {}".format(type(out), ng.ConcatOp)
    elif sum_outputs:
        assert isinstance(out, ng.Add), \
            "Output is of type {} instead of {}".format(type(out), ng.Add)
    else:
        assert isinstance(out, tuple), \
            "Output is of type {} instead of {}".format(type(out), tuple)
Example 7
def test_birnn_deriv_numerical(sequence_length, input_size, hidden_size, batch_size,
                               return_sequence, weight_initializer, bias_initializer,
                               sum_out, concat_out):

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(input_placeholder, hidden_size,
                                                                weight_initializer,
                                                                bias_initializer)

    # Generate ngraph RNN
    rnn_ng = BiRNN(hidden_size, init=W_in, init_inner=W_rec, activation=Tanh(),
                   reset_cells=True, return_sequence=return_sequence,
                   sum_out=sum_out, concat_out=concat_out)

    # fprop ngraph RNN
    out_ng = rnn_ng.train_outputs(input_placeholder)

    w_in_f = rnn_ng.fwd_rnn.W_input
    w_rec_f = rnn_ng.fwd_rnn.W_recur
    b_f = rnn_ng.fwd_rnn.b
    w_in_b = rnn_ng.bwd_rnn.W_input
    w_rec_b = rnn_ng.bwd_rnn.W_recur
    b_b = rnn_ng.bwd_rnn.b

    params_f = [(w_in_f, W_in),
                (w_rec_f, W_rec),
                (b_f, b)]

    params_b = [(w_in_b, W_in),
                (w_rec_b, W_rec),
                (b_b, b)]

    if sum_out or concat_out:
        out_ng = [out_ng]
        params_birnn = [params_f + params_b]
    else:
        # in this case out_ng will be a list
        params_birnn = [params_f, params_b]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        dep_list = list()
        for output, dependents in zip(out_ng, params_birnn):
            for px, _ in dependents:
                update = (ex.derivative(output, px, input_placeholder),
                          ex.numeric_derivative(output, px, delta, input_placeholder))
                param_updates.append(update)
            dep_list += dependents

        for ii, ((deriv_s, deriv_n), (_, val)) in enumerate(zip(param_updates, dep_list)):
            ng.testing.assert_allclose(deriv_s(val, input_value),
                                       deriv_n(val, input_value),
                                       rtol=num_rtol,
                                       atol=num_atol)
Example 8
def test_rnn_deriv_numerical(sequence_length, input_size, hidden_size,
                             batch_size, return_sequence, weight_initializer,
                             bias_initializer, backward, init_state,
                             transformer_factory):

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size,
                                                      sequence_length,
                                                      batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(
        input_placeholder, hidden_size, weight_initializer, bias_initializer,
        init_state)

    # Generate ngraph RNN
    rnn_ng = Recurrent(hidden_size,
                       init=W_in,
                       init_inner=W_rec,
                       activation=Tanh(),
                       reset_cells=True,
                       return_sequence=return_sequence,
                       backward=backward)

    # fprop ngraph RNN
    out_ng = rnn_ng(input_placeholder, init_state=init_state)

    params = [(rnn_ng.W_input, W_in), (rnn_ng.W_recur, W_rec), (rnn_ng.b, b)]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        for px, _ in params:
            if init_state is not None:
                update = (ex.derivative(out_ng, px, input_placeholder,
                                        init_state),
                          ex.numeric_derivative(out_ng, px, delta,
                                                input_placeholder, init_state))
            else:
                update = (ex.derivative(out_ng, px, input_placeholder),
                          ex.numeric_derivative(out_ng, px, delta,
                                                input_placeholder))
            param_updates.append(update)

        for (deriv_s, deriv_n), (_, val) in zip(param_updates, params):
            if init_state is not None:
                ng.testing.assert_allclose(deriv_s(val, input_value,
                                                   init_state_value),
                                           deriv_n(val, input_value,
                                                   init_state_value),
                                           rtol=num_rtol,
                                           atol=num_atol)
            else:
                ng.testing.assert_allclose(deriv_s(val, input_value),
                                           deriv_n(val, input_value),
                                           rtol=num_rtol,
                                           atol=num_atol)
Example 9
def make_generator(bn=True):
    # TODO
    # add affine before conv once that is corrected
    # https://github.com/NervanaSystems/private-ngraph/issues/2054
    deconv_layers = [
        Deconvolution((1, 1, 16),
                      filter_init,
                      strides=1,
                      padding=0,
                      activation=relu,
                      batch_norm=bn),
        Deconvolution((3, 3, 192),
                      filter_init,
                      strides=1,
                      padding=0,
                      activation=relu,
                      batch_norm=bn,
                      deconv_out_shape=(1, 5, 5)),
        Deconvolution((3, 3, 192),
                      filter_init,
                      strides=2,
                      padding=0,
                      activation=relu,
                      batch_norm=bn,
                      deconv_out_shape=(1, 11, 11)),
        Deconvolution((3, 3, 192),
                      filter_init,
                      strides=1,
                      padding=0,
                      activation=relu,
                      batch_norm=bn,
                      deconv_out_shape=(1, 13, 13)),
        Deconvolution((3, 3, 96),
                      filter_init,
                      strides=2,
                      padding=0,
                      activation=relu,
                      batch_norm=bn,
                      deconv_out_shape=(1, 27, 27)),
        Deconvolution((3, 3, 96),
                      filter_init,
                      strides=1,
                      padding=0,
                      activation=relu,
                      batch_norm=bn,
                      deconv_out_shape=(1, 28, 28)),
        Deconvolution((3, 3, 1),
                      filter_init,
                      strides=1,
                      padding=1,
                      activation=Tanh(),
                      batch_norm=False,
                      deconv_out_shape=(1, 28, 28))
    ]
    return Sequential(deconv_layers, name="Generator")
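
A brief note: the final deconv_out_shape of (1, 28, 28) suggests an MNIST-sized output, so the stack upsamples a 1x1 noise feature map to a 28x28 image. A trivial, hedged usage sketch, assuming filter_init, relu and Sequential are defined elsewhere in the file:

# Hedged usage sketch: build the generator Sequential defined above.
generator = make_generator(bn=True)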
Example 10
def test_rnn_deriv_ref(sequence_length, input_size, hidden_size, batch_size,
                       return_sequence, weight_initializer, bias_initializer,
                       transformer_factory):

    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"
    assert return_sequence is True, "the reference rnn only supports sequences for deriv"

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(input_placeholder, hidden_size,
                                                                weight_initializer,
                                                                bias_initializer)

    # Compute reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size, return_sequence=return_sequence)
    rnn_ref.set_weights(W_in, W_rec, b.reshape(rnn_ref.bh.shape))

    # Prepare deltas for gradient check
    output_shape = (hidden_size, sequence_length, batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    dW_in, dW_rec, db = rnn_ref.lossFun(input_value.transpose([1, 0, 2]),
                                        deltas.copy().transpose([1, 0, 2]),
                                        init_states=init_state_value)[:3]

    # Generate ngraph RNN
    rnn_ng = Recurrent(hidden_size, init=W_in, init_inner=W_rec, activation=Tanh(),
                       reset_cells=True, return_sequence=return_sequence)

    # fprop ngraph RNN
    out_ng = rnn_ng.train_outputs(input_placeholder)

    deltas_constant = ng.constant(deltas, axes=out_ng.axes)
    params = [(rnn_ng.W_input, W_in),
              (rnn_ng.W_recur, W_rec),
              (rnn_ng.b, b)]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        for px, _ in params:
            update = ng.deriv(out_ng, px, error=deltas_constant)
            param_updates.append(ex.executor(update, input_placeholder))

        for update_fun, ref_val in zip(param_updates, [dW_in, dW_rec, db]):
            ng.testing.assert_allclose(update_fun(input_value),
                                       ref_val.squeeze(),
                                       rtol=bprop_rtol, atol=bprop_atol)
Example 11
def make_generator_gp(bn=True, n_extra_layers=0, bias_init=None):
    deconv_layers = [
        Deconvolution((4, 4, 512),
                      filter_init,
                      strides=1,
                      padding=0,
                      activation=relu,
                      batch_norm=bn,
                      bias_init=bias_init),
        Deconvolution((4, 4, 256),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=relu,
                      batch_norm=bn,
                      bias_init=bias_init),
        Deconvolution((4, 4, 128),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=relu,
                      batch_norm=bn,
                      bias_init=bias_init),
        Deconvolution((4, 4, 64),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=relu,
                      batch_norm=bn,
                      bias_init=bias_init)
    ]

    for i in range(n_extra_layers):
        deconv_layers.append(
            Convolution((3, 3, 64),
                        filter_init,
                        strides=1,
                        padding=1,
                        activation=lrelu,
                        batch_norm=bn,
                        bias_init=bias_init))

    deconv_layers.append(
        Deconvolution((4, 4, 3),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=Tanh(),
                      batch_norm=False,
                      bias_init=bias_init))
    return Sequential(deconv_layers, name="Generator")
Example 12
def test_rnn_deriv_numerical(sequence_length, input_size, hidden_size, batch_size, return_sequence,
                             weight_initializer, bias_initializer, backward, init_state):

    # Get input placeholder and numpy array
    input_placeholder, input_value = make_placeholder(input_size, sequence_length, batch_size)

    # Construct network weights and initial state, if desired
    W_in, W_rec, b, init_state, init_state_value = make_weights(input_placeholder, hidden_size,
                                                                weight_initializer,
                                                                bias_initializer,
                                                                init_state)

    # Generate ngraph RNN
    rnn_ng = RNNCell(hidden_size, init=W_in, init_h2h=W_rec, activation=Tanh(),
                     reset_cells=True)

    # fprop ngraph RNN
    num_steps = input_placeholder.axes.recurrent_axis().length
    init_states = {'h': init_state} if init_state is not None else init_state
    out_ng = unroll(rnn_ng, num_steps, input_placeholder, init_states=init_states,
                    return_sequence=return_sequence)

    params = [(rnn_ng.i2h.linear.W, W_in),
              (rnn_ng.h2h.W, W_rec),
              # (rnn_ng.i2h.bias.W, b)
              ]

    with ExecutorFactory() as ex:
        # Create derivative computations and execute
        param_updates = list()
        for px, _ in params:
            if init_state is not None:
                update = (ex.derivative(out_ng, px, input_placeholder, init_state),
                          ex.numeric_derivative(out_ng, px, delta, input_placeholder, init_state))
            else:
                update = (ex.derivative(out_ng, px, input_placeholder),
                          ex.numeric_derivative(out_ng, px, delta, input_placeholder))
            param_updates.append(update)

        for (deriv_s, deriv_n), (_, val) in zip(param_updates, params):
            if init_state is not None:
                ng.testing.assert_allclose(deriv_s(val, input_value, init_state_value),
                                           deriv_n(val, input_value, init_state_value),
                                           rtol=num_rtol, atol=num_atol)
            else:
                ng.testing.assert_allclose(deriv_s(val, input_value),
                                           deriv_n(val, input_value),
                                           rtol=num_rtol, atol=num_atol)
Example 13
def make_generator(bn=True, bias_init=None):
    deconv_layers = [
        Affine(weight_init=filter_init,
               activation=None,
               batch_norm=False,
               axes=ng.make_axes({
                   "C": 1024,
                   "H": 4,
                   "W": 4
               })),
        Deconvolution((4, 4, 512),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=relu,
                      batch_norm=bn,
                      bias_init=bias_init),
        Deconvolution((4, 4, 256),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=relu,
                      batch_norm=bn,
                      bias_init=bias_init),
        Deconvolution((4, 4, 128),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=relu,
                      batch_norm=bn,
                      bias_init=bias_init)
    ]

    deconv_layers.append(
        Deconvolution((4, 4, 3),
                      filter_init,
                      strides=2,
                      padding=1,
                      activation=Tanh(),
                      batch_norm=False,
                      bias_init=bias_init))
    return Sequential(deconv_layers, name="Generator")
Example 14
    def __init__(self, input_placeholder, output_size, RNN, bn_params):

        # Set up axes
        F, T, N = tuple(input_placeholder.axes)
        H = ng.make_axis(length=output_size, name="hidden")
        H2 = ng.make_axis(length=output_size, name="hidden_tmp")

        self.input_placeholder = input_placeholder

        # Make reference placeholder
        self.reference_input = ng.placeholder(axes=[H, T, N])

        # Create weight matrices
        w_rec_axes = ng.make_axes([H, H2])
        w_in_axes = ng.make_axes([H, F])
        self.W_rec = rng.uniform(-1, 1, w_rec_axes)
        self.W_in = rng.uniform(-1, 1, w_in_axes)
        self.W_id = np.eye(output_size).astype("float32")

        self.rnn_args = dict(nout=output_size,
                             init_inner=self.W_rec,
                             return_sequence=True,
                             activation=Tanh())

        self.reference_rnn = RNN(init=self.W_id, **self.rnn_args)
        self.rnn = RNN(init=self.W_in, batch_norm=True, **self.rnn_args)

        if self.has_gates:
            self.batch_norm_dict = self.rnn.batch_norm
        else:
            self.batch_norm_dict = {'gate': self.rnn.batch_norm}

        self.default_gate = list(self.batch_norm_dict.keys())[0]

        for bn in self.batch_norm_dict.values():
            bn.__dict__.update(bn_params)
Example 15
valid_set = SequentialArrayIterator(ptb_data['valid'], batch_size=args.batch_size,
                                    time_steps=time_steps)

inputs = train_set.make_placeholders()
ax.Y.length = len(tree_bank_data.vocab)


def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.layer_type == "lstm":
    rlayer1 = LSTM(hidden_size, init, activation=Tanh(),
                   gate_activation=Logistic(), return_sequence=True)
    rlayer2 = LSTM(hidden_size, init, activation=Tanh(),
                   gate_activation=Logistic(), return_sequence=True)

# model initialization
seq1 = Sequential([Preprocess(functor=expand_onehot),
                   rlayer1,
                   rlayer2,
                   Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))])

optimizer = RMSProp(gradient_clip_value=gradient_clip_value)

train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y),
Example 16

def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.use_lut:
    layer_0 = LookupTable(50, 100, init, update=True, pad_idx=0)
else:
    layer_0 = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size, init, activation=Tanh(), return_sequence=True, sum_out=True)

if args.use_lut:
    layer_0 = LookupTable(50, 100, init, update=False)
else:
    layer_0 = Preprocess(functor=expand_onehot)

# model initialization
seq1 = Sequential([layer_0,
                   rlayer,
                   Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))])

optimizer = RMSProp()
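
A minimal sketch of how the pieces above are usually wired into a training loss, following the cross-entropy pattern of Example 15; the placeholder dictionary and its 'inp_txt'/'tgt_txt' keys are assumptions here, not shown in this excerpt.

# Hedged sketch: forward the inputs through the model and build the loss.
# inputs['inp_txt'] and inputs['tgt_txt'] are assumed placeholder names.
train_prob = seq1(inputs['inp_txt'])
train_loss = ng.cross_entropy_multi(train_prob,
                                    ng.one_hot(inputs['tgt_txt'], axis=ax.Y))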
Example 17
    def __init__(self):
        super(BiRNNLayer, self).__init__()
        self.layer = BiRNN(nout=16, init=ConstantInit(0.0), activation=Tanh())
Example 18
def test_seq2seq_deriv_ref(batch_size, sequence_length_enc,
                           sequence_length_dec, input_size, hidden_size,
                           weight_initializer, bias_initializer,
                           transformer_factory):

    # TODO: are these assumptions true?
    assert batch_size == 1, "the seq2seq reference implementation only supports batch size 1"

    # Get input placeholders and numpy arrays
    input_placeholder_enc, input_value_enc, = \
        make_placeholder(input_size, sequence_length_enc, batch_size)
    input_placeholder_dec, input_value_dec, = \
        make_placeholder(input_size, sequence_length_dec, batch_size)

    # Construct encoder weights
    W_in_enc, W_rec_enc, b_enc, _, _ = make_weights(input_placeholder_enc,
                                                    hidden_size,
                                                    weight_initializer,
                                                    bias_initializer,
                                                    init_state=False)

    # Construct decoder weights
    W_in_dec, W_rec_dec, b_dec, _, _ = make_weights(input_placeholder_dec,
                                                    hidden_size,
                                                    weight_initializer,
                                                    bias_initializer,
                                                    init_state=False)

    # Reference numpy seq2seq
    seq2seq_ref = RefSeq2Seq(input_size,
                             hidden_size,
                             decoder_return_sequence=True)
    seq2seq_ref.set_weights(W_in_enc, W_rec_enc,
                            b_enc.reshape(seq2seq_ref.bh_enc.shape), W_in_dec,
                            W_rec_dec, b_dec.reshape(seq2seq_ref.bh_dec.shape))

    # Prepare deltas for gradient check
    output_shape = (hidden_size, sequence_length_dec, batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    dW_in_enc, dW_rec_enc, db_enc, dW_in_dec, dW_rec_dec, db_dec, encoding_ref, hs_return_dec = \
        seq2seq_ref.lossFun(input_value_enc.transpose([1, 0, 2]),
                            input_value_dec.transpose([1, 0, 2]),
                            deltas.copy().transpose([1, 0, 2]))

    # Generate ngraph Seq2Seq
    rnn_enc_ng = Recurrent(hidden_size,
                           init=W_in_enc,
                           init_inner=W_rec_enc,
                           activation=Tanh(),
                           reset_cells=True,
                           return_sequence=False)
    rnn_dec_ng = Recurrent(hidden_size,
                           init=W_in_dec,
                           init_inner=W_rec_dec,
                           activation=Tanh(),
                           reset_cells=True,
                           return_sequence=True)

    # ngraph fprop graph
    encoding_ng = rnn_enc_ng(input_placeholder_enc, init_state=None)
    output_ng = rnn_dec_ng(input_placeholder_dec, init_state=encoding_ng)

    deltas_constant = ng.constant(deltas, axes=output_ng.axes)
    params = [(rnn_dec_ng.b, db_dec), (rnn_dec_ng.W_input, dW_in_dec),
              (rnn_dec_ng.W_recur, dW_rec_dec), (rnn_enc_ng.b, db_enc),
              (rnn_enc_ng.W_input, dW_in_enc),
              (rnn_enc_ng.W_recur, dW_rec_enc)]

    with ExecutorFactory() as ex:

        # fprop computations
        fprop_fun = ex.executor([encoding_ng, output_ng],
                                input_placeholder_enc, input_placeholder_dec)

        # gradient computations
        update_funs = []
        for px, _ in params:
            update = ng.deriv(output_ng, px, error=deltas_constant)
            update_funs.append(
                ex.executor(update, input_placeholder_enc,
                            input_placeholder_dec))

        # check forward pass
        encoding, output = fprop_fun(input_value_enc, input_value_dec)
        ng.testing.assert_allclose(encoding, encoding_ref)
        ng.testing.assert_allclose(np.squeeze(output),
                                   np.squeeze(hs_return_dec))

        # check gradient computations
        for update_fun, (_, deriv_ref_val) in zip(update_funs, params):
            grad_neon = update_fun(input_value_enc, input_value_dec)
            ng.testing.assert_allclose(grad_neon,
                                       deriv_ref_val.squeeze(),
                                       rtol=bprop_rtol,
                                       atol=1e-4)
Example 19
train_set = ArrayIterator(imdb_data['train'],
                          batch_size=args.batch_size,
                          total_iterations=args.num_iterations)
valid_set = ArrayIterator(imdb_data['valid'], batch_size=args.batch_size)

inputs = train_set.make_placeholders()
ax.Y.length = imdb_dataset.nclass

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size,
                       init,
                       activation=Tanh(),
                       reset_cells=True,
                       return_sequence=False)
else:
    rlayer = BiRNN(hidden_size,
                   init,
                   activation=Tanh(),
                   reset_cells=True,
                   return_sequence=False,
                   sum_out=True)

# model initialization
seq1 = Sequential([
    LookupTable(vocab_size, embed_size, init, update=True, pad_idx=pad_idx),
    rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
Example 20
train_set = SequentialArrayIterator(ptb_data['train'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps,
                                    total_iterations=args.num_iterations)

valid_set = SequentialArrayIterator(ptb_data['valid'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps)

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# model initialization
seq1 = Sequential([
    Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y)),
    Recurrent(hidden_size, init, activation=Tanh()),
    Affine(weight_init=init,
           activation=Softmax(),
           bias_init=init,
           axes=(ax.Y, ax.REC))
])

# Bind axes lengths:
ax.Y.length = len(tree_bank_data.vocab)
ax.REC.length = time_steps
ax.N.length = args.batch_size

# placeholders with descriptive names
inputs = dict(inp_txt=ng.placeholder([ax.REC, ax.N]),
              tgt_txt=ng.placeholder([ax.REC, ax.N]))
Example 21
    def __init__(self):
        super(LSTMLayer, self).__init__()
        self.layer = LSTM(nout=16,
                          init=ConstantInit(0.0),
                          activation=Tanh(),
                          gate_activation=Tanh())
Example 22
train_set = SequentialArrayIterator(ptb_data['train'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps,
                                    total_iterations=args.num_iterations)

valid_set = SequentialArrayIterator(ptb_data['valid'],
                                    batch_size=args.batch_size,
                                    time_steps=time_steps)

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# model initialization
seq1 = Sequential([
    Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y)),
    Recurrent(hidden_size, init, activation=Tanh(), reset_cells=False),
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ax.REC))
])

# Bind axes lengths:
ax.Y.length = len(tree_bank_data.vocab)
ax.REC.length = time_steps
ax.N.length = args.batch_size

# placeholders with descriptive names
inputs = dict(inp_txt=ng.placeholder([ax.REC, ax.N]),
              tgt_txt=ng.placeholder([ax.REC, ax.N]))

optimizer = RMSProp(decay_rate=0.95,
                    learning_rate=2e-3,
                    epsilon=1e-6,
Example 23
def check_rnn(seq_len,
              input_size,
              hidden_size,
              batch_size,
              init_func,
              return_seq=True):
    # init_func is the initializer for the model params
    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"

    # ========== neon model ==========
    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, recurrent=True)
    N = ng.make_axis(batch_size, batch=True)
    H = ng.make_axis(hidden_size)
    ax_s = ng.make_axes([H, N])

    ex = ExecutorFactory()
    np.random.seed(0)

    rnn_ng = Recurrent(hidden_size,
                       init_func,
                       activation=Tanh(),
                       reset_cells=True,
                       return_sequence=return_seq)

    inp_ng = ng.placeholder([Cin, REC, N])
    init_state_ng = ng.placeholder(ax_s)

    # fprop graph
    out_ng = rnn_ng.train_outputs(inp_ng, init_state=init_state_ng)
    out_ng.input = True

    rnn_W_input = rnn_ng.W_input
    rnn_W_input.input = True
    rnn_W_recur = rnn_ng.W_recur
    rnn_W_recur.input = True
    rnn_b = rnn_ng.b
    rnn_b.input = True

    fprop_neon_fun = ex.executor(out_ng, inp_ng, init_state_ng)

    dWrecur_s_fun = ex.derivative(out_ng, rnn_W_recur, inp_ng, rnn_W_input,
                                  rnn_b)
    dWrecur_n_fun = ex.numeric_derivative(out_ng, rnn_W_recur, delta, inp_ng,
                                          rnn_W_input, rnn_b)
    dWinput_s_fun = ex.derivative(out_ng, rnn_W_input, inp_ng, rnn_W_recur,
                                  rnn_b)
    dWinput_n_fun = ex.numeric_derivative(out_ng, rnn_W_input, delta, inp_ng,
                                          rnn_W_recur, rnn_b)
    dWb_s_fun = ex.derivative(out_ng, rnn_b, inp_ng, rnn_W_input, rnn_W_recur)
    dWb_n_fun = ex.numeric_derivative(out_ng, rnn_b, delta, inp_ng,
                                      rnn_W_input, rnn_W_recur)

    # fprop on random inputs
    input_value = rng.uniform(-1, 1, inp_ng.axes)
    init_state_value = rng.uniform(-1, 1, init_state_ng.axes)
    fprop_neon = fprop_neon_fun(input_value, init_state_value).copy()

    # after the rnn graph has been executed, can get the W values. Get copies so
    # shared values don't confuse derivatives
    Wxh_neon = rnn_ng.W_input.value.get(None).copy()
    Whh_neon = rnn_ng.W_recur.value.get(None).copy()
    bh_neon = rnn_ng.b.value.get(None).copy()

    # bprop derivs
    dWrecur_s = dWrecur_s_fun(Whh_neon, input_value, Wxh_neon, bh_neon)
    dWrecur_n = dWrecur_n_fun(Whh_neon, input_value, Wxh_neon, bh_neon)
    np.testing.assert_allclose(dWrecur_s, dWrecur_n, rtol=rtol, atol=atol)

    dWb_s = dWb_s_fun(bh_neon, input_value, Wxh_neon, Whh_neon)
    dWb_n = dWb_n_fun(bh_neon, input_value, Wxh_neon, Whh_neon)
    np.testing.assert_allclose(dWb_s, dWb_n, rtol=rtol, atol=atol)

    dWinput_s = dWinput_s_fun(Wxh_neon, input_value, Whh_neon, bh_neon)
    dWinput_n = dWinput_n_fun(Wxh_neon, input_value, Whh_neon, bh_neon)
    np.testing.assert_allclose(dWinput_s, dWinput_n, rtol=rtol, atol=atol)

    # ========= reference model ==========
    output_shape = (hidden_size, seq_len * batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    deltas_ref = deltas.copy().T.reshape(seq_len, batch_size,
                                         hidden_size).swapaxes(1, 2)

    inp_ref = input_value.transpose([1, 0, 2])

    # reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size)
    rnn_ref.Wxh[:] = Wxh_neon
    rnn_ref.Whh[:] = Whh_neon
    rnn_ref.bh[:] = bh_neon.reshape(rnn_ref.bh.shape)

    (dWxh_ref, dWhh_ref, db_ref, h_ref_list, dh_ref_list,
     d_out_ref) = rnn_ref.lossFun(inp_ref,
                                  deltas_ref,
                                  init_states=init_state_value)

    # comparing outputs
    if return_seq is False:
        h_ref_list = h_ref_list[:, -1].reshape(-1, 1)
    else:
        fprop_neon = fprop_neon[:, :, 0]
    np.testing.assert_allclose(fprop_neon, h_ref_list, rtol=0.0, atol=1.0e-5)

    return
Example 24
    out_axes = ng.make_axes([batch_axis, out_axis])

# Build placeholders for the created axes
inputs = {
    'X': ng.placeholder(in_axes),
    'y': ng.placeholder(out_axes),
    'iteration': ng.placeholder(axes=())
}

# Network Definition
seq1 = Sequential([
    LSTM(nout=recurrent_units,
         init=init_uni,
         backward=False,
         activation=Logistic(),
         gate_activation=Tanh(),
         return_sequence=predict_seq),
    Affine(weight_init=init_uni,
           bias_init=init_uni,
           activation=Identity(),
           axes=out_axis)
])

# Optimizer
# Following policy will set the initial learning rate to 0.05 (base_lr)
# At iteration (num_iterations // 5), learning rate is multiplied by gamma (new lr = .005)
# At iteration (num_iterations // 2), it will be reduced by gamma again (new lr = .0005)
schedule = [num_iterations // 5, num_iterations // 2]
learning_rate_policy = {
    'name': 'schedule',
    'schedule': schedule,
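
The excerpt above is cut off inside the learning_rate_policy dictionary. As a hedged sketch only, a complete 'schedule' policy consistent with the preceding comment (base learning rate 0.05, multiplied by gamma at each scheduled iteration) might look like the following; the exact key names are an assumption, not confirmed by the excerpt.

# Hypothetical completion of the truncated policy dict. Key names and the
# gamma value (0.1, implied by 0.05 -> 0.005 -> 0.0005) are assumptions.
learning_rate_policy = {
    'name': 'schedule',
    'schedule': schedule,
    'gamma': 0.1,
    'base_lr': 0.05
}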
Example 25
# number of classes
ax.Y.length = time_steps

# create iterator and placeholders for training data
train_set = TSPSequentialArrayIterator(data_arrays=tsp_data['train'],
                                       nfeatures=num_features,
                                       batch_size=args.batch_size,
                                       time_steps=time_steps,
                                       total_iterations=args.num_iterations)
inputs = train_set.make_placeholders()

# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# build computational graph
enc = LSTM(args.hs, init, activation=Tanh(), reset_cells=True,
           gate_activation=Logistic(), return_sequence=True)
dec = LSTM(args.hs, init, activation=Tanh(), reset_cells=True,
           gate_activation=Logistic(), return_sequence=True)

if args.emb is True:
    # encoder input embedding
    hidden_feature_axis = ng.make_axis(length=args.hs, name='hidden_feature_axis')
    feature_axis = ng.make_axis(length=num_features, name='feature_axis')

    W_emb = ng.variable(axes=[hidden_feature_axis, feature_axis], initial_value=init)
    emb_enc_inputs = ng.dot(W_emb, inputs['inp_txt'])

    # decoder input embedding
    emb_dec_input = []
    ax.N.length = args.batch_size
Example 26
def check_lstm(seq_len, input_size, hidden_size,
               batch_size, init_func, return_seq=True, backward=False,
               reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng = LSTM(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic(),
                       reset_cells=reset_cells, return_sequence=return_seq,
                       backward=backward)

        out_ng = lstm_ng.train_outputs(inp_ng)

        fprop_neon_fun = ex.executor(out_ng, inp_ng)

        fprop_neon_list = []
        input_value_list = []

        for i in range(num_iter):
            # fprop on random inputs
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon = fprop_neon_fun(input_value).copy()

            if return_seq is True:
                fprop_neon = fprop_neon[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_list.append(fprop_neon)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon[:, -1].reshape(-1, 1),
                                           lstm_ng.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, can get the W values. Get copies so
        # shared values don't confuse derivatives
        # concatenate weights to i, f, o, g together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon = [lstm_ng.W_input[k].value.get(None).copy().T for k in gates]
        Whh_neon = [lstm_ng.W_recur[k].value.get(None).copy().T for k in gates]
        bh_neon = [lstm_ng.b[k].value.get(None).copy() for k in gates]

        # reference numpy LSTM
        lstm_ref = RefLSTM()
        WLSTM = lstm_ref.init(input_size, hidden_size)

        # make ref weights and biases match the neon model
        WLSTM[0, :] = np.concatenate(bh_neon)
        WLSTM[1:input_size + 1, :] = np.concatenate(Wxh_neon, 1)
        WLSTM[input_size + 1:] = np.concatenate(Whh_neon, 1)

        # transpose input X and do fprop
        fprop_ref_list = []
        c0 = h0 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref, cprev, hprev, batch_cache) = lstm_ref.forward(inp_ref,
                                                                     WLSTM,
                                                                     c0, h0)
            if reset_cells is False:
                c0 = cprev
                h0 = hprev

            # the output needs transpose as well
            Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
            fprop_ref_list.append(Hout_ref)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_list[i],
                                       fprop_ref_list[i], rtol=rtol, atol=atol)
Example 27
h_dim = 4
minibatch_discrimination = False
num_iterations = 600
batch_size = 12
num_examples = num_iterations * batch_size

# generator
generator_layers = [
    affine_layer(h_dim, Rectlin(), name='g0'),
    affine_layer(1, Identity(), name='g1')
]
generator = Sequential(generator_layers)

# discriminator
discriminator_layers = [
    affine_layer(2 * h_dim, Tanh(), name='d0'),
    affine_layer(2 * h_dim, Tanh(), name='d1')
]
if minibatch_discrimination:
    raise NotImplementedError
else:
    discriminator_layers.append(affine_layer(2 * h_dim, Tanh(), name='d2'))
discriminator_layers.append(affine_layer(1, Logistic(), name='d3'))
discriminator = Sequential(discriminator_layers)

# TODO discriminator pre-training

# dataloader
np.random.seed(1)
toy_gan_data = ToyGAN(batch_size, num_iterations)
train_data = toy_gan_data.load_data()
Example 28

def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

if args.use_lut:
    layer_0 = LookupTable(50, 100, init, update=True, pad_idx=0)
else:
    layer_0 = Preprocess(functor=lambda x: ng.one_hot(x, axis=ax.Y))

if args.layer_type == "rnn":
    rlayer = Recurrent(hidden_size, init, activation=Tanh())
elif args.layer_type == "birnn":
    rlayer = BiRNN(hidden_size,
                   init,
                   activation=Tanh(),
                   return_sequence=True,
                   sum_out=True)

# model initialization
seq1 = Sequential([
    layer_0, rlayer,
    Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y, ))
])

optimizer = RMSProp()
Example 29
def check_stacked_lstm(seq_len, input_size, hidden_size,
                       batch_size, init_func, return_seq=True, backward=False,
                       reset_cells=False, num_iter=2):

    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, name='R')
    N = ng.make_axis(batch_size, name='N')

    with ExecutorFactory() as ex:
        np.random.seed(0)

        inp_ng = ng.placeholder([Cin, REC, N])

        lstm_ng_1 = LSTM(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic(),
                         reset_cells=reset_cells, return_sequence=return_seq,
                         backward=backward)
        lstm_ng_2 = LSTM(hidden_size, init_func, activation=Tanh(), gate_activation=Logistic(),
                         reset_cells=reset_cells, return_sequence=return_seq,
                         backward=backward)

        out_ng_1 = lstm_ng_1.train_outputs(inp_ng)
        out_ng_2 = lstm_ng_2.train_outputs(out_ng_1)

        fprop_neon_fun_2 = ex.executor(out_ng_2, inp_ng)

        # fprop on random inputs for multiple iterations
        fprop_neon_2_list = []
        input_value_list = []

        for i in range(num_iter):
            input_value = rng.uniform(-1, 1, inp_ng.axes)
            fprop_neon_2 = fprop_neon_fun_2(input_value).copy()

            # comparing outputs
            if return_seq is True:
                fprop_neon_2 = fprop_neon_2[:, :, 0]

            input_value_list.append(input_value)
            fprop_neon_2_list.append(fprop_neon_2)

            if reset_cells is False:
                # look at the last hidden states
                assert ng.testing.allclose(fprop_neon_2[:, -1].reshape(-1, 1),
                                           lstm_ng_2.h_init.value.get(None),
                                           rtol=rtol, atol=atol)

        # after the rnn graph has been executed, can get the W values. Get copies so
        # shared values don't confuse derivatives
        # concatenate weights to i, f, o, g together (in this order)
        gates = ['i', 'f', 'o', 'g']
        Wxh_neon_1 = \
            np.concatenate([lstm_ng_1.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_1 = \
            np.concatenate([lstm_ng_1.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_1 =  \
            np.concatenate([lstm_ng_1.b[k].value.get(None).copy() for k in gates])
        Wxh_neon_2 = \
            np.concatenate([lstm_ng_2.W_input[k].value.get(None).copy().T for k in gates], 1)
        Whh_neon_2 = \
            np.concatenate([lstm_ng_2.W_recur[k].value.get(None).copy().T for k in gates], 1)
        bh_neon_2 = \
            np.concatenate([lstm_ng_2.b[k].value.get(None).copy() for k in gates])

        # reference numpy LSTM
        lstm_ref_1 = RefLSTM()
        lstm_ref_2 = RefLSTM()
        WLSTM_1 = lstm_ref_1.init(input_size, hidden_size)
        WLSTM_2 = lstm_ref_2.init(hidden_size, hidden_size)

        # make ref weights and biases the same as the neon model
        WLSTM_1[0, :] = bh_neon_1
        WLSTM_1[1:input_size + 1, :] = Wxh_neon_1
        WLSTM_1[input_size + 1:] = Whh_neon_1
        WLSTM_2[0, :] = bh_neon_2
        WLSTM_2[1:hidden_size + 1, :] = Wxh_neon_2
        WLSTM_2[hidden_size + 1:] = Whh_neon_2

        # transpose input X and do fprop
        fprop_ref_2_list = []
        c0_1 = h0_1 = None
        c0_2 = h0_2 = None
        for i in range(num_iter):
            input_value = input_value_list[i]
            inp_ref = input_value.copy().transpose([1, 2, 0])
            (Hout_ref_1, cprev_1, hprev_1, batch_cache) = lstm_ref_1.forward(inp_ref, WLSTM_1,
                                                                             c0_1, h0_1)
            (Hout_ref_2, cprev_2, hprev_2, batch_cache) = lstm_ref_2.forward(Hout_ref_1, WLSTM_2,
                                                                             c0_2, h0_2)

            if reset_cells is False:
                c0_1 = cprev_1
                h0_1 = hprev_1
                c0_2 = cprev_2
                h0_2 = hprev_2

            # the output needs transpose as well
            Hout_ref_2 = Hout_ref_2.reshape(seq_len * batch_size, hidden_size).T

            fprop_ref_2_list.append(Hout_ref_2)

        for i in range(num_iter):
            assert ng.testing.allclose(fprop_neon_2_list[i],
                                       fprop_ref_2_list[i], rtol=rtol, atol=atol)
Example 30
                                    get_prev_target=True)

inputs = train_set.make_placeholders()
ax.Y.length = len(tree_bank_data.vocab)


def expand_onehot(x):
    return ng.one_hot(x, axis=ax.Y)


# weight initialization
init = UniformInit(low=-0.08, high=0.08)

# model initialization
one_hot_enc = Preprocess(functor=expand_onehot)
enc = Recurrent(hidden_size, init, activation=Tanh(), reset_cells=True, return_sequence=False)
one_hot_dec = Preprocess(functor=expand_onehot)
dec = Recurrent(hidden_size, init, activation=Tanh(), reset_cells=True, return_sequence=True)
linear = Affine(init, activation=Softmax(), bias_init=init, axes=(ax.Y,))

optimizer = RMSProp(decay_rate=0.95, learning_rate=2e-3, epsilon=1e-6,
                    gradient_clip_value=gradient_clip_value)

# build network graph
one_hot_enc_out = one_hot_enc(inputs['inp_txt'])
one_hot_dec_out = one_hot_dec(inputs['prev_tgt'])
enc_out = enc(one_hot_enc_out)
dec_out = dec(one_hot_dec_out, init_state=enc_out)
output_prob = linear(dec_out)

loss = ng.cross_entropy_multi(output_prob,