Example #1
def test_gru():
    hidden_units = 33
    batch_size = 13
    time_steps = 7
    embedding_size = 8
    time_major = snp.TIME_MAJOR

    # tensorflow results
    gru = tf.contrib.rnn.GRUCell(hidden_units, activation=tf.nn.tanh)
    # initial state of the GRU
    state = np.repeat(np.asarray(np.random.rand(1, hidden_units), dtype=DTYPE),
                      batch_size,
                      axis=0)

    # inputs to GRU
    if time_major:
        inputs = np.asarray(np.random.rand(time_steps, batch_size,
                                           embedding_size),
                            dtype=DTYPE)
    else:
        inputs = np.asarray(np.random.rand(batch_size, time_steps,
                                           embedding_size),
                            dtype=DTYPE)
    sequence_length = np.random.randint(1, time_steps + 1, batch_size)

    outputs, _ = tf.nn.dynamic_rnn(cell=gru,
                                   inputs=tf.convert_to_tensor(inputs),
                                   sequence_length=sequence_length,
                                   initial_state=state,
                                   dtype=DTYPE,
                                   time_major=time_major)

    # get outputs of tensorflow
    init = tf.global_variables_initializer()
    train_vars = tf.trainable_variables()

    with tf.Session() as sess:
        sess.run(init)
        train_vars_vals = sess.run(train_vars)
        dict_var_vals = {
            k.name.split(':')[0]: v
            for k, v in zip(train_vars, train_vars_vals)
        }
        outputs_tf = sess.run(outputs)

    # test numpy implementation
    gru_np = snp.GRUCell(state, activation=np.tanh, base_name='rnn')
    graph = snp.Graph()
    graph.initialize(dict_var_vals)
    outputs_np, _ = gru_np.encode(inputs, sequence_length=sequence_length)
    np.testing.assert_array_almost_equal(outputs_tf, outputs_np)
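
The dict_var_vals idiom above (mapping each trainable variable's name, minus the ':0' output suffix, to its trained value) recurs in every example below. A minimal helper could factor it out; fetch_trainable_vars is a hypothetical name, not part of sequencing:

def fetch_trainable_vars(sess):
    # map variable names (without the ':0' output suffix) to current values,
    # so snp.Graph().initialize(...) can consume them
    train_vars = tf.trainable_variables()
    train_vars_vals = sess.run(train_vars)
    return {v.name.split(':')[0]: val
            for v, val in zip(train_vars, train_vars_vals)}
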
Example #2
def test_attention():
    batch_size = 5
    attention_time_steps = 6
    attention_size = 7
    state_size = 8
    encoder_output_size = 9

    # random input
    if snp.TIME_MAJOR:
        attention_keys = np.asarray(np.random.rand(attention_time_steps,
                                                   batch_size, attention_size),
                                    dtype=DTYPE)

        attention_values_np = np.asarray(np.random.rand(
            attention_time_steps, batch_size, encoder_output_size),
                                         dtype=DTYPE)
    else:
        attention_keys = np.asarray(np.random.rand(batch_size,
                                                   attention_time_steps,
                                                   attention_size),
                                    dtype=DTYPE)

        attention_values_np = np.asarray(np.random.rand(
            batch_size, attention_time_steps, encoder_output_size),
                                         dtype=DTYPE)

    attention_values = tf.convert_to_tensor(attention_values_np,
                                            name='attention_values')

    attention_values_length = np.random.randint(1, attention_time_steps + 1,
                                                batch_size)

    attention = Attention(state_size,
                          tf.convert_to_tensor(attention_keys),
                          attention_values,
                          attention_values_length,
                          name='attention_test')

    # random query
    query = np.asarray(np.random.rand(batch_size, state_size), dtype=DTYPE)

    # test normalized scores and context
    scores_normalized, context = attention.compute_scores(query)
    init = tf.global_variables_initializer()

    # get the trained weights
    train_vars = tf.trainable_variables()

    with tf.Session() as sess:
        sess.run(init)
        train_vars_vals = sess.run(train_vars)
        dict_var_vals = {
            k.name.split(':')[0]: v
            for k, v in zip(train_vars, train_vars_vals)
        }
        scores_normalized_tf, context_tf = sess.run(
            [scores_normalized, context])

    # test np implementation
    attention_np = Attention_np(attention_keys,
                                attention_values_np,
                                attention_values_length,
                                name='attention_test')
    graph = snp.Graph()
    graph.initialize(dict_var_vals)

    scores_normalized_np, context_np = attention_np.compute_scores(query)

    np.testing.assert_array_almost_equal(scores_normalized_np,
                                         scores_normalized_tf)
    np.testing.assert_array_almost_equal(context_np, context_tf)
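
For intuition, the quantities checked above follow the standard masked-softmax attention pattern. A rough batch-major numpy sketch (dot-product scoring is assumed here purely for illustration; it requires the query to already be projected to the key size, and the library's actual scoring may differ):

def masked_softmax_attention(query, keys, values, lengths):
    # query: (batch, key_size); keys, values: (batch, time, ...)
    scores = np.einsum('bd,btd->bt', query, keys)
    # mask positions beyond each sequence length before the softmax
    mask = np.arange(keys.shape[1])[None, :] < lengths[:, None]
    scores = np.where(mask, scores, -np.inf)
    weights = np.exp(scores - scores.max(axis=1, keepdims=True))
    weights /= weights.sum(axis=1, keepdims=True)
    # context is the length-masked weighted average of the values
    context = np.einsum('bt,btd->bd', weights, values)
    return weights, context
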
Example #3
def build_attention_model_np(params,
                             dict_var_vals,
                             src_vocab,
                             trg_vocab,
                             source_ids,
                             source_seq_length,
                             beam_size=1,
                             max_step=100):
    """
    Build the model.

    :param params: see `sequencing`.
    :param dict_var_vals: numpy array of trained variables
    :param src_vocab:
    :param trg_vocab:
    :param source_ids:
    :param source_seq_length:
    :param beam_size:
    :param max_step:
    :return:
    """

    mode = MODE.INFER
    batch_size = 1

    graph = sqn.Graph()
    # Because the source encoder differs from the target feedback,
    # we construct the source_embedding_table manually
    source_embedding_table = sqn.LookUpOp(name='source')

    state_size = params['encoder']['rnn_cell']['state_size']
    init_states = []
    if params['encoder']['rnn_cell']['cell_name'] != 'BasicLSTMCell':
        init_states.append(np.zeros((batch_size, state_size), dtype=DTYPE))
        init_states.append(np.zeros((batch_size, state_size), dtype=DTYPE))
    else:
        init_states.append((np.zeros(
            (batch_size, state_size), dtype=DTYPE), ) * 2)
        init_states.append((np.zeros(
            (batch_size, state_size), dtype=DTYPE), ) * 2)

    encoder = sqn.StackBidirectionalRNNEncoder(params['encoder'],
                                               init_states=init_states,
                                               name='stack_rnn')
    # initialize encoder first
    graph.initialize(dict_var_vals)
    source_embedded = source_embedding_table(source_ids)
    encoded_representation = encoder.encode(source_embedded, source_seq_length)

    attention_keys = encoded_representation.attention_keys
    attention_values = encoded_representation.attention_values
    attention_length = encoded_representation.attention_length

    # feedback
    feedback = sqn.BeamFeedBack(trg_vocab,
                                beam_size,
                                max_step,
                                name='feedback')

    # attention
    attention = sqn.Attention(attention_keys, attention_values,
                              attention_length)

    init_states = []
    if params['decoder']['rnn_cell']['cell_name'] != 'BasicLSTMCell':
        init_states.append(
            np.zeros((batch_size * beam_size, state_size), dtype=DTYPE))
        init_states.append(
            np.zeros((batch_size * beam_size, state_size), dtype=DTYPE))
    else:
        init_states.append((np.zeros(
            (batch_size * beam_size, state_size), dtype=DTYPE), ) * 2)
        init_states.append((np.zeros(
            (batch_size * beam_size, state_size), dtype=DTYPE), ) * 2)
    # decoder
    decoder = sqn.AttentionRNNDecoder(params['decoder'],
                                      attention,
                                      feedback,
                                      init_states=init_states,
                                      mode=mode,
                                      name='attention_decoder')
    # initialize decoder
    graph.initialize(dict_var_vals)

    decoder_output, decoder_final_state = sqn.decode_loop(decoder)

    return decoder_output, decoder_final_state
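
A hedged usage sketch: assuming `params` follows the structure the function reads (params['encoder']['rnn_cell'][...], params['decoder'][...]) and that dict_var_vals was exported from a trained TensorFlow session as in the tests above, inference over a single time-major source sequence might look like:

# hypothetical call; params, dict_var_vals, src_vocab and trg_vocab are
# assumed to come from a prior training run
source_ids = np.asarray([[3], [7], [5]], dtype=np.int32)  # (time, batch=1)
source_seq_length = np.asarray([3], dtype=np.int32)
decoder_output, final_state = build_attention_model_np(
    params, dict_var_vals, src_vocab, trg_vocab,
    source_ids, source_seq_length, beam_size=4, max_step=50)
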
Example #4
def test_attention_decoder():
    batch_size = 5
    attention_time_steps = 6
    attention_size = 7
    encoder_output_size = 9
    time_steps = 20
    vocab_size = 17
    embedding_dim = 12
    decoder_state_size = 19

    # ---------------------------
    # construct attention
    # random input
    if snp.TIME_MAJOR:
        attention_keys = np.asarray(
            np.random.rand(attention_time_steps, batch_size, attention_size),
            dtype=DTYPE)

        attention_values_np = np.asarray(
            np.random.rand(attention_time_steps, batch_size,
                           encoder_output_size), dtype=DTYPE)
    else:
        attention_keys = np.asarray(
            np.random.rand(batch_size, attention_time_steps, attention_size),
            dtype=DTYPE)

        attention_values_np = np.asarray(
            np.random.rand(batch_size, attention_time_steps,
                           encoder_output_size), dtype=DTYPE)

    attention_values = tf.convert_to_tensor(attention_values_np,
                                            name='attention_values')

    attention_values_length = np.random.randint(1, attention_time_steps + 1,
                                                batch_size)
    attention = Attention(decoder_state_size,
                          tf.convert_to_tensor(attention_keys),
                          attention_values, attention_values_length,
                          name='attention_decoder_test_decode')

    # ----------------------------------------------
    # construct feedback
    vocab = Vocab([chr(ord('a') + i) for i in range(vocab_size)],
                  embedding_dim)

    # dynamic batch size
    inputs = tf.placeholder(tf.int32, shape=(None, None),
                            name='source_ids')
    sequence_length = tf.placeholder(tf.int32, shape=(None,),
                                     name='source_seq_length')

    sequence_length_np = np.random.randint(1, time_steps + 1, batch_size,
                                           dtype=np.int32)

    # inputs to encoder
    if snp.TIME_MAJOR:
        inputs_np = np.random.randint(0, vocab_size, (time_steps, batch_size),
                                      dtype=np.int32)
    else:
        inputs_np = np.random.randint(0, vocab_size, (batch_size, time_steps),
                                      dtype=np.int32)

    feedback = TrainingFeedBack(inputs, sequence_length, vocab,
                                name='attention_decoder_test_decode')

    # --------------------------------------------------
    # construct decoder
    decoder_params = {'rnn_cell': {'cell_name': 'BasicLSTMCell',
                                   'state_size': decoder_state_size,
                                   'num_layers': 2,
                                   'input_keep_prob': 1.0,
                                   'output_keep_prob': 1.0},
                      'trg_vocab_size': vocab_size}

    decoder = AttentionRNNDecoder(decoder_params, attention, feedback,
                                  mode=MODE.EVAL,
                                  name='attention_decoder_test_decode')

    decoder_output, _ = dynamic_decode(decoder)

    init = tf.global_variables_initializer()
    # get the trained weights
    train_vars = tf.trainable_variables()

    with tf.Session() as sess:
        sess.run(init)
        train_vars_vals = sess.run(train_vars)
        dict_var_vals = {
            k.name.split(':')[0]: v
            for k, v in zip(train_vars, train_vars_vals)
        }
        decoder_output_tf = sess.run([decoder_output],
                                     feed_dict={
                                         inputs: inputs_np,
                                         sequence_length: sequence_length_np
                                     })

        init_state = decoder.cell.zero_state(batch_size, dtype=DTYPE)

        init_state_np = sess.run([init_state])[0]

    # test np implementation
    attention_np = Attention_np(attention_keys, attention_values_np,
                                attention_values_length,
                                name='attention_decoder_test_decode')

    feedback_np = TrainingFeedBackTest(inputs_np,
                                       sequence_length_np,
                                       vocab,
                                       name='attention_decoder_test_decode')

    decoder_np = AttentionRNNDecoder_np(decoder_params, attention_np,
                                        feedback_np, init_state_np,
                                        name='attention_decoder_test_decode')

    graph = snp.Graph()
    graph.initialize(dict_var_vals)

    decoder_output_np, _ = decode_loop(decoder_np)

    np.testing.assert_array_almost_equal(decoder_output_tf[0].logits,
                                         decoder_output_np['logits'])

    np.testing.assert_array_almost_equal(decoder_output_tf[0].predicted_ids,
                                         decoder_output_np['predicted_ids'])
Example #5
def test_training_feedback():
    batch_size = 2
    time_steps = 4
    vocab_size = 17
    embedding_dim = 12
    vocab = Vocab([chr(ord('a') + i) for i in range(vocab_size)],
                  embedding_dim)

    # dynamic batch size
    inputs = tf.placeholder(tf.int32, shape=(None, None), name='source_ids')
    sequence_length = tf.placeholder(tf.int32,
                                     shape=(None, ),
                                     name='source_seq_length')

    feedback = TrainingFeedBack(inputs,
                                sequence_length,
                                vocab,
                                name='feedback_test')

    finished_list = []
    output_list = []

    for i in range(time_steps):
        outputs = feedback.next_inputs(i)
        finished_list.append(outputs[0])
        output_list.append(outputs[1])

    sequence_length_np = np.random.randint(1,
                                           time_steps + 1,
                                           batch_size,
                                           dtype=np.int32)
    sequence_length_np_b2 = np.random.randint(1,
                                              time_steps + 1,
                                              batch_size * 2,
                                              dtype=np.int32)
    if TIME_MAJOR:
        inputs_np = np.random.randint(0,
                                      vocab_size, (time_steps, batch_size),
                                      dtype=np.int32)

        inputs_np_b2 = np.random.randint(0,
                                         vocab_size,
                                         (time_steps, batch_size * 2),
                                         dtype=np.int32)
    else:
        inputs_np = np.random.randint(0,
                                      vocab_size, (batch_size, time_steps),
                                      dtype=np.int32)

        inputs_np_b2 = np.random.randint(0,
                                         vocab_size,
                                         (batch_size * 2, time_steps),
                                         dtype=np.int32)

    init = tf.global_variables_initializer()
    train_vars = tf.trainable_variables()
    with tf.Session() as sess:
        sess.run(init)
        train_vars_vals = sess.run(train_vars)
        dict_var_vals = {
            k.name.split(':')[0]: v
            for k, v in zip(train_vars, train_vars_vals)
        }

        tf_outputs = sess.run(finished_list + output_list,
                              feed_dict={
                                  inputs: inputs_np,
                                  sequence_length: sequence_length_np
                              })

        tf_outputs_b2 = sess.run(finished_list + output_list,
                                 feed_dict={
                                     inputs: inputs_np_b2,
                                     sequence_length: sequence_length_np_b2
                                 })

    feedback_np = TrainingFeedBackTest(inputs_np,
                                       sequence_length_np,
                                       vocab,
                                       name='feedback_test')
    graph = snp.Graph()
    graph.initialize(dict_var_vals)

    for idx in range(time_steps):
        outputs_np = feedback_np.next_inputs(idx)
        np.testing.assert_array_almost_equal(outputs_np[1],
                                             tf_outputs[idx + time_steps])
        np.testing.assert_array_almost_equal(outputs_np[0], tf_outputs[idx])
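
The contract being tested: at each step, next_inputs returns a finished mask and the embedded tokens for that step. A simplified time-major numpy sketch (assuming `embedding` is the (vocab_size, embedding_dim) lookup table):

def next_inputs_sketch(inputs_np, sequence_length_np, embedding, step):
    # finished: True for batch entries whose sequence has already ended
    finished = step >= sequence_length_np
    # embed the tokens fed at this time step (row `step` in time-major layout)
    return finished, embedding[inputs_np[step]]
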
Example #6
def test_stack_bidir_rnn_encoder_lstm():
    graph = snp.Graph()
    graph.clear_layers()
    stack_bidir_rnn_encoder('BasicLSTMCell', 'bir_lstm')
    graph.clear_layers()
Example #7
def test_stack_bidir_rnn_encoder_gru():
    graph = snp.Graph()
    graph.clear_layers()
    stack_bidir_rnn_encoder('GRUCell')
    graph.clear_layers()
Example #8
def test_stack_bidir_rnn_encoder_rnn():
    graph = snp.Graph()
    graph.clear_layers()
    stack_bidir_rnn_encoder('BasicRNNCell', 'rnn')
    graph.clear_layers()
Example #9
def stack_bidir_rnn_encoder(rnn_cell, name=None):
    time_steps = 4
    hidden_units = 32
    batch_size = 6
    num_layers = 7
    input_size = 8
    attention_size = 9
    time_major = TIME_MAJOR

    params = {
        'rnn_cell': {
            'state_size': hidden_units,
            'cell_name': rnn_cell,
            'num_layers': num_layers,
            'input_keep_prob': 1.0,
            'output_keep_prob': 1.0
        },
        'attention_key_size': attention_size
    }

    encoder = StackBidirectionalRNNEncoder(params, mode=MODE.INFER, name=name)

    # inputs to encoder
    if time_major:
        inputs = np.asarray(np.random.rand(time_steps, batch_size, input_size),
                            dtype=DTYPE)
        sequence_length = np.random.randint(1, time_steps + 1, batch_size)
    else:
        inputs = np.asarray(np.random.rand(batch_size, time_steps, input_size),
                            dtype=DTYPE)
        sequence_length = np.random.randint(1, time_steps + 1, batch_size)
    output = encoder.encode(tf.convert_to_tensor(inputs), sequence_length)

    # get outputs of tensorflow
    init = tf.global_variables_initializer()
    train_vars = tf.trainable_variables()

    with tf.Session() as sess:
        sess.run(init)
        train_vars_vals = sess.run(train_vars)
        dict_var_vals = {
            k.name.split(':')[0]: v
            for k, v in zip(train_vars, train_vars_vals)
        }
        output_tf = sess.run([output[0], output[1], output[2], output[3]])

    init_states = []
    for i in range(num_layers):
        if rnn_cell != 'BasicLSTMCell':
            init_states.append(
                np.zeros((batch_size, hidden_units), dtype=DTYPE))
            init_states.append(
                np.zeros((batch_size, hidden_units), dtype=DTYPE))
        else:
            init_states.append((np.zeros(
                (batch_size, hidden_units), dtype=DTYPE), ) * 2)
            init_states.append((np.zeros(
                (batch_size, hidden_units), dtype=DTYPE), ) * 2)

    encoder_np = snp.StackBidirectionalRNNEncoder(params, init_states, name)
    graph = snp.Graph()
    graph.initialize(dict_var_vals)

    output_np = encoder_np.encode(inputs, sequence_length)

    np.testing.assert_array_almost_equal(output_np[0], output_tf[0])
    np.testing.assert_array_almost_equal(output_np[1], output_tf[1])
    np.testing.assert_array_almost_equal(output_np[2], output_tf[2])
    np.testing.assert_array_almost_equal(output_np[3], output_tf[3])
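
The init_states construction (two entries per layer, one for each direction, with (c, h) tuples for LSTM cells) also appears in Example #3. A small hypothetical helper capturing the pattern:

def make_zero_init_states(cell_name, num_layers, batch_size, state_size):
    # a single zero array is shared across entries, assuming the encoder
    # treats initial states as read-only
    zeros = np.zeros((batch_size, state_size), dtype=DTYPE)
    states = []
    for _ in range(num_layers):
        for _ in range(2):  # forward and backward directions
            # LSTM state is a (c, h) tuple; other cells use a single array
            states.append((zeros, zeros) if cell_name == 'BasicLSTMCell'
                          else zeros)
    return states
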