def test_gru():
    hidden_units = 33
    batch_size = 13
    time_steps = 7
    embedding_size = 8
    time_major = snp.TIME_MAJOR

    # tensorflow results
    gru = tf.contrib.rnn.GRUCell(hidden_units, activation=tf.nn.tanh)

    # initial state of the GRU
    state = np.repeat(np.asarray(np.random.rand(1, hidden_units), dtype=DTYPE),
                      batch_size, axis=0)

    # inputs to GRU
    if time_major:
        inputs = np.asarray(
            np.random.rand(time_steps, batch_size, embedding_size),
            dtype=DTYPE)
        sequence_length = np.random.randint(1, time_steps + 1, batch_size)
    else:
        inputs = np.asarray(
            np.random.rand(batch_size, time_steps, embedding_size),
            dtype=DTYPE)
        sequence_length = np.random.randint(1, time_steps + 1, batch_size)

    outputs, _ = tf.nn.dynamic_rnn(cell=gru,
                                   inputs=tf.convert_to_tensor(inputs),
                                   sequence_length=sequence_length,
                                   initial_state=state,
                                   dtype=DTYPE,
                                   time_major=time_major)

    # get outputs of tensorflow
    init = tf.global_variables_initializer()
    train_vars = tf.trainable_variables()
    with tf.Session() as sess:
        sess.run(init)
        train_vars_vals = sess.run(train_vars)
        dict_var_vals = {k.name.split(':')[0]: v
                         for k, v in zip(train_vars, train_vars_vals)}
        outputs_tf = sess.run(outputs)

    # test numpy implementation
    gru_np = snp.GRUCell(state, activation=np.tanh, base_name='rnn')
    graph = snp.Graph()
    graph.initialize(dict_var_vals)
    outputs_np, _ = gru_np.encode(inputs, sequence_length=sequence_length)

    np.testing.assert_array_almost_equal(outputs_tf, outputs_np)
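
# Helper sketch (not part of the original tests): every test in this file
# repeats the same pattern of reading the trained TensorFlow variables into a
# dict keyed by variable name (with the ':0' suffix stripped) so that the
# numpy graph can be initialized from it. The helper name below is an
# assumption; it simply factors that repeated pattern out.
def fetch_trainable_var_values(sess):
    train_vars = tf.trainable_variables()
    train_vars_vals = sess.run(train_vars)
    return {k.name.split(':')[0]: v
            for k, v in zip(train_vars, train_vars_vals)}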
def test_attention():
    batch_size = 5
    attention_time_steps = 6
    attention_size = 7
    state_size = 8
    encoder_output_size = 9

    # random input
    if snp.TIME_MAJOR:
        attention_keys = np.asarray(
            np.random.rand(attention_time_steps, batch_size, attention_size),
            dtype=DTYPE)
        attention_values_np = np.asarray(
            np.random.rand(attention_time_steps, batch_size,
                           encoder_output_size),
            dtype=DTYPE)
    else:
        attention_keys = np.asarray(
            np.random.rand(batch_size, attention_time_steps, attention_size),
            dtype=DTYPE)
        attention_values_np = np.asarray(
            np.random.rand(batch_size, attention_time_steps,
                           encoder_output_size),
            dtype=DTYPE)

    attention_values = tf.convert_to_tensor(attention_values_np,
                                            name='attention_values')
    attention_values_length = np.random.randint(1, attention_time_steps + 1,
                                                batch_size)

    attention = Attention(state_size,
                          tf.convert_to_tensor(attention_keys),
                          attention_values,
                          attention_values_length,
                          name='attention_test')

    # random query
    query = np.asarray(np.random.rand(batch_size, state_size), dtype=DTYPE)

    # test normalized scores and context
    scores_normalized, context = attention.compute_scores(query)

    init = tf.global_variables_initializer()
    # get weight
    train_vars = tf.trainable_variables()
    with tf.Session() as sess:
        sess.run(init)
        train_vars_vals = sess.run(train_vars)
        dict_var_vals = {k.name.split(':')[0]: v
                         for k, v in zip(train_vars, train_vars_vals)}
        scores_normalized_tf, context_tf = sess.run(
            [scores_normalized, context])

    # test np implementation
    attention_np = Attention_np(attention_keys, attention_values_np,
                                attention_values_length,
                                name='attention_test')
    graph = snp.Graph()
    graph.initialize(dict_var_vals)
    scores_normalized_np, context_np = attention_np.compute_scores(query)

    np.testing.assert_array_almost_equal(scores_normalized_np,
                                         scores_normalized_tf)
    np.testing.assert_array_almost_equal(context_np, context_tf)
def build_attention_model_np(params, dict_var_vals, src_vocab, trg_vocab,
                             source_ids, source_seq_length, beam_size=1,
                             max_step=100):
    """
    Build the numpy inference model from trained variable values.

    :param params: see `sequencing`.
    :param dict_var_vals: dict mapping variable names to the trained numpy
        values used to initialize the numpy graph.
    :param src_vocab: source vocabulary.
    :param trg_vocab: target vocabulary.
    :param source_ids: ids of the source sentence.
    :param source_seq_length: length of the source sentence.
    :param beam_size: beam width used during decoding.
    :param max_step: maximum number of decoding steps.
    :return: decoder output and final decoder state.
    """
    mode = MODE.INFER
    batch_size = 1

    graph = sqn.Graph()

    # Because the source encoder is different from the target feedback,
    # we construct source_embedding_table manually.
    source_embedding_table = sqn.LookUpOp(name='source')

    state_size = params['encoder']['rnn_cell']['state_size']
    init_states = []
    if params['encoder']['rnn_cell']['cell_name'] != 'BasicLSTMCell':
        init_states.append(np.zeros((batch_size, state_size), dtype=DTYPE))
        init_states.append(np.zeros((batch_size, state_size), dtype=DTYPE))
    else:
        init_states.append((np.zeros((batch_size, state_size),
                                     dtype=DTYPE),) * 2)
        init_states.append((np.zeros((batch_size, state_size),
                                     dtype=DTYPE),) * 2)

    encoder = sqn.StackBidirectionalRNNEncoder(params['encoder'],
                                               init_states=init_states,
                                               name='stack_rnn')

    # initialize encoder first
    graph.initialize(dict_var_vals)

    source_embedded = source_embedding_table(source_ids)

    encoded_representation = encoder.encode(source_embedded,
                                            source_seq_length)
    attention_keys = encoded_representation.attention_keys
    attention_values = encoded_representation.attention_values
    attention_length = encoded_representation.attention_length

    # feedback
    feedback = sqn.BeamFeedBack(trg_vocab, beam_size, max_step,
                                name='feedback')

    # attention
    attention = sqn.Attention(attention_keys, attention_values,
                              attention_length)

    init_states = []
    if params['decoder']['rnn_cell']['cell_name'] != 'BasicLSTMCell':
        init_states.append(
            np.zeros((batch_size * beam_size, state_size), dtype=DTYPE))
        init_states.append(
            np.zeros((batch_size * beam_size, state_size), dtype=DTYPE))
    else:
        init_states.append((np.zeros((batch_size * beam_size, state_size),
                                     dtype=DTYPE),) * 2)
        init_states.append((np.zeros((batch_size * beam_size, state_size),
                                     dtype=DTYPE),) * 2)

    # decoder
    decoder = sqn.AttentionRNNDecoder(params['decoder'], attention, feedback,
                                      init_states=init_states, mode=mode,
                                      name='attention_decoder')

    # initialize decoder
    graph.initialize(dict_var_vals)

    decoder_output, decoder_final_state = sqn.decode_loop(decoder)

    return decoder_output, decoder_final_state
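
# Hypothetical usage sketch (not in the original source): `params`,
# `dict_var_vals`, `src_vocab` and `trg_vocab` are assumed to come from a
# trained sequencing model restored elsewhere; the token ids below are
# placeholders, and TIME_MAJOR is assumed to be exposed on the `sqn` package
# as it is on `snp` in the other tests. The function expects a single
# sentence, i.e. batch size 1.
def infer_single_sentence(params, dict_var_vals, src_vocab, trg_vocab):
    token_ids = [3, 7, 12, 2]  # placeholder ids of one tokenized sentence
    if sqn.TIME_MAJOR:
        source_ids = np.asarray(token_ids, dtype=np.int32).reshape(-1, 1)
    else:
        source_ids = np.asarray(token_ids, dtype=np.int32).reshape(1, -1)
    source_seq_length = np.asarray([len(token_ids)], dtype=np.int32)
    return build_attention_model_np(params, dict_var_vals, src_vocab,
                                    trg_vocab, source_ids, source_seq_length,
                                    beam_size=5)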
def test_attention_decoder():
    batch_size = 5
    attention_time_steps = 6
    attention_size = 7
    encoder_output_size = 9
    time_steps = 20
    vocab_size = 17
    embedding_dim = 12
    decoder_state_size = 19

    # ---------------------------
    # construct attention
    # random input
    if snp.TIME_MAJOR:
        attention_keys = np.asarray(
            np.random.rand(attention_time_steps, batch_size, attention_size),
            dtype=DTYPE)
        attention_values_np = np.asarray(
            np.random.rand(attention_time_steps, batch_size,
                           encoder_output_size),
            dtype=DTYPE)
    else:
        attention_keys = np.asarray(
            np.random.rand(batch_size, attention_time_steps, attention_size),
            dtype=DTYPE)
        attention_values_np = np.asarray(
            np.random.rand(batch_size, attention_time_steps,
                           encoder_output_size),
            dtype=DTYPE)

    attention_values = tf.convert_to_tensor(attention_values_np,
                                            name='attention_values')
    attention_values_length = np.random.randint(1, attention_time_steps + 1,
                                                batch_size)

    attention = Attention(decoder_state_size,
                          tf.convert_to_tensor(attention_keys),
                          attention_values,
                          attention_values_length,
                          name='attention_decoder_test_decode')

    # ----------------------------------------------
    # construct feedback
    vocab = Vocab([chr(ord('a') + i) for i in range(vocab_size)],
                  embedding_dim)

    # dynamical batch size
    inputs = tf.placeholder(tf.int32, shape=(None, None), name='source_ids')
    sequence_length = tf.placeholder(tf.int32, shape=(None,),
                                     name='source_seq_length')
    sequence_length_np = np.random.randint(1, time_steps + 1, batch_size,
                                           dtype=np.int32)

    # inputs to encoder
    if snp.TIME_MAJOR:
        inputs_np = np.random.randint(0, vocab_size, (time_steps, batch_size),
                                      dtype=np.int32)
    else:
        inputs_np = np.random.randint(0, vocab_size, (batch_size, time_steps),
                                      dtype=np.int32)

    feedback = TrainingFeedBack(inputs, sequence_length, vocab,
                                name='attention_decoder_test_decode')

    # --------------------------------------------------
    # construct decoder
    decoder_params = {'rnn_cell': {'cell_name': 'BasicLSTMCell',
                                   'state_size': decoder_state_size,
                                   'num_layers': 2,
                                   'input_keep_prob': 1.0,
                                   'output_keep_prob': 1.0},
                      'trg_vocab_size': vocab_size}

    decoder = AttentionRNNDecoder(decoder_params, attention, feedback,
                                  mode=MODE.EVAL,
                                  name='attention_decoder_test_decode')

    decoder_output, _ = dynamic_decode(decoder)

    init = tf.global_variables_initializer()
    # get weight
    train_vars = tf.trainable_variables()
    with tf.Session() as sess:
        sess.run(init)
        train_vars_vals = sess.run(train_vars)
        dict_var_vals = {k.name.split(':')[0]: v
                         for k, v in zip(train_vars, train_vars_vals)}
        decoder_output_tf = sess.run(
            [decoder_output],
            feed_dict={inputs: inputs_np,
                       sequence_length: sequence_length_np})
        init_state = decoder.cell.zero_state(batch_size, dtype=DTYPE)
        init_state_np = sess.run([init_state])[0]

    # test np implementation
    attention_np = Attention_np(attention_keys, attention_values_np,
                                attention_values_length,
                                name='attention_decoder_test_decode')
    feedback_np = TrainingFeedBackTest(inputs_np, sequence_length_np, vocab,
                                       name='attention_decoder_test_decode')
    decoder_np = AttentionRNNDecoder_np(decoder_params, attention_np,
                                        feedback_np, init_state_np,
                                        name='attention_decoder_test_decode')
    graph = snp.Graph()
    graph.initialize(dict_var_vals)
    decoder_output_np, _ = decode_loop(decoder_np)

    np.testing.assert_array_almost_equal(decoder_output_tf[0].logits,
                                         decoder_output_np['logits'])
    np.testing.assert_array_almost_equal(decoder_output_tf[0].predicted_ids,
                                         decoder_output_np['predicted_ids'])
def test_training_feedback():
    batch_size = 2
    time_steps = 4
    vocab_size = 17
    embedding_dim = 12

    vocab = Vocab([chr(ord('a') + i) for i in range(vocab_size)],
                  embedding_dim)

    # dynamical batch size
    inputs = tf.placeholder(tf.int32, shape=(None, None), name='source_ids')
    sequence_length = tf.placeholder(tf.int32, shape=(None,),
                                     name='source_seq_length')

    feedback = TrainingFeedBack(inputs, sequence_length, vocab,
                                name='feedback_test')

    finished_list = []
    output_list = []
    for i in range(time_steps):
        outputs = feedback.next_inputs(i)
        finished_list.append(outputs[0])
        output_list.append(outputs[1])

    sequence_length_np = np.random.randint(1, time_steps + 1, batch_size,
                                           dtype=np.int32)
    sequence_length_np_b2 = np.random.randint(1, time_steps + 1,
                                              batch_size * 2, dtype=np.int32)

    if TIME_MAJOR:
        inputs_np = np.random.randint(0, vocab_size, (time_steps, batch_size),
                                      dtype=np.int32)
        inputs_np_b2 = np.random.randint(0, vocab_size,
                                         (time_steps, batch_size * 2),
                                         dtype=np.int32)
    else:
        inputs_np = np.random.randint(0, vocab_size, (batch_size, time_steps),
                                      dtype=np.int32)
        inputs_np_b2 = np.random.randint(0, vocab_size,
                                         (batch_size * 2, time_steps),
                                         dtype=np.int32)

    init = tf.global_variables_initializer()
    train_vars = tf.trainable_variables()
    with tf.Session() as sess:
        sess.run(init)
        train_vars_vals = sess.run(train_vars)
        dict_var_vals = {k.name.split(':')[0]: v
                         for k, v in zip(train_vars, train_vars_vals)}
        tf_outputs = sess.run(finished_list + output_list,
                              feed_dict={inputs: inputs_np,
                                         sequence_length: sequence_length_np})
        tf_outputs_b2 = sess.run(
            finished_list + output_list,
            feed_dict={inputs: inputs_np_b2,
                       sequence_length: sequence_length_np_b2})

    # print(inputs_np, tf_outputs, tf_outputs_b2, dict_var_vals)

    feedback_np = TrainingFeedBackTest(inputs_np, sequence_length_np, vocab,
                                       name='feedback_test')
    graph = snp.Graph()
    graph.initialize(dict_var_vals)

    for idx in range(time_steps):
        outputs_np = feedback_np.next_inputs(idx)
        np.testing.assert_array_almost_equal(outputs_np[1],
                                             tf_outputs[idx + time_steps])
        np.testing.assert_array_almost_equal(outputs_np[0], tf_outputs[idx])
def test_stack_bidir_rnn_encoder_lstm():
    graph = snp.Graph()
    graph.clear_layers()
    stack_bidir_rnn_encoder('BasicLSTMCell', 'bir_lstm')
    graph.clear_layers()
def test_stack_bidir_rnn_encoder_gru():
    graph = snp.Graph()
    graph.clear_layers()
    stack_bidir_rnn_encoder('GRUCell')
    graph.clear_layers()
def test_stack_bidir_rnn_encoder_rnn():
    graph = snp.Graph()
    graph.clear_layers()
    stack_bidir_rnn_encoder('BasicRNNCell', 'rnn')
    graph.clear_layers()
def stack_bidir_rnn_encoder(rnn_cell, name=None):
    time_steps = 4
    hidden_units = 32
    batch_size = 6
    num_layers = 7
    input_size = 8
    attention_size = 9
    time_major = TIME_MAJOR

    params = {'rnn_cell': {'state_size': hidden_units,
                           'cell_name': rnn_cell,
                           'num_layers': num_layers,
                           'input_keep_prob': 1.0,
                           'output_keep_prob': 1.0},
              'attention_key_size': attention_size}

    encoder = StackBidirectionalRNNEncoder(params, mode=MODE.INFER, name=name)

    # inputs to encoder
    if time_major:
        inputs = np.asarray(np.random.rand(time_steps, batch_size, input_size),
                            dtype=DTYPE)
        sequence_length = np.random.randint(1, time_steps + 1, batch_size)
    else:
        inputs = np.asarray(np.random.rand(batch_size, time_steps, input_size),
                            dtype=DTYPE)
        sequence_length = np.random.randint(1, time_steps + 1, batch_size)

    output = encoder.encode(tf.convert_to_tensor(inputs), sequence_length)

    # get outputs of tensorflow
    init = tf.global_variables_initializer()
    train_vars = tf.trainable_variables()
    with tf.Session() as sess:
        sess.run(init)
        train_vars_vals = sess.run(train_vars)
        dict_var_vals = {k.name.split(':')[0]: v
                         for k, v in zip(train_vars, train_vars_vals)}
        output_tf = sess.run([output[0], output[1], output[2], output[3]])

    init_states = []
    for i in range(num_layers):
        if rnn_cell != 'BasicLSTMCell':
            init_states.append(
                np.zeros((batch_size, hidden_units), dtype=DTYPE))
            init_states.append(
                np.zeros((batch_size, hidden_units), dtype=DTYPE))
        else:
            init_states.append((np.zeros((batch_size, hidden_units),
                                         dtype=DTYPE),) * 2)
            init_states.append((np.zeros((batch_size, hidden_units),
                                         dtype=DTYPE),) * 2)

    encoder_np = snp.StackBidirectionalRNNEncoder(params, init_states, name)
    graph = snp.Graph()
    graph.initialize(dict_var_vals)
    output_np = encoder_np.encode(inputs, sequence_length)

    np.testing.assert_array_almost_equal(output_np[0], output_tf[0])
    np.testing.assert_array_almost_equal(output_np[1], output_tf[1])
    np.testing.assert_array_almost_equal(output_np[2], output_tf[2])
    np.testing.assert_array_almost_equal(output_np[3], output_tf[3])
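
# Equivalent parametrized sketch (an alternative, assuming pytest is the test
# runner; pytest is not imported by the original tests, and the import would
# normally sit at the top of the file). The three
# test_stack_bidir_rnn_encoder_* wrappers above differ only in the cell name
# and layer name they pass to stack_bidir_rnn_encoder, so they could also be
# written as a single parametrized test.
import pytest


@pytest.mark.parametrize('rnn_cell, name', [('BasicLSTMCell', 'bir_lstm'),
                                            ('GRUCell', None),
                                            ('BasicRNNCell', 'rnn')])
def test_stack_bidir_rnn_encoder(rnn_cell, name):
    graph = snp.Graph()
    graph.clear_layers()
    stack_bidir_rnn_encoder(rnn_cell, name)
    graph.clear_layers()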