import numpy as np
import tensorflow as tf
from tensorflow.contrib import rnn as tf_rnn

# NOTE: project-local helpers (OPS_NAME, sequence, vocab, decoder,
# neural_editor, context_encoder, word_aggregator, wa_accumulator,
# create_micro_edit_vectors, masked_fill, sample_vMF, ConcatPosEmbedding,
# TransformerMicroEditExtractor, EMBED_DIM, BATCH_SIZE, NUM_EPOCH) are
# assumed importable from the surrounding package.


def rnn_encoder(source_words, target_words, insert_words, delete_words,
                source_lengths, target_lengths, iw_lengths, dw_lengths,
                ctx_hidden_dim, ctx_hidden_layer, wa_hidden_dim,
                wa_hidden_layer, edit_dim, noise_scaler, norm_eps, norm_max,
                sent_encoder, dropout_keep=1., use_dropout=False,
                swap_memory=False, enable_vae=True):
    """Encodes a (source, target) pair and its insert/delete word sets into an edit vector.

    Args:
        source_words: source sequence tensor.
        target_words: target sequence tensor.
        insert_words: words inserted going from source to target.
        delete_words: words deleted going from source to target.
        source_lengths: per-example source lengths.
        target_lengths: per-example target lengths.
        iw_lengths: per-example insert-word counts.
        dw_lengths: per-example delete-word counts.
        ctx_hidden_dim: context-encoder hidden size (unused here; the
            sentence context comes from `sent_encoder`).
        ctx_hidden_layer: context-encoder layer count (unused here).
        wa_hidden_dim: word-aggregator hidden size.
        wa_hidden_layer: word-aggregator layer count.
        edit_dim: dimensionality of the edit vector.
        noise_scaler: scaling factor for the vMF noise.
        norm_eps: epsilon used when rescaling the edit-vector norm.
        norm_max: maximum edit-vector norm.
        sent_encoder: callable `(words, lengths) -> (outputs, last_state)`.
        dropout_keep: dropout keep probability.
        use_dropout: whether the sub-encoders apply dropout.
        swap_memory: passed to the underlying dynamic RNNs.
        enable_vae: if True, perturb the edit vector with vMF noise.

    Returns:
        A tuple `(edit_vector, wa_inserted, wa_deleted)`.
    """
    with tf.variable_scope(OPS_NAME):
        cnx_src, cnx_src_last = sent_encoder(source_words, source_lengths)
        cnx_tgt, cnx_tgt_last = sent_encoder(target_words, target_lengths)

        wa = tf.make_template('wa', context_encoder,
                              hidden_dim=wa_hidden_dim,
                              num_layers=wa_hidden_layer,
                              swap_memory=swap_memory,
                              use_dropout=use_dropout,
                              dropout_keep=dropout_keep)

        wa_inserted = wa(insert_words, iw_lengths)
        wa_deleted = wa(delete_words, dw_lengths)

        wa_inserted_last = sequence.last_relevant(wa_inserted, iw_lengths)
        wa_deleted_last = sequence.last_relevant(wa_deleted, dw_lengths)

        features = tf.concat([
            cnx_src_last,
            cnx_tgt_last,
            wa_inserted_last,
            wa_deleted_last
        ], axis=1)

        edit_vector = tf.layers.dense(features, edit_dim, name='encoder_ev')

        if enable_vae:
            edit_vector = sample_vMF(edit_vector, noise_scaler, norm_eps,
                                     norm_max)

        return edit_vector, wa_inserted, wa_deleted
def bidirectional_encoder(src, src_length, hidden_dim, num_layer,
                          dropout_keep, swap_memory=False, use_dropout=False,
                          reuse=None, name=None):
    with tf.variable_scope(name, 'encoder', values=[src, src_length],
                           reuse=reuse):
        def create_rnn_layer(layer_num, dim):
            cell = tf_rnn.LSTMCell(dim, name='layer_%s' % layer_num)
            if use_dropout and dropout_keep < 1.:
                cell = tf_rnn.DropoutWrapper(cell,
                                             output_keep_prob=dropout_keep)
            # Residual connections on every layer except the first.
            if layer_num > 0:
                cell = tf_rnn.ResidualWrapper(cell)
            return cell

        batch_size = tf.shape(src)[0]

        # Each direction gets half the hidden size so the concatenated
        # output is `hidden_dim` wide.
        fw_cell = tf_rnn.MultiRNNCell(
            [create_rnn_layer(i, hidden_dim // 2) for i in range(num_layer)])
        bw_cell = tf_rnn.MultiRNNCell(
            [create_rnn_layer(i, hidden_dim // 2) for i in range(num_layer)])

        fw_zero_state = sequence.create_trainable_initial_states(
            batch_size, fw_cell, 'fw_zs')
        bw_zero_state = sequence.create_trainable_initial_states(
            batch_size, bw_cell, 'bw_zs')

        outputs, state = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, src, src_length,
            fw_zero_state, bw_zero_state,
            swap_memory=swap_memory)

        output = tf.concat(outputs, axis=2)
        final_state = sequence.last_relevant(output, src_length)

        return output, final_state
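
# Hedged wiring sketch (hypothetical hyperparameters; `_example_*` is not
# part of the original module). The `rnn_encoder` variant above that takes a
# `sent_encoder` only assumes `sent_encoder(words, lengths)` returns
# (per-step outputs, last relevant state), which is exactly the contract of
# `bidirectional_encoder`, so the two can be glued with a shared template:
def _example_rnn_encoder_wiring(src, tgt, ins, dlt,
                                src_len, tgt_len, ins_len, dlt_len):
    sent_enc = tf.make_template('sent_encoder', bidirectional_encoder,
                                hidden_dim=256, num_layer=2,
                                dropout_keep=1.0)
    # Arbitrary example dimensions; the vMF settings are placeholders.
    return rnn_encoder(src, tgt, ins, dlt,
                       src_len, tgt_len, ins_len, dlt_len,
                       ctx_hidden_dim=256, ctx_hidden_layer=2,
                       wa_hidden_dim=128, wa_hidden_layer=1,
                       edit_dim=128, noise_scaler=10.0,
                       norm_eps=0.1, norm_max=14.0,
                       sent_encoder=sent_enc)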
def attn_encoder(source_words, target_words, insert_words, delete_words,
                 source_lengths, target_lengths, iw_lengths, dw_lengths,
                 ctx_hidden_dim, ctx_hidden_layer, wa_hidden_dim,
                 wa_hidden_layer, meve_hidden_dim, meve_hidden_layers,
                 edit_dim, micro_edit_ev_dim, num_heads, noise_scaler,
                 norm_eps, norm_max, dropout_keep=1., use_dropout=False,
                 swap_memory=False, enable_vae=True):
    """Attention-based edit encoder built on aggregated micro edit vectors.

    Args:
        source_words: source sequence tensor.
        target_words: target sequence tensor.
        insert_words: words inserted going from source to target.
        delete_words: words deleted going from source to target.
        source_lengths: per-example source lengths.
        target_lengths: per-example target lengths.
        iw_lengths: per-example insert-word counts.
        dw_lengths: per-example delete-word counts.
        ctx_hidden_dim: context-encoder hidden size.
        ctx_hidden_layer: context-encoder layer count.
        wa_hidden_dim: word-aggregator hidden size.
        wa_hidden_layer: word-aggregator layer count.
        meve_hidden_dim: micro-edit-vector encoder hidden size.
        meve_hidden_layers: micro-edit-vector encoder layer count.
        edit_dim: dimensionality of the edit vector.
        micro_edit_ev_dim: dimensionality of each micro edit vector.
        num_heads: number of attention heads for source/target alignment.
        noise_scaler: scaling factor for the vMF noise.
        norm_eps: epsilon used when rescaling the edit-vector norm.
        norm_max: maximum edit-vector norm.
        dropout_keep: dropout keep probability.
        use_dropout: whether dropout is applied.
        swap_memory: passed to the underlying dynamic RNNs.
        enable_vae: if True, perturb the edit vector with vMF noise.

    Returns:
        A tuple `(edit_vector, dummy, dummy)`; the two constant tensors are
        placeholders, as this variant exposes no per-word outputs.
    """
    with tf.variable_scope(OPS_NAME):
        cnx_encoder = tf.make_template('cnx_encoder', context_encoder,
                                       hidden_dim=ctx_hidden_dim,
                                       num_layers=ctx_hidden_layer,
                                       swap_memory=swap_memory,
                                       use_dropout=use_dropout,
                                       dropout_keep=dropout_keep)

        wa = tf.make_template('wa', context_encoder,
                              hidden_dim=wa_hidden_dim,
                              num_layers=wa_hidden_layer,
                              swap_memory=swap_memory,
                              use_dropout=use_dropout,
                              dropout_keep=dropout_keep)

        wa_inserted = wa(insert_words, iw_lengths)
        wa_deleted = wa(delete_words, dw_lengths)

        wa_inserted_last = sequence.last_relevant(wa_inserted, iw_lengths)
        wa_deleted_last = sequence.last_relevant(wa_deleted, dw_lengths)

        if use_dropout and dropout_keep < 1.:
            wa_inserted_last = tf.nn.dropout(wa_inserted_last, dropout_keep)
            wa_deleted_last = tf.nn.dropout(wa_deleted_last, dropout_keep)

        cnx_src = cnx_encoder(source_words, source_lengths)
        cnx_tgt = cnx_encoder(target_words, target_lengths)

        # bs x seq_len x micro_edit_vec_dim
        micro_evs_st, micro_evs_ts = create_micro_edit_vectors(
            cnx_src, cnx_tgt, source_lengths, target_lengths,
            ctx_hidden_dim, num_heads, micro_edit_ev_dim,
            dropout_keep, use_dropout)

        micro_ev_encoder = tf.make_template('micro_ev_encoder',
                                            context_encoder,
                                            hidden_dim=meve_hidden_dim,
                                            num_layers=meve_hidden_layers,
                                            swap_memory=swap_memory,
                                            use_dropout=use_dropout,
                                            dropout_keep=dropout_keep)

        aggreg_mev_st = micro_ev_encoder(micro_evs_st, source_lengths)
        aggreg_mev_ts = micro_ev_encoder(micro_evs_ts, target_lengths)

        aggreg_mev_st_last = sequence.last_relevant(aggreg_mev_st,
                                                    source_lengths)
        aggreg_mev_ts_last = sequence.last_relevant(aggreg_mev_ts,
                                                    target_lengths)

        if use_dropout and dropout_keep < 1.:
            aggreg_mev_st_last = tf.nn.dropout(aggreg_mev_st_last,
                                               dropout_keep)
            aggreg_mev_ts_last = tf.nn.dropout(aggreg_mev_ts_last,
                                               dropout_keep)

        features = tf.concat([
            aggreg_mev_st_last,
            aggreg_mev_ts_last,
            wa_inserted_last,
            wa_deleted_last
        ], axis=1)

        edit_vector = tf.layers.dense(features, edit_dim, use_bias=False,
                                      name='encoder_ev')

        if enable_vae:
            edit_vector = sample_vMF(edit_vector, noise_scaler, norm_eps,
                                     norm_max)

        is_training = tf.get_collection('is_training')[0]
        edit_vector = tf.layers.batch_normalization(edit_vector,
                                                    name='ev_normalizer',
                                                    training=is_training)

        return edit_vector, tf.constant([[0]]), tf.constant([[0]])
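
# Hedged sketch (assumed semantics, not the project's actual helper) of the
# alignment step behind `create_micro_edit_vectors`: attend from each source
# position over the target states, then project each (source state, attended
# target state) pair down to a micro edit vector. The real helper is
# multi-headed and applies dropout; this single-head version only
# illustrates the shape flow.
def _example_micro_edit_vectors(cnx_src, cnx_tgt, target_lengths,
                                micro_edit_ev_dim):
    # Scaled dot-product scores: bs x src_len x tgt_len.
    dim = tf.cast(tf.shape(cnx_src)[-1], tf.float32)
    scores = tf.matmul(cnx_src, cnx_tgt, transpose_b=True) / tf.sqrt(dim)

    # Mask padded target positions before the softmax.
    tgt_mask = tf.sequence_mask(target_lengths,
                                maxlen=tf.shape(cnx_tgt)[1],
                                dtype=tf.float32)  # bs x tgt_len
    scores += (1. - tgt_mask[:, tf.newaxis, :]) * -1e9

    attended_tgt = tf.matmul(tf.nn.softmax(scores), cnx_tgt)

    # One micro edit vector per source position: bs x src_len x mev_dim.
    return tf.layers.dense(tf.concat([cnx_src, attended_tgt], axis=-1),
                           micro_edit_ev_dim, name='example_micro_ev')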
def attn_encoder(source_words, target_words, insert_words, delete_words,
                 source_lengths, target_lengths, iw_lengths, dw_lengths,
                 transformer_params, wa_hidden_dim, meve_hidden_dim,
                 meve_hidden_layers, edit_dim, micro_edit_ev_dim,
                 noise_scaler, norm_eps, norm_max, dropout_keep=1.,
                 use_dropout=False, swap_memory=False, enable_vae=True):
    """Transformer-based edit encoder built on micro edit vectors.

    Args:
        source_words: source sequence tensor.
        target_words: target sequence tensor.
        insert_words: words inserted going from source to target.
        delete_words: words deleted going from source to target.
        source_lengths: per-example source lengths.
        target_lengths: per-example target lengths.
        iw_lengths: per-example insert-word counts.
        dw_lengths: per-example delete-word counts.
        transformer_params: hyperparameters of the micro-edit extractor
            (at least `hidden_size` and `pos_encoding_dim`).
        wa_hidden_dim: word-aggregator hidden size.
        meve_hidden_dim: micro-edit-vector encoder hidden size.
        meve_hidden_layers: micro-edit-vector encoder layer count.
        edit_dim: dimensionality of the edit vector.
        micro_edit_ev_dim: dimensionality of each micro edit vector.
        noise_scaler: scaling factor for the vMF noise.
        norm_eps: epsilon used when rescaling the edit-vector norm.
        norm_max: maximum edit-vector norm.
        dropout_keep: dropout keep probability.
        use_dropout: whether dropout is applied.
        swap_memory: passed to the underlying dynamic RNNs.
        enable_vae: if True, perturb the edit vector with vMF noise.

    Returns:
        A tuple `(edit_vector, (cnx_src, micro_evs_st),
        (cnx_tgt, micro_evs_ts))`.
    """
    with tf.variable_scope(OPS_NAME):
        wa_inserted_last, wa_deleted_last = wa_accumulator(
            insert_words, delete_words, iw_lengths, dw_lengths,
            wa_hidden_dim)

        if use_dropout and dropout_keep < 1.:
            wa_inserted_last = tf.nn.dropout(wa_inserted_last, dropout_keep)
            wa_deleted_last = tf.nn.dropout(wa_deleted_last, dropout_keep)

        embedding_matrix = vocab.get_embeddings()
        embedding_layer = ConcatPosEmbedding(
            transformer_params.hidden_size, embedding_matrix,
            transformer_params.pos_encoding_dim)

        micro_ev_projection = tf.layers.Dense(micro_edit_ev_dim,
                                              activation=None,
                                              use_bias=True,
                                              name='micro_ev_proj')
        mev_extractor = TransformerMicroEditExtractor(embedding_layer,
                                                      micro_ev_projection,
                                                      transformer_params)

        cnx_tgt, micro_evs_st = mev_extractor(source_words, target_words,
                                              source_lengths, target_lengths)
        cnx_src, micro_evs_ts = mev_extractor(target_words, source_words,
                                              target_lengths, source_lengths)

        micro_ev_encoder = tf.make_template('micro_ev_encoder',
                                            context_encoder,
                                            hidden_dim=meve_hidden_dim,
                                            num_layers=meve_hidden_layers,
                                            swap_memory=swap_memory,
                                            use_dropout=use_dropout,
                                            dropout_keep=dropout_keep)

        aggreg_mev_st = micro_ev_encoder(micro_evs_st, source_lengths)
        aggreg_mev_ts = micro_ev_encoder(micro_evs_ts, target_lengths)

        aggreg_mev_st_last = sequence.last_relevant(aggreg_mev_st,
                                                    source_lengths)
        aggreg_mev_ts_last = sequence.last_relevant(aggreg_mev_ts,
                                                    target_lengths)

        if use_dropout and dropout_keep < 1.:
            aggreg_mev_st_last = tf.nn.dropout(aggreg_mev_st_last,
                                               dropout_keep)
            aggreg_mev_ts_last = tf.nn.dropout(aggreg_mev_ts_last,
                                               dropout_keep)

        features = tf.concat([
            aggreg_mev_st_last,
            aggreg_mev_ts_last,
            wa_inserted_last,
            wa_deleted_last
        ], axis=1)

        edit_vector = tf.layers.dense(features, edit_dim, use_bias=False,
                                      name='encoder_ev')

        if enable_vae:
            edit_vector = sample_vMF(edit_vector, noise_scaler, norm_eps,
                                     norm_max)

        return edit_vector, (cnx_src, micro_evs_st), (cnx_tgt, micro_evs_ts)
def test_decoder_prepares(dataset_file, embedding_file):
    with tf.Graph().as_default():
        d_fn, gold_dataset = dataset_file
        e_fn, gold_embeds = embedding_file

        v, embed_matrix = vocab.read_word_embeddings(e_fn, EMBED_DIM)
        vocab_lookup = vocab.get_vocab_lookup(v)

        stop_token = tf.constant(bytes(vocab.STOP_TOKEN, encoding='utf8'),
                                 dtype=tf.string)
        stop_token_id = vocab_lookup.lookup(stop_token)

        start_token = tf.constant(bytes(vocab.START_TOKEN, encoding='utf8'),
                                  dtype=tf.string)
        start_token_id = vocab_lookup.lookup(start_token)

        pad_token = tf.constant(bytes(vocab.PAD_TOKEN, encoding='utf8'),
                                dtype=tf.string)
        pad_token_id = vocab_lookup.lookup(pad_token)

        dataset = neural_editor.input_fn(d_fn, vocab_lookup, BATCH_SIZE,
                                         NUM_EPOCH)
        iterator = dataset.make_initializable_iterator()
        (_, tgt, _, _), _ = iterator.get_next()

        tgt_len = sequence.length_pre_embedding(tgt)

        dec_inputs = decoder.prepare_decoder_inputs(tgt, start_token_id)
        dec_outputs = decoder.prepare_decoder_output(tgt, tgt_len,
                                                     stop_token_id,
                                                     pad_token_id)

        dec_inputs_len = sequence.length_pre_embedding(dec_inputs)
        dec_outputs_len = sequence.length_pre_embedding(dec_outputs)

        dec_outputs_last = sequence.last_relevant(
            tf.expand_dims(dec_outputs, 2), dec_outputs_len)
        dec_outputs_last = tf.squeeze(dec_outputs_last)

        with tf.Session() as sess:
            sess.run([
                tf.global_variables_initializer(),
                tf.local_variables_initializer(),
                tf.tables_initializer()
            ])
            sess.run(iterator.initializer)

            while True:
                try:
                    # Fetch into fresh names so the graph tensors stay
                    # fetchable on the next iteration, and catch only the
                    # end-of-dataset signal so assertion failures propagate.
                    (di, do, tl, dil, dol, start_id, stop_id,
                     do_last) = sess.run([
                         dec_inputs, dec_outputs, tgt_len, dec_inputs_len,
                         dec_outputs_len, start_token_id, stop_token_id,
                         dec_outputs_last
                     ])

                    # Both decoder views are one token longer than the
                    # target: inputs gain <start>, outputs gain <stop>.
                    assert list(dil) == list(dol) == list(tl + 1)
                    assert list(di[:, 0]) == list(
                        np.ones_like(di[:, 0]) * start_id)
                    assert list(do_last) == list(
                        np.ones_like(do_last) * stop_id)
                except tf.errors.OutOfRangeError:
                    break
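
# Hedged illustration (plain Python, hypothetical helper name) of the
# contract the test above checks: decoder inputs are the target prefixed
# with <start>, decoder outputs are the target suffixed with <stop>, so
# both views are exactly one token longer than the target itself.
def _example_decoder_views(tgt_ids, start_id, stop_id):
    # tgt_ids: token ids of one example, e.g. [5, 8, 3].
    dec_inputs = [start_id] + list(tgt_ids)   # [<start>, 5, 8, 3]
    dec_outputs = list(tgt_ids) + [stop_id]   # [5, 8, 3, <stop>]
    assert len(dec_inputs) == len(dec_outputs) == len(tgt_ids) + 1
    return dec_inputs, dec_outputs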
def attn_encoder(source_words, target_words, insert_words, delete_words,
                 source_lengths, target_lengths, iw_lengths, dw_lengths,
                 ctx_hidden_dim, ctx_hidden_layer, wa_hidden_dim,
                 wa_hidden_layer, edit_dim, micro_edit_ev_dim, num_heads,
                 noise_scaler, norm_eps, norm_max, dropout_keep=1.,
                 use_dropout=False, swap_memory=False, enable_vae=True):
    """Attention-based edit encoder that max-pools the micro edit vectors.

    Args:
        source_words: source sequence tensor.
        target_words: target sequence tensor.
        insert_words: words inserted going from source to target.
        delete_words: words deleted going from source to target.
        source_lengths: per-example source lengths.
        target_lengths: per-example target lengths.
        iw_lengths: per-example insert-word counts.
        dw_lengths: per-example delete-word counts.
        ctx_hidden_dim: context-encoder hidden size.
        ctx_hidden_layer: context-encoder layer count.
        wa_hidden_dim: word-aggregator hidden size.
        wa_hidden_layer: word-aggregator layer count.
        edit_dim: dimensionality of the edit vector.
        micro_edit_ev_dim: dimensionality of each micro edit vector.
        num_heads: number of attention heads for source/target alignment.
        noise_scaler: scaling factor for the vMF noise.
        norm_eps: epsilon used when rescaling the edit-vector norm.
        norm_max: maximum edit-vector norm.
        dropout_keep: dropout keep probability.
        use_dropout: whether dropout is applied.
        swap_memory: passed to the underlying dynamic RNNs.
        enable_vae: if True, perturb the edit vector with vMF noise.

    Returns:
        A tuple `(edit_vector, dummy, dummy)`; the two constant tensors are
        placeholders, as this variant exposes no per-word outputs.
    """
    with tf.variable_scope(OPS_NAME):
        cnx_encoder = tf.make_template('cnx_encoder', context_encoder,
                                       hidden_dim=ctx_hidden_dim,
                                       num_layers=ctx_hidden_layer,
                                       swap_memory=swap_memory,
                                       use_dropout=use_dropout,
                                       dropout_keep=dropout_keep)

        wa = tf.make_template('wa', word_aggregator,
                              hidden_dim=wa_hidden_dim,
                              num_layers=wa_hidden_layer,
                              swap_memory=swap_memory,
                              use_dropout=use_dropout,
                              dropout_keep=dropout_keep)

        wa_inserted = wa(insert_words, iw_lengths)
        wa_deleted = wa(delete_words, dw_lengths)

        wa_inserted_last = sequence.last_relevant(wa_inserted, iw_lengths)
        wa_deleted_last = sequence.last_relevant(wa_deleted, dw_lengths)

        if use_dropout and dropout_keep < 1.:
            wa_inserted_last = tf.nn.dropout(wa_inserted_last, dropout_keep)
            wa_deleted_last = tf.nn.dropout(wa_deleted_last, dropout_keep)

        cnx_src = cnx_encoder(source_words, source_lengths)
        cnx_tgt = cnx_encoder(target_words, target_lengths)

        # bs x seq_len x micro_edit_vec_dim
        micro_evs_st, micro_evs_ts = create_micro_edit_vectors(
            cnx_src, cnx_tgt, source_lengths, target_lengths,
            ctx_hidden_dim, num_heads, micro_edit_ev_dim,
            dropout_keep, use_dropout)

        # Max-pool over time; padded steps are pushed to -1e9 first so they
        # can never win the max.
        micro_evs_st = masked_fill(micro_evs_st, source_lengths, -1e9)
        micro_evs_ts = masked_fill(micro_evs_ts, target_lengths, -1e9)

        max_mev_st = tf.reduce_max(micro_evs_st, axis=1)  # bs x micro_edit_vec_dim
        max_mev_ts = tf.reduce_max(micro_evs_ts, axis=1)  # bs x micro_edit_vec_dim

        # Split edit_dim between the two feature families in proportion to
        # their widths; each projection is applied twice (st/ts, ins/del),
        # hence the halving.
        micro_ev_final_nodes = int(
            micro_edit_ev_dim / (micro_edit_ev_dim + wa_hidden_dim) * edit_dim)
        wa_final_nodes = int(
            wa_hidden_dim / (micro_edit_ev_dim + wa_hidden_dim) * edit_dim)

        micro_evs_prenoise = tf.layers.Dense(micro_ev_final_nodes // 2,
                                             activation=None, use_bias=False)
        wa_prenoise = tf.layers.Dense(wa_final_nodes // 2,
                                      activation=None, use_bias=False)

        edit_vector = tf.concat([
            micro_evs_prenoise(max_mev_st),
            micro_evs_prenoise(max_mev_ts),
            wa_prenoise(wa_inserted_last),
            wa_prenoise(wa_deleted_last)
        ], axis=1)

        if enable_vae:
            edit_vector = sample_vMF(edit_vector, noise_scaler, norm_eps,
                                     norm_max)

        return edit_vector, tf.constant([[0]]), tf.constant([[0]])
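
# Hedged stand-in (assumed behavior, hypothetical helper) for the masked
# max-pooling used above: `masked_fill` presumably pushes padded time steps
# to a large negative value so that `tf.reduce_max` over the time axis only
# ever selects valid positions. A minimal equivalent:
def _example_masked_max_pool(seq, lengths):
    # seq: bs x max_len x dim, lengths: bs
    mask = tf.sequence_mask(lengths, maxlen=tf.shape(seq)[1],
                            dtype=tf.float32)         # bs x max_len
    filled = seq + (1. - mask[:, :, tf.newaxis]) * -1e9
    return tf.reduce_max(filled, axis=1)              # bs x dim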
def rnn_encoder(source_words, target_words, insert_words, delete_words,
                source_lengths, target_lengths, iw_lengths, dw_lengths,
                ctx_hidden_dim, ctx_hidden_layer, wa_hidden_dim,
                wa_hidden_layer, edit_dim, noise_scaler, norm_eps, norm_max,
                dropout_keep):
    """RNN edit encoder; always applies dropout and vMF noise.

    Args:
        source_words: source sequence tensor.
        target_words: target sequence tensor.
        insert_words: words inserted going from source to target.
        delete_words: words deleted going from source to target.
        source_lengths: per-example source lengths.
        target_lengths: per-example target lengths.
        iw_lengths: per-example insert-word counts.
        dw_lengths: per-example delete-word counts.
        ctx_hidden_dim: context-encoder hidden size.
        ctx_hidden_layer: context-encoder layer count.
        wa_hidden_dim: word-aggregator hidden size.
        wa_hidden_layer: word-aggregator layer count.
        edit_dim: dimensionality of the edit vector.
        noise_scaler: scaling factor for the vMF noise.
        norm_eps: epsilon used when rescaling the edit-vector norm.
        norm_max: maximum edit-vector norm.
        dropout_keep: dropout keep probability.

    Returns:
        The vMF-noised edit vector.
    """
    with tf.variable_scope(OPS_NAME):
        cnx_encoder = tf.make_template('cnx_encoder', context_encoder,
                                       hidden_dim=ctx_hidden_dim,
                                       num_layers=ctx_hidden_layer)

        cnx_src = cnx_encoder(source_words, source_lengths)
        cnx_tgt = cnx_encoder(target_words, target_lengths)

        cnx_src_last = sequence.last_relevant(cnx_src, source_lengths)
        cnx_tgt_last = sequence.last_relevant(cnx_tgt, target_lengths)

        cnx_src_last = tf.nn.dropout(cnx_src_last, dropout_keep)
        cnx_tgt_last = tf.nn.dropout(cnx_tgt_last, dropout_keep)

        wa = tf.make_template('wa', word_aggregator,
                              hidden_dim=wa_hidden_dim,
                              num_layers=wa_hidden_layer)

        wa_inserted = wa(insert_words, iw_lengths)
        wa_deleted = wa(delete_words, dw_lengths)

        wa_inserted_last = sequence.last_relevant(wa_inserted, iw_lengths)
        wa_deleted_last = sequence.last_relevant(wa_deleted, dw_lengths)

        wa_inserted_last = tf.nn.dropout(wa_inserted_last, dropout_keep)
        wa_deleted_last = tf.nn.dropout(wa_deleted_last, dropout_keep)

        features = tf.concat([
            cnx_src_last, cnx_tgt_last, wa_inserted_last, wa_deleted_last
        ], axis=1)

        edit_vector = tf.layers.dense(features, edit_dim, name='encoder_ev')
        noised_edit_vector = sample_vMF(edit_vector, noise_scaler, norm_eps,
                                        norm_max)

        return noised_edit_vector