Example #1
def rnn_encoder(source_words, target_words, insert_words, delete_words,
                source_lengths, target_lengths, iw_lengths, dw_lengths,
                ctx_hidden_dim, ctx_hidden_layer, wa_hidden_dim, wa_hidden_layer,
                edit_dim, noise_scaler, norm_eps, norm_max, sent_encoder,
                dropout_keep=1., use_dropout=False, swap_memory=False, enable_vae=True):
    """
    Args:
        source_words:
        target_words:
        insert_words:
        delete_words:
        source_lengths:
        target_lengths:
        iw_lengths:
        dw_lengths:
        ctx_hidden_dim:
        ctx_hidden_layer:
        wa_hidden_dim:
        wa_hidden_layer:
        edit_dim:
        noise_scaler:
        norm_eps:
        norm_max:
        dropout_keep:

    Returns:

    """
    with tf.variable_scope(OPS_NAME):
        cnx_src, cnx_src_last = sent_encoder(source_words, source_lengths)
        cnx_tgt, cnx_tgt_last = sent_encoder(target_words, target_lengths)

        wa = tf.make_template('wa', context_encoder,
                              hidden_dim=wa_hidden_dim,
                              num_layers=wa_hidden_layer,
                              swap_memory=swap_memory,
                              use_dropout=use_dropout,
                              dropout_keep=dropout_keep)

        wa_inserted = wa(insert_words, iw_lengths)
        wa_deleted = wa(delete_words, dw_lengths)

        wa_inserted_last = sequence.last_relevant(wa_inserted, iw_lengths)
        wa_deleted_last = sequence.last_relevant(wa_deleted, dw_lengths)

        features = tf.concat([
            cnx_src_last,
            cnx_tgt_last,
            wa_inserted_last,
            wa_deleted_last
        ], axis=1)

        edit_vector = tf.layers.dense(features, edit_dim, name='encoder_ev')

        if enable_vae:
            edit_vector = sample_vMF(edit_vector, noise_scaler, norm_eps, norm_max)

        return edit_vector, wa_inserted, wa_deleted
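For orientation: rnn_encoder treats sent_encoder as an opaque callable returning per-step outputs plus a last state, as the two calls at the top of the function show. A minimal LSTM stub satisfying that contract, assuming words arrive pre-embedded as float tensors (an assumption; the snippet does not pin this down), could look like:

import tensorflow as tf

def lstm_sent_encoder(words, lengths, hidden_dim=64):
    # Hypothetical stand-in for `sent_encoder`. With sequence_length set,
    # dynamic_rnn returns the state from the last valid step of each sequence,
    # so state.h matches the "last" encoding rnn_encoder expects.
    cell = tf.nn.rnn_cell.LSTMCell(hidden_dim)
    outputs, state = tf.nn.dynamic_rnn(
        cell, words, sequence_length=lengths, dtype=tf.float32)
    return outputs, state.h
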
def create_micro_edit_vectors(cnx_src, cnx_tgt, src_lengths, tgt_lengths,
                              d_model, num_heads, micro_ev_dim, noise_scaler, norm_eps, norm_max,
                              dropout_keep=1., use_dropout=False):
    # Each head attends in a (d_model / num_heads)-dimensional subspace.
    assert d_model % num_heads == 0
    d_small = d_model // num_heads

    st_mha = MultiHeadAttention(num_heads, d_model, d_small, use_dropout, dropout_keep, name='src_tgt_attn')
    ts_mha = MultiHeadAttention(num_heads, d_model, d_small, use_dropout, dropout_keep, name='tgt_src_attn')

    attn_src_tgt = st_mha([cnx_src, cnx_tgt, cnx_tgt, tgt_lengths])  # bs x src_seq_len x word_dim
    attn_tgt_src = ts_mha([cnx_tgt, cnx_src, cnx_src, src_lengths])  # bs x tgt_seq_len x word_dim

    if use_dropout and dropout_keep < 1.:
        attn_src_tgt = tf.nn.dropout(attn_src_tgt, dropout_keep)
        attn_tgt_src = tf.nn.dropout(attn_tgt_src, dropout_keep)

    micro_edit_feed_st = tf.concat([cnx_src, attn_src_tgt], axis=2)  # bs x src_seq_len x 2*word_dim
    micro_edit_feed_ts = tf.concat([cnx_tgt, attn_tgt_src], axis=2)  # bs x tgt_seq_len x 2*word_dim

    micro_ev_st_creator = tf.layers.Dense(micro_ev_dim, name='micro_ev_st_creator')
    micro_evs_st = micro_ev_st_creator(micro_edit_feed_st)  # bs x src_seq_len x micro_edit_vec_dim
    micro_evs_ts = micro_ev_st_creator(micro_edit_feed_ts)  # bs x tgt_seq_len x micro_edit_vec_dim

    # sample_vMF expects a 2-D input, so flatten, sample, and restore the shape.
    org_shape = tf.shape(micro_evs_st)
    micro_evs_st = sample_vMF(tf.reshape(micro_evs_st, (-1, micro_ev_dim)), noise_scaler, norm_eps, norm_max)
    micro_evs_st = tf.reshape(micro_evs_st, org_shape)

    org_shape = tf.shape(micro_evs_ts)
    micro_evs_ts = sample_vMF(tf.reshape(micro_evs_ts, (-1, micro_ev_dim)), noise_scaler, norm_eps, norm_max)
    micro_evs_ts = tf.reshape(micro_evs_ts, org_shape)

    if use_dropout and dropout_keep < 1.:
        micro_evs_st = tf.nn.dropout(micro_evs_st, dropout_keep)
        micro_evs_ts = tf.nn.dropout(micro_evs_ts, dropout_keep)

    return micro_evs_st, micro_evs_ts
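A usage sketch for create_micro_edit_vectors (all dimensions below are arbitrary, and the call assumes the module's MultiHeadAttention and sample_vMF are in scope): each position of one sentence attends over the other sentence, the attended summary is concatenated with that position's own context, and a shared dense layer projects the pair down to one micro edit vector per position.

cnx_src = tf.zeros([8, 12, 128])  # batch 8, source length 12, d_model 128
cnx_tgt = tf.zeros([8, 15, 128])  # target length 15
src_len = tf.fill([8], 12)
tgt_len = tf.fill([8], 15)

mev_st, mev_ts = create_micro_edit_vectors(
    cnx_src, cnx_tgt, src_len, tgt_len,
    d_model=128, num_heads=4, micro_ev_dim=64,
    noise_scaler=100., norm_eps=0.1, norm_max=14.)
# mev_st: [8, 12, 64] -- one micro edit vector per source position
# mev_ts: [8, 15, 64] -- one per target position
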
def test_sample_vMF():
    with tf.Graph().as_default():
        dim = 2
        kappa = 100.
        norm_eps = 0.1
        norm_max = 14

        batch_size = tf.placeholder(tf.int32, shape=())
        m = tf.ones((batch_size, dim)) * 2
        noisy = ev.sample_vMF(m, kappa, norm_eps, norm_max)

        with tf.Session() as sess:
            sess.run([
                tf.global_variables_initializer(),
                tf.local_variables_initializer(),
                tf.tables_initializer()
            ])
            o = sess.run(noisy, feed_dict={batch_size: 4})
            print()
            print(o)
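A sanity-check variant of the test above (an addition, not part of the original suite): with a concentration as high as kappa = 100, samples from the vMF should stay very close to the mean direction, which cosine similarity makes measurable.

def test_sample_vMF_concentration():
    with tf.Graph().as_default():
        m = tf.ones((64, 2)) * 2
        noisy = ev.sample_vMF(m, 100., 0.1, 14)
        # Cosine similarity of each sample against the (shared) mean direction.
        cos = tf.reduce_sum(
            tf.nn.l2_normalize(noisy, axis=1) * tf.nn.l2_normalize(m, axis=1),
            axis=1)
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            print(sess.run(tf.reduce_mean(cos)))  # expected: close to 1.
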
def attn_encoder(source_words, target_words, insert_words, delete_words,
                 source_lengths, target_lengths, iw_lengths, dw_lengths,
                 ctx_hidden_dim, ctx_hidden_layer, wa_hidden_dim, wa_hidden_layer, meve_hidden_dim, meve_hidden_layers,
                 edit_dim, micro_edit_ev_dim, num_heads, noise_scaler, norm_eps, norm_max,
                 dropout_keep=1., use_dropout=False, swap_memory=False, enable_vae=True):
    """
    Args:
        source_words:
        target_words:
        insert_words:
        delete_words:
        source_lengths:
        target_lengths:
        iw_lengths:
        dw_lengths:
        ctx_hidden_dim:
        ctx_hidden_layer:
        wa_hidden_dim:
        wa_hidden_layer:
        edit_dim:
        noise_scaler:
        norm_eps:
        norm_max:
        dropout_keep:

    Returns:

    """
    with tf.variable_scope(OPS_NAME):
        cnx_encoder = tf.make_template('cnx_encoder', context_encoder,
                                       hidden_dim=ctx_hidden_dim,
                                       num_layers=ctx_hidden_layer,
                                       swap_memory=swap_memory,
                                       use_dropout=use_dropout,
                                       dropout_keep=dropout_keep)

        wa = tf.make_template('wa', context_encoder,
                              hidden_dim=wa_hidden_dim,
                              num_layers=wa_hidden_layer,
                              swap_memory=swap_memory,
                              use_dropout=use_dropout,
                              dropout_keep=dropout_keep)

        wa_inserted = wa(insert_words, iw_lengths)
        wa_deleted = wa(delete_words, dw_lengths)

        wa_inserted_last = sequence.last_relevant(wa_inserted, iw_lengths)
        wa_deleted_last = sequence.last_relevant(wa_deleted, dw_lengths)

        if use_dropout and dropout_keep < 1.:
            wa_inserted_last = tf.nn.dropout(wa_inserted_last, dropout_keep)
            wa_deleted_last = tf.nn.dropout(wa_deleted_last, dropout_keep)

        cnx_src = cnx_encoder(source_words, source_lengths)
        cnx_tgt = cnx_encoder(target_words, target_lengths)

        # bs x seq_len x micro_edit_vec_dim
        micro_evs_st, micro_evs_ts = create_micro_edit_vectors(
            cnx_src, cnx_tgt, source_lengths, target_lengths,
            ctx_hidden_dim, num_heads, micro_edit_ev_dim,
            dropout_keep, use_dropout
        )

        micro_ev_encoder = tf.make_template('micro_ev_encoder', context_encoder,
                                            hidden_dim=meve_hidden_dim,
                                            num_layers=meve_hidden_layers,
                                            swap_memory=swap_memory,
                                            use_dropout=use_dropout,
                                            dropout_keep=dropout_keep)

        aggreg_mev_st = micro_ev_encoder(micro_evs_st, source_lengths)
        aggreg_mev_ts = micro_ev_encoder(micro_evs_ts, target_lengths)

        aggreg_mev_st_last = sequence.last_relevant(aggreg_mev_st, source_lengths)
        aggreg_mev_ts_last = sequence.last_relevant(aggreg_mev_ts, target_lengths)

        if use_dropout and dropout_keep < 1.:
            aggreg_mev_st_last = tf.nn.dropout(aggreg_mev_st_last, dropout_keep)
            aggreg_mev_ts_last = tf.nn.dropout(aggreg_mev_ts_last, dropout_keep)

        features = tf.concat([
            aggreg_mev_st_last,
            aggreg_mev_ts_last,
            wa_inserted_last,
            wa_deleted_last
        ], axis=1)

        edit_vector = tf.layers.dense(features, edit_dim, use_bias=False, name='encoder_ev')

        if enable_vae:
            edit_vector = sample_vMF(edit_vector, noise_scaler, norm_eps, norm_max)

        # Requires an `is_training` bool tensor to have been registered in the
        # 'is_training' graph collection before this encoder is built.
        is_training = tf.get_collection('is_training')[0]
        edit_vector = tf.layers.batch_normalization(edit_vector, name='ev_normalizer', training=is_training)

        # Dummy tensors keep the return arity consistent with the other encoders.
        return edit_vector, tf.constant([[0]]), tf.constant([[0]])
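Note the hidden contract in this variant: it reads is_training from a graph collection, so the flag must be registered before the encoder is built, and tf.layers.batch_normalization queues its moving-average updates in tf.GraphKeys.UPDATE_OPS. A minimal wiring sketch (names illustrative):

with tf.Graph().as_default():
    is_training = tf.placeholder_with_default(False, shape=(), name='is_training')
    tf.add_to_collection('is_training', is_training)
    # ... build attn_encoder(...) here; batch norm reads the flag from the collection.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Group update_ops with the train op, and feed {is_training: True}
    # during training steps so the moving averages are maintained.
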
Example #5
def attn_encoder(source_words,
                 target_words,
                 insert_words,
                 delete_words,
                 source_lengths,
                 target_lengths,
                 iw_lengths,
                 dw_lengths,
                 transformer_params,
                 wa_hidden_dim,
                 meve_hidden_dim,
                 meve_hidden_layers,
                 edit_dim,
                 micro_edit_ev_dim,
                 noise_scaler,
                 norm_eps,
                 norm_max,
                 dropout_keep=1.,
                 use_dropout=False,
                 swap_memory=False,
                 enable_vae=True):
    """
    Args:
        source_words:
        target_words:
        insert_words:
        delete_words:
        source_lengths:
        target_lengths:
        iw_lengths:
        dw_lengths:
        ctx_hidden_dim:
        ctx_hidden_layer:
        wa_hidden_dim:
        wa_hidden_layer:
        edit_dim:
        noise_scaler:
        norm_eps:
        norm_max:
        dropout_keep:

    Returns:

    """
    with tf.variable_scope(OPS_NAME):
        wa_inserted_last, wa_deleted_last = wa_accumulator(
            insert_words, delete_words, iw_lengths, dw_lengths, wa_hidden_dim)

        if use_dropout and dropout_keep < 1.:
            wa_inserted_last = tf.nn.dropout(wa_inserted_last, dropout_keep)
            wa_deleted_last = tf.nn.dropout(wa_deleted_last, dropout_keep)

        embedding_matrix = vocab.get_embeddings()
        embedding_layer = ConcatPosEmbedding(
            transformer_params.hidden_size, embedding_matrix,
            transformer_params.pos_encoding_dim)
        micro_ev_projection = tf.layers.Dense(micro_edit_ev_dim,
                                              activation=None,
                                              use_bias=True,
                                              name='micro_ev_proj')
        mev_extractor = TransformerMicroEditExtractor(embedding_layer,
                                                      micro_ev_projection,
                                                      transformer_params)

        # The extractor re-encodes the second sequence against the first,
        # returning its contextual encoding plus per-position micro edit vectors.
        cnx_tgt, micro_evs_st = mev_extractor(source_words, target_words,
                                              source_lengths, target_lengths)
        cnx_src, micro_evs_ts = mev_extractor(target_words, source_words,
                                              target_lengths, source_lengths)

        micro_ev_encoder = tf.make_template('micro_ev_encoder',
                                            context_encoder,
                                            hidden_dim=meve_hidden_dim,
                                            num_layers=meve_hidden_layers,
                                            swap_memory=swap_memory,
                                            use_dropout=use_dropout,
                                            dropout_keep=dropout_keep)

        aggreg_mev_st = micro_ev_encoder(micro_evs_st, source_lengths)
        aggreg_mev_ts = micro_ev_encoder(micro_evs_ts, target_lengths)

        aggreg_mev_st_last = sequence.last_relevant(aggreg_mev_st,
                                                    source_lengths)
        aggreg_mev_ts_last = sequence.last_relevant(aggreg_mev_ts,
                                                    target_lengths)

        if use_dropout and dropout_keep < 1.:
            aggreg_mev_st_last = tf.nn.dropout(aggreg_mev_st_last,
                                               dropout_keep)
            aggreg_mev_ts_last = tf.nn.dropout(aggreg_mev_ts_last,
                                               dropout_keep)

        features = tf.concat([
            aggreg_mev_st_last,
            aggreg_mev_ts_last,
            wa_inserted_last,
            wa_deleted_last
        ], axis=1)

        edit_vector = tf.layers.dense(features,
                                      edit_dim,
                                      use_bias=False,
                                      name='encoder_ev')

        if enable_vae:
            edit_vector = sample_vMF(edit_vector, noise_scaler, norm_eps,
                                     norm_max)

        return edit_vector, (cnx_src, micro_evs_st), (cnx_tgt, micro_evs_ts)
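Only hidden_size and pos_encoding_dim are read from transformer_params in the code shown, though TransformerMicroEditExtractor receives the whole object. For experimentation, a minimal stand-in could be the namedtuple below; the real params object almost certainly carries further Transformer hyperparameters.

from collections import namedtuple

# Hypothetical minimal stand-in, not the repo's actual params object.
TransformerParams = namedtuple('TransformerParams',
                               ['hidden_size', 'pos_encoding_dim'])
params = TransformerParams(hidden_size=256, pos_encoding_dim=64)
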
def attn_encoder(source_words,
                 target_words,
                 insert_words,
                 delete_words,
                 source_lengths,
                 target_lengths,
                 iw_lengths,
                 dw_lengths,
                 ctx_hidden_dim,
                 ctx_hidden_layer,
                 wa_hidden_dim,
                 wa_hidden_layer,
                 edit_dim,
                 micro_edit_ev_dim,
                 num_heads,
                 noise_scaler,
                 norm_eps,
                 norm_max,
                 dropout_keep=1.,
                 use_dropout=False,
                 swap_memory=False,
                 enable_vae=True):
    """
    Args:
        source_words:
        target_words:
        insert_words:
        delete_words:
        source_lengths:
        target_lengths:
        iw_lengths:
        dw_lengths:
        ctx_hidden_dim:
        ctx_hidden_layer:
        wa_hidden_dim:
        wa_hidden_layer:
        edit_dim:
        noise_scaler:
        norm_eps:
        norm_max:
        dropout_keep:

    Returns:

    """
    print("RUn me111!!")
    with tf.variable_scope(OPS_NAME):
        cnx_encoder = tf.make_template('cnx_encoder',
                                       context_encoder,
                                       hidden_dim=ctx_hidden_dim,
                                       num_layers=ctx_hidden_layer,
                                       swap_memory=swap_memory,
                                       use_dropout=use_dropout,
                                       dropout_keep=dropout_keep)

        wa = tf.make_template('wa',
                              word_aggregator,
                              hidden_dim=wa_hidden_dim,
                              num_layers=wa_hidden_layer,
                              swap_memory=swap_memory,
                              use_dropout=use_dropout,
                              dropout_keep=dropout_keep)

        wa_inserted = wa(insert_words, iw_lengths)
        wa_deleted = wa(delete_words, dw_lengths)

        wa_inserted_last = sequence.last_relevant(wa_inserted, iw_lengths)
        wa_deleted_last = sequence.last_relevant(wa_deleted, dw_lengths)

        if use_dropout and dropout_keep < 1.:
            wa_inserted_last = tf.nn.dropout(wa_inserted_last, dropout_keep)
            wa_deleted_last = tf.nn.dropout(wa_deleted_last, dropout_keep)

        cnx_src = cnx_encoder(source_words, source_lengths)
        cnx_tgt = cnx_encoder(target_words, target_lengths)

        # bs x seq_len x micro_edit_vec_dim
        micro_evs_st, micro_evs_ts = create_micro_edit_vectors(
            cnx_src, cnx_tgt, source_lengths, target_lengths, ctx_hidden_dim,
            num_heads, micro_edit_ev_dim, noise_scaler, norm_eps, norm_max,
            dropout_keep=dropout_keep, use_dropout=use_dropout)

        # Fill padded positions with -1e9 so they cannot win the max-pool below.
        micro_evs_st = masked_fill(micro_evs_st, source_lengths, -1e9)
        micro_evs_ts = masked_fill(micro_evs_ts, target_lengths, -1e9)

        max_mev_st = tf.reduce_max(micro_evs_st, axis=1)  # bs x micro_edit_vec_dim
        max_mev_ts = tf.reduce_max(micro_evs_ts, axis=1)  # bs x micro_edit_vec_dim

        # Split edit_dim between the micro-edit and word-aggregator features
        # in proportion to their dimensionalities.
        micro_ev_final_nodes = int(
            micro_edit_ev_dim / (micro_edit_ev_dim + wa_hidden_dim) * edit_dim)
        wa_final_nodes = int(
            wa_hidden_dim / (micro_edit_ev_dim + wa_hidden_dim) * edit_dim)

        # Each projection below is applied twice in the final concat, so each
        # receives half of its share of edit_dim.
        micro_evs_prenoise = tf.layers.Dense(micro_ev_final_nodes // 2,
                                             activation=None,
                                             use_bias=False)
        wa_prenoise = tf.layers.Dense(wa_final_nodes // 2,
                                      activation=None,
                                      use_bias=False)

        edit_vector = tf.concat([
            micro_evs_prenoise(max_mev_st),
            micro_evs_prenoise(max_mev_ts),
            wa_prenoise(wa_inserted_last),
            wa_prenoise(wa_deleted_last)
        ], axis=1)

        if enable_vae:
            edit_vector = sample_vMF(edit_vector, noise_scaler, norm_eps,
                                     norm_max)

        # Dummy tensors keep the return arity consistent with the other encoders.
        return edit_vector, tf.constant([[0]]), tf.constant([[0]])
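To make the proportional split of edit_dim concrete, a quick worked example with arbitrary dimensions:

micro_edit_ev_dim, wa_hidden_dim, edit_dim = 64, 192, 256
micro_nodes = int(micro_edit_ev_dim / (micro_edit_ev_dim + wa_hidden_dim) * edit_dim)  # 64
wa_nodes = int(wa_hidden_dim / (micro_edit_ev_dim + wa_hidden_dim) * edit_dim)         # 192
# Each Dense layer gets half of its share (// 2) because its output appears
# twice in the final concat: 32 + 32 + 96 + 96 = 256 = edit_dim.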