Example #1
0
def create_micro_edit_vectors(cnx_src, cnx_tgt, src_lengths, tgt_lengths,
                              d_model, num_heads, micro_ev_dim,
                              dropout_keep=1., use_dropout=False):
    """Build per-token micro edit vectors via cross-attention, with separate
    (non-shared) projections per direction and batch normalization on top.

    Returns a pair ``(micro_evs_st, micro_evs_ts)`` shaped
    bs x src_seq_len x micro_ev_dim and bs x tgt_seq_len x micro_ev_dim.
    """
    assert d_model % num_heads == 0
    d_small = int(d_model // num_heads)

    apply_dropout = use_dropout and dropout_keep < 1.

    def _maybe_drop(tensor):
        # Inverted dropout with keep probability `dropout_keep`; identity otherwise.
        return tf.nn.dropout(tensor, dropout_keep) if apply_dropout else tensor

    src_over_tgt = MultiHeadAttention(num_heads, d_model, d_small, use_dropout,
                                      dropout_keep, name='src_tgt_attn')
    tgt_over_src = MultiHeadAttention(num_heads, d_model, d_small, use_dropout,
                                      dropout_keep, name='tgt_src_attn')

    # Each sequence attends over the opposite one (query, key, value, lengths).
    attn_st = _maybe_drop(src_over_tgt([cnx_src, cnx_tgt, cnx_tgt, tgt_lengths]))  # bs x src_seq_len x word_dim
    attn_ts = _maybe_drop(tgt_over_src([cnx_tgt, cnx_src, cnx_src, src_lengths]))  # bs x tgt_seq_len x word_dim

    # Concatenate each token with what it attended to in the other sequence.
    feed_st = tf.concat([cnx_src, attn_st], axis=2)  # bs x src_seq_len x 2*word_dim
    feed_ts = tf.concat([cnx_tgt, attn_ts], axis=2)  # bs x tgt_seq_len x 2*word_dim

    # No bias on the projections since batch normalization follows and would
    # absorb it anyway.
    project_st = tf.layers.Dense(micro_ev_dim, name='micro_ev_st_creator', use_bias=False)
    project_ts = tf.layers.Dense(micro_ev_dim, name='micro_ev_ts_creator', use_bias=False)
    micro_evs_st = project_st(feed_st)  # bs x src_seq_len x micro_edit_vec_dim
    micro_evs_ts = project_ts(feed_ts)  # bs x tgt_seq_len x micro_edit_vec_dim

    # The train/eval switch is expected to be published on this graph
    # collection by the surrounding model code.
    is_training = tf.get_collection('is_training')[0]

    micro_evs_st = tf.layers.batch_normalization(micro_evs_st, training=is_training, name="normalize_st")
    micro_evs_ts = tf.layers.batch_normalization(micro_evs_ts, training=is_training, name="normalize_ts")

    return _maybe_drop(micro_evs_st), _maybe_drop(micro_evs_ts)
Example #2
0
def create_micro_edit_vectors(cnx_src, cnx_tgt, src_lengths, tgt_lengths,
                              d_model, num_heads, micro_ev_dim,
                              dropout_keep=1., use_dropout=False):
    """Build per-token micro edit vectors via cross-attention, projecting both
    directions through one shared tanh layer.

    Returns a pair ``(micro_evs_st, micro_evs_ts)`` shaped
    bs x src_seq_len x micro_ev_dim and bs x tgt_seq_len x micro_ev_dim.
    """
    assert d_model % num_heads == 0
    d_small = int(d_model // num_heads)

    apply_dropout = use_dropout and dropout_keep < 1.

    def _maybe_drop(tensor):
        # Inverted dropout with keep probability `dropout_keep`; identity otherwise.
        return tf.nn.dropout(tensor, dropout_keep) if apply_dropout else tensor

    src_over_tgt = MultiHeadAttention(num_heads, d_model, d_small, use_dropout,
                                      dropout_keep, name='src_tgt_attn')
    tgt_over_src = MultiHeadAttention(num_heads, d_model, d_small, use_dropout,
                                      dropout_keep, name='tgt_src_attn')

    # Each sequence attends over the opposite one (query, key, value, lengths).
    attn_st = _maybe_drop(src_over_tgt([cnx_src, cnx_tgt, cnx_tgt, tgt_lengths]))  # bs x src_seq_len x word_dim
    attn_ts = _maybe_drop(tgt_over_src([cnx_tgt, cnx_src, cnx_src, src_lengths]))  # bs x tgt_seq_len x word_dim

    # Concatenate each token with what it attended to in the other sequence.
    feed_st = tf.concat([cnx_src, attn_st], axis=2)  # bs x src_seq_len x 2*word_dim
    feed_ts = tf.concat([cnx_tgt, attn_ts], axis=2)  # bs x tgt_seq_len x 2*word_dim

    # One shared projection for both directions (deliberate weight sharing).
    shared_projection = tf.layers.Dense(micro_ev_dim,
                                        activation='tanh',
                                        name='micro_ev_creator')
    micro_evs_st = shared_projection(feed_st)  # bs x src_seq_len x micro_edit_vec_dim
    micro_evs_ts = shared_projection(feed_ts)  # bs x tgt_seq_len x micro_edit_vec_dim

    return _maybe_drop(micro_evs_st), _maybe_drop(micro_evs_ts)
Example #3
0
def create_micro_edit_vectors(cnx_src, cnx_tgt, src_lengths, tgt_lengths,
                              d_model, num_heads, micro_ev_dim,
                              dropout_keep=1., use_dropout=False):
    """Build per-token micro edit vectors via cross-attention, where each
    sequence is extended with a learned "remove" token so attention may point
    at it (presumably to signal deletion — confirm against the model docs).

    Returns a pair ``(micro_evs_st, micro_evs_ts)`` shaped
    bs x src_seq_len x micro_ev_dim and bs x tgt_seq_len x micro_ev_dim.
    """
    batch_size = tf.shape(cnx_src)[0]
    memory_size = cnx_src.shape[-1]

    # Single learned embedding for the synthetic "remove" token.
    remove_embedding = tf.get_variable('remove_tok_embedding',
                                       shape=(memory_size,),
                                       dtype=tf.float32,
                                       initializer=tf.initializers.glorot_uniform(),
                                       trainable=True)
    # (memory_size,) -> 1 x 1 x memory_size -> bs x 1 x memory_size
    remove_embedding = tf.tile(remove_embedding[None, None, :], [batch_size, 1, 1])

    # Prepend the remove token to both memories and bump the lengths to match.
    extend_cnx_src = tf.concat([remove_embedding, cnx_src], axis=1)  # bs x (1 + src_seq_len) x memory_size
    extend_src_lengths = src_lengths + 1
    extend_cnx_tgt = tf.concat([remove_embedding, cnx_tgt], axis=1)  # bs x (1 + tgt_seq_len) x memory_size
    extend_tgt_lengths = tgt_lengths + 1

    assert d_model % num_heads == 0
    d_small = int(d_model // num_heads)

    apply_dropout = use_dropout and dropout_keep < 1.

    def _maybe_drop(tensor):
        # Inverted dropout with keep probability `dropout_keep`; identity otherwise.
        return tf.nn.dropout(tensor, dropout_keep) if apply_dropout else tensor

    # One attention module reused for both directions (deliberate weight
    # sharing — queries are the un-extended sequences, memories the extended ones).
    mha = MultiHeadAttention(num_heads, d_model, d_small, use_dropout,
                             dropout_keep, name='src_tgt_attn')
    attn_st = _maybe_drop(mha([cnx_src, extend_cnx_tgt, extend_cnx_tgt, extend_tgt_lengths]))  # bs x src_seq_len x word_dim
    attn_ts = _maybe_drop(mha([cnx_tgt, extend_cnx_src, extend_cnx_src, extend_src_lengths]))  # bs x tgt_seq_len x word_dim

    # Concatenate each token with what it attended to in the other sequence.
    feed_st = tf.concat([cnx_src, attn_st], axis=2)  # bs x src_seq_len x 2*word_dim
    feed_ts = tf.concat([cnx_tgt, attn_ts], axis=2)  # bs x tgt_seq_len x 2*word_dim

    # One shared linear projection for both directions.
    shared_projection = tf.layers.Dense(micro_ev_dim, name='micro_ev_creator')
    micro_evs_st = shared_projection(feed_st)  # bs x src_seq_len x micro_edit_vec_dim
    micro_evs_ts = shared_projection(feed_ts)  # bs x tgt_seq_len x micro_edit_vec_dim

    return _maybe_drop(micro_evs_st), _maybe_drop(micro_evs_ts)
def create_micro_edit_vectors(cnx_src, cnx_tgt, src_lengths, tgt_lengths,
                              d_model, num_heads, micro_ev_dim, noise_scaler, norm_eps, norm_max,
                              dropout_keep=1., use_dropout=False):
    """Build per-token micro edit vectors via cross-attention, then perturb
    them with von Mises-Fisher noise (``sample_vMF``).

    Returns a pair ``(micro_evs_st, micro_evs_ts)`` shaped
    bs x src_seq_len x micro_ev_dim and bs x tgt_seq_len x micro_ev_dim.
    """
    assert d_model % num_heads == 0
    d_small = int(d_model // num_heads)

    apply_dropout = use_dropout and dropout_keep < 1.

    def _maybe_drop(tensor):
        # Inverted dropout with keep probability `dropout_keep`; identity otherwise.
        return tf.nn.dropout(tensor, dropout_keep) if apply_dropout else tensor

    def _add_vmf_noise(tensor):
        # sample_vMF works on a flat (rows, dim) matrix, so flatten the
        # batch/time axes around the call and restore the shape afterwards.
        flat_shape = tf.shape(tensor)
        noisy = sample_vMF(tf.reshape(tensor, (-1, micro_ev_dim)),
                           noise_scaler, norm_eps, norm_max)
        return tf.reshape(noisy, flat_shape)

    src_over_tgt = MultiHeadAttention(num_heads, d_model, d_small, use_dropout,
                                      dropout_keep, name='src_tgt_attn')
    tgt_over_src = MultiHeadAttention(num_heads, d_model, d_small, use_dropout,
                                      dropout_keep, name='tgt_src_attn')

    # Each sequence attends over the opposite one (query, key, value, lengths).
    attn_st = _maybe_drop(src_over_tgt([cnx_src, cnx_tgt, cnx_tgt, tgt_lengths]))  # bs x src_seq_len x word_dim
    attn_ts = _maybe_drop(tgt_over_src([cnx_tgt, cnx_src, cnx_src, src_lengths]))  # bs x tgt_seq_len x word_dim

    # Concatenate each token with what it attended to in the other sequence.
    feed_st = tf.concat([cnx_src, attn_st], axis=2)  # bs x src_seq_len x 2*word_dim
    feed_ts = tf.concat([cnx_tgt, attn_ts], axis=2)  # bs x tgt_seq_len x 2*word_dim

    # NOTE(review): the single projection below is applied to BOTH directions,
    # yet is named like the src->tgt-only one in the non-shared variant —
    # looks like intentional weight sharing, but confirm it isn't a typo.
    projection = tf.layers.Dense(micro_ev_dim, name='micro_ev_st_creator')
    micro_evs_st = _add_vmf_noise(projection(feed_st))  # bs x src_seq_len x micro_edit_vec_dim
    micro_evs_ts = _add_vmf_noise(projection(feed_ts))  # bs x tgt_seq_len x micro_edit_vec_dim

    return _maybe_drop(micro_evs_st), _maybe_drop(micro_evs_ts)
def create_micro_edit_vectors(cnx_src, cnx_tgt, src_lengths, tgt_lengths,
                              d_model, num_heads, micro_ev_dim,
                              dropout_keep=1., use_dropout=False):
    """Build per-token micro edit vectors as the residual between what each
    token attends to in the other sequence and the token itself (no learned
    projection in this variant; ``micro_ev_dim`` is accepted for interface
    parity with the other variants but unused).

    Returns a pair ``(micro_evs_st, micro_evs_ts)`` shaped
    bs x src_seq_len x word_dim and bs x tgt_seq_len x word_dim.
    """
    assert d_model % num_heads == 0
    d_small = int(d_model // num_heads)

    apply_dropout = use_dropout and dropout_keep < 1.

    def _maybe_drop(tensor):
        # Inverted dropout with keep probability `dropout_keep`; identity otherwise.
        return tf.nn.dropout(tensor, dropout_keep) if apply_dropout else tensor

    src_over_tgt = MultiHeadAttention(num_heads, d_model, d_small, use_dropout,
                                      dropout_keep, name='src_tgt_attn')
    tgt_over_src = MultiHeadAttention(num_heads, d_model, d_small, use_dropout,
                                      dropout_keep, name='tgt_src_attn')

    # Each sequence attends over the opposite one (query, key, value, lengths).
    attn_st = _maybe_drop(src_over_tgt([cnx_src, cnx_tgt, cnx_tgt, tgt_lengths]))  # bs x src_seq_len x word_dim
    attn_ts = _maybe_drop(tgt_over_src([cnx_tgt, cnx_src, cnx_src, src_lengths]))  # bs x tgt_seq_len x word_dim

    # Edit vector = attended context minus the token's own encoding.
    micro_evs_st = attn_st - cnx_src  # bs x src_seq_len x word_dim
    micro_evs_ts = attn_ts - cnx_tgt  # bs x tgt_seq_len x word_dim

    return _maybe_drop(micro_evs_st), _maybe_drop(micro_evs_ts)