# conv_hyp = char_conv(char_embedding_hyp) # embed_pre = tf.concat((embedding_pre, antonym1, exact1to2, synonym1, conv_pre), -1) # embed_hyp = tf.concat((embedding_hyp, antonym2, exact2to1, synonym2, conv_hyp), -1) embed_pre = tf.concat((embedding_pre, antonym1, exact1to2, synonym1), -1) embed_hyp = tf.concat((embedding_hyp, antonym2, exact2to1, synonym2), -1) hout_pre = highway_network(embed_pre, 2, [tf.nn.sigmoid] * 2, "premise") hout_hyp = highway_network(embed_hyp, 2, [tf.nn.sigmoid] * 2, "hypothesis") #peter: dim reduction hout_pre = normalize(tf.layers.dense(hout_pre, hidden_dim, activation=tf.nn.sigmoid)) hout_hyp = normalize(tf.layers.dense(hout_hyp, hidden_dim, activation=tf.nn.sigmoid)) hout_pre = mask(hout_pre, sent1_mask) hout_hyp = mask(hout_hyp, sent2_mask) pre_atten = multihead_attention(hout_pre, hout_pre, hout_pre, scope="pre_atten" ) hyp_atten = multihead_attention(hout_hyp, hout_hyp, hout_hyp, scope="hyp_atten" ) ##concat the output of hw &attention
# NOTE(review): this chunk was collapsed onto a single physical line in the
# source (everything after '#peter:' was accidentally commented out); the
# intended line structure is restored below with code tokens unchanged.
# NOTE(review): this is a variant of the chunk above — it additionally feeds
# the char-conv features (conv_pre / conv_hyp) into the concat and passes
# h=num_heads to the attention calls. Looks like a duplicate/merge artifact;
# confirm which variant should survive.

# Word embeddings + lexical-match features + character-conv features,
# concatenated along the last axis. conv_pre / conv_hyp are defined elsewhere.
embed_pre = tf.concat((embedding_pre, antonym1, exact1to2, synonym1, conv_pre), -1)
embed_hyp = tf.concat((embedding_hyp, antonym2, exact2to1, synonym2, conv_hyp), -1)

# 2-layer highway network per sentence, sigmoid activation for both layers.
hout_pre = highway_network(embed_pre, 2, [tf.nn.sigmoid] * 2, "premise")
hout_hyp = highway_network(embed_hyp, 2, [tf.nn.sigmoid] * 2, "hypothesis")

# peter: dim reduction — dense projection to hidden_dim, then `normalize`
# (project helper; presumably layer norm — verify against its definition).
hout_pre = normalize(tf.layers.dense(hout_pre, hidden_dim, activation=tf.nn.sigmoid))
hout_hyp = normalize(tf.layers.dense(hout_hyp, hidden_dim, activation=tf.nn.sigmoid))

# Apply the sentence padding masks (helper defined elsewhere).
hout_pre = mask(hout_pre, sent1_mask)
hout_hyp = mask(hout_hyp, sent2_mask)

# Self-attention over each sentence with num_heads heads.
pre_atten = multihead_attention(hout_pre, hout_pre, hout_pre, h=num_heads, scope="pre_atten")
hyp_atten = multihead_attention(hout_hyp, hout_hyp, hout_hyp, h=num_heads, scope="hyp_atten")
# NOTE(review): the statement below is TRUNCATED in the source — the call's
# remaining arguments continue beyond this chunk (premise-to-hypothesis
# cross-attention, by its name). Left exactly as found.
p2h_atten = multihead_attention(hout_pre,
# embed_pre = tf.concat((embedding_pre, antonym1, exact1to2, synonym1, conv_pre), -1) # embed_hyp = tf.concat((embedding_hyp, antonym2, exact2to1, synonym2, conv_hyp), -1) embed_pre = tf.concat((embedding_pre, antonym1, exact1to2, synonym1), -1) embed_hyp = tf.concat((embedding_hyp, antonym2, exact2to1, synonym2), -1) hout_pre = highway_network(embed_pre, 2, [tf.nn.sigmoid] * 2, "premise") hout_hyp = highway_network(embed_hyp, 2, [tf.nn.sigmoid] * 2, "hypothesis") #peter: dim reduction hout_pre = normalize( tf.layers.dense(hout_pre, hidden_dim, activation=tf.nn.sigmoid)) hout_hyp = normalize( tf.layers.dense(hout_hyp, hidden_dim, activation=tf.nn.sigmoid)) hout_pre = mask(hout_pre, sent1_mask) hout_hyp = mask(hout_hyp, sent2_mask) pre_atten = multihead_attention(hout_pre, hout_pre, hout_pre, scope="pre_atten") hyp_atten = multihead_attention(hout_hyp, hout_hyp, hout_hyp, scope="hyp_atten") p2h_atten = multihead_attention(hout_pre, hout_hyp, hout_hyp,