# Concatenate word embeddings with lexical-match features (antonym/exact/synonym).
embed_pre = tf.concat((embedding_pre, antonym1, exact1to2, synonym1), -1)
embed_hyp = tf.concat((embedding_hyp, antonym2, exact2to1, synonym2), -1)

# Encode each sentence with a 2-layer highway network.
hout_pre = highway_network(embed_pre, 2, [tf.nn.sigmoid] * 2, "premise")
hout_hyp = highway_network(embed_hyp, 2, [tf.nn.sigmoid] * 2, "hypothesis")

#peter: dim reduction
hout_pre = normalize(
    tf.layers.dense(hout_pre, hidden_dim, activation=tf.nn.sigmoid))
hout_hyp = normalize(
    tf.layers.dense(hout_hyp, hidden_dim, activation=tf.nn.sigmoid))

# Zero out padded positions.
hout_pre = mask(hout_pre, sent1_mask)
hout_hyp = mask(hout_hyp, sent2_mask)

# Multi-head self-attention over each sentence, plus
# cross-attention from premise (queries) to hypothesis (keys/values).
pre_atten = multihead_attention(hout_pre, hout_pre, hout_pre,
                                h=num_heads, scope="pre_atten")
hyp_atten = multihead_attention(hout_hyp, hout_hyp, hout_hyp,
                                h=num_heads, scope="hyp_atten")
p2h_atten = multihead_attention(hout_pre, hout_hyp, hout_hyp,
                                h=num_heads, scope="p2h_atten")

## concat the output of highway & attention
# [B, L, 300+300]
concatP = tf.concat(values=[hout_pre, pre_atten], axis=2, name='concatP')
concatH = tf.concat(values=[hout_hyp, hyp_atten], axis=2, name='concatH')
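
# For context: `mask` and `normalize` are project helpers whose definitions are
# not shown in this snippet. Below is a minimal sketch of plausible
# implementations, assuming sent1_mask/sent2_mask are [B, L] 0/1 tensors and
# that normalize() is layer normalization over the feature axis; the repo's
# actual helpers may differ (e.g. normalize() could carry learnable scale/shift).
import tensorflow as tf  # TF 1.x API, matching tf.layers.dense above

def mask(inputs, sent_mask):
    # Zero out padded time steps: [B, L, D] * [B, L, 1] broadcast.
    return inputs * tf.expand_dims(tf.cast(sent_mask, inputs.dtype), -1)

def normalize(inputs, epsilon=1e-8):
    # Plain layer normalization over the last axis (an assumption, not the
    # repo's confirmed definition).
    mean, variance = tf.nn.moments(inputs, axes=[-1], keep_dims=True)
    return (inputs - mean) / tf.sqrt(variance + epsilon)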