def cnn_encoder(self, input_tensor, name, reuse, all_index, filters):
    """Multi-width 1-D conv encoder: conv -> relu -> max-pool -> self-attention per filter size."""
    with tf.variable_scope(name_or_scope=name, reuse=reuse):
        filter_sizes = filters  # avoid shadowing the built-in `filter`
        res = []
        for index, ele in enumerate(filter_sizes):
            with tf.name_scope("conv-maxpool-%s" % index):
                conv = tf.layers.conv1d(input_tensor, 300, ele, strides=1,
                                        padding='valid',
                                        name='conv2_ops_%s_%s' % (index, all_index))
                conv = tf.nn.relu(conv)
                conv = tf.layers.max_pooling1d(conv, 2, 1)
                # conv, _, _, _ = self_attention_topk(
                #     conv, scope="conv2_ops_att%s_%s" % (index, all_index), top=1)
                conv, _ = self_attention(
                    conv, scope="conv2_ops_att%s_%s" % (index, all_index))
                conv = tf.layers.dropout(conv, self.dropout)
                res.append(conv)
        ress = tf.stack(res, 1)
        cnn_out = tf.reshape(
            ress, [-1, self.args.filter_num * len(self.args.filters)])
        # cnn_out = tf.nn.dropout(cnn_out, 0.7)
        # sent_attention = self.intent_attention(self.sent_emb)
        # cnn_out = tf.concat((cnn_out, sent_attention), 1)
        return cnn_out
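# Hedged usage sketch (illustrative, not part of the original code): assuming
# args.filter_num == 300 and that self_attention collapses the time axis to a
# single [batch, 300] vector per filter branch, the flattened width is
# filter_num * len(filters).
#
#   emb = tf.placeholder(tf.float32, [None, seq_len, emb_size])  # hypothetical input
#   cnn_out = self.cnn_encoder(input_tensor=emb, name='conv_demo',
#                              reuse=False, all_index=0, filters=[3, 4, 5])
#   # cnn_out: [batch_size, 300 * 3]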
def build_model_ops(self, i, sent_emb, sent_len, input_mask):
    with tf.device('/device:GPU:%s' % self.gpu_id):
        with tf.variable_scope(self.scope + "_enc_%s" % i):
            with tf.variable_scope(name_or_scope=self.scope + '_enc'):
                with tf.variable_scope(name_or_scope='enc_'):
                    s1_trans_emb = build_transformer_emb(
                        sent_word=self.sent_token,
                        sent_word_emb=sent_emb,
                        text_len=self.seq_len,
                        args=self.args,
                        dropout=self.dropout,
                        name='left')
                    sen_enc = transformer_encoder(
                        s1_trans_emb,
                        name='self_trans1',
                        args=self.args,
                        dropout=self.dropout,
                        reuse=False,
                        context_mask=input_mask,
                        num_blocks=self.args.num_blocks)
                    # sen_enc = sent_encoder(sent_word_emb=sent_emb,
                    #                        hidden_dim=self.args.hidden,
                    #                        sequence_length=sent_len,
                    #                        name='sent_enc',
                    #                        dropout=self.dropout)
                    sent_enc_, _ = self_attention(sen_enc, sent_len)
                    # sent_enc_ = mean_pool(sen_enc, self.sent_len)
                    # self.estimation = tf.layers.dense(sent_enc_, self.args.class_num)
                    estimation = tf.contrib.layers.fully_connected(
                        inputs=sent_enc_,
                        num_outputs=self.args.class_nums[i],
                        activation_fn=None,
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=tf.contrib.layers.l2_regularizer(
                            scale=self.args.l2_reg),
                        biases_initializer=tf.constant_initializer(1e-04),
                        scope="FC")
                    pred_probs = tf.contrib.layers.softmax(estimation)
                    logits = tf.cast(tf.argmax(pred_probs, -1), tf.int32)
                    self.estimation_list.append(estimation)
                    self.pred_probs_list.append(pred_probs)
                    self.logits_list.append(logits)
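# Hedged usage sketch (illustrative): build_model_ops builds one classification
# head per task index, so it would typically be called in a loop over
# self.args.class_nums; `sent_emb`, `sent_len` and `input_mask` are assumed to
# be prepared by the caller.
#
#   for i in range(len(self.args.class_nums)):
#       self.build_model_ops(i, sent_emb, sent_len, input_mask)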
def build_model(self):
    self.estimation_list = []
    self.pred_probs_list = []
    self.logits_list = []
    self.semantic_losses = []
    self.args.filters = [3, 4, 5]
    self.args.filter_num = 300
    with tf.device('/device:GPU:%s' % self.gpu_id):
        # for i in range(len(self.args.class_nums)):
        for i in range(len(self.args.class_nums)):
            with tf.variable_scope(name_or_scope=self.scope + '_enc_%s' % i):
                # sent_token = tf.layers.dropout(self.sent_token, 0.1)
                # sent_word_re = tf.layers.dropout(self.sent_word_re, 0.1)
                s1_emb = self.build_emb(self.sent_token,
                                        vocab_size=self.args.vocab_size,
                                        reuse=False,
                                        name='emb_%s' % i)
                s1_emb_re = self.build_emb(self.sent_word_re,
                                           vocab_size=self.args.vocab_size,
                                           reuse=True,
                                           name='emb_%s' % i)
                s1_emb_char = self.build_emb(self.sent_char,
                                             vocab_size=self.args.vocab_size,
                                             reuse=True,
                                             name='emb_%s' % i)
                s1_emb_re_char = self.build_emb(self.sent_word_re_char,
                                                vocab_size=self.args.vocab_size,
                                                reuse=True,
                                                name='emb_%s' % i)
                s1_emb_neg = self.build_emb(self.sent_token_neg,
                                            vocab_size=self.args.vocab_size,
                                            reuse=True,
                                            name='emb_%s' % i)
                s1_emb_char_neg = self.build_emb(self.sent_char_neg,
                                                 vocab_size=self.args.vocab_size,
                                                 reuse=True,
                                                 name='emb_%s' % i)
                # NOTE: both branches currently select the rewritten-sentence
                # inputs; they are kept separate in case the i == 0 task is
                # switched back to the original sentence.
                if i != 0:
                    sent_token_emb = s1_emb_re
                    sent_char_emb = s1_emb_re_char
                    sent_token_len = self.sent_len_re
                    sent_char_len = self.sent_len_re_char
                    # s1_emb = s1_emb_re
                    # sent_len = self.sent_len_re
                    # sent_token = self.sent_word_re
                    # input_mask = tf.sequence_mask(self.sent_len_re, self.seq_len, dtype=tf.float32)
                    # input_mask = tf.cast(tf.expand_dims(input_mask, axis=-1),
                    #                      tf.float32)  # batch_size x seq_len x 1
                else:
                    sent_token_emb = s1_emb_re
                    sent_char_emb = s1_emb_re_char
                    sent_token_len = self.sent_len_re
                    sent_char_len = self.sent_len_re_char
                    # sent_token_emb = s1_emb
                    # sent_char_emb = s1_emb_char
                    # sent_token_len = self.sent_len
                    # sent_char_len = self.sent_len_char
                    # sent_len = self.sent_len
                    # sent_token = self.sent_token
                    # input_mask = tf.sequence_mask(self.sent_len, self.seq_len, dtype=tf.float32)
                    # input_mask = tf.cast(tf.expand_dims(input_mask, axis=-1),
                    #                      tf.float32)  # batch_size x seq_len x 1
                with tf.variable_scope(name_or_scope='_%s' % i):
                    sent_enc_token = self.transformer_encoder(sent_token_emb,
                                                              sent_token_emb,
                                                              name='self_trans',
                                                              reuse=False,
                                                              num_blocks=2)
                    # sent_enc_token = mean_pool(sent_enc_token, sent_token_len)
                    sent_enc_token, _ = self_attention(sent_enc_token,
                                                       sent_token_len,
                                                       scope='s_0')
                    sent_enc_char = self.transformer_encoder(sent_char_emb,
                                                             sent_char_emb,
                                                             name='self_trans_char',
                                                             reuse=False,
                                                             num_blocks=2)
                    # sent_enc_char = mean_pool(sent_enc_char, sent_char_len)
                    sent_enc_char, _ = self_attention(sent_enc_char,
                                                      sent_char_len,
                                                      scope='s_1')
                    sent_enc_tf = tf.concat([sent_enc_token, sent_enc_char], -1)

                    sent_token_emb = self.cnn_encoder(input_tensor=sent_token_emb,
                                                      name='conv_%s' % str(i),
                                                      reuse=False,
                                                      all_index=i,
                                                      filters=[3, 4, 5])
                    s1_flatten_token = tf.layers.flatten(sent_token_emb)
                    sent_char_emb = self.cnn_encoder(input_tensor=sent_char_emb,
                                                     name='conv_%s_char' % str(i),
                                                     reuse=False,
                                                     all_index=i,
                                                     filters=[5, 6, 7])
                    s1_flatten_char = tf.layers.flatten(sent_char_emb)
                    sent_enc_cnn = tf.concat([s1_flatten_token, s1_flatten_char], -1)
                    sent_enc_cnn = tf.layers.dense(sent_enc_cnn, 600)

                    sent_enc = tf.stack([sent_enc_tf, sent_enc_cnn], 1)
                    sent_enc_, _ = self_attention(sent_enc)

                    # distance loss between the transformer and CNN encodings
                    s1_emb_enc, s2_emb_enc = sent_enc_tf, sent_enc_cnn
                    query_norm = tf.sqrt(
                        tf.reduce_sum(tf.square(s1_emb_enc), 1, True))
                    doc_norm = tf.sqrt(
                        tf.reduce_sum(tf.square(s2_emb_enc), 1, True))
                    prod = tf.reduce_sum(
                        tf.multiply(s1_emb_enc, s2_emb_enc), 1, True)
                    norm_prod = tf.multiply(query_norm, doc_norm) + 0.01
                    cos_sim = tf.truediv(prod, norm_prod)
                    neg_cos_sim = tf.abs(1 - cos_sim)
                    estimation_semantic = tf.concat([neg_cos_sim, cos_sim], 1)
                    semantic_target = tf.ones_like(self.target1)
                    semantic_loss = tf.losses.sparse_softmax_cross_entropy(
                        labels=semantic_target, logits=estimation_semantic)
                    # NOTE: the computed semantic_loss is currently not used;
                    # a constant 0.0 is appended instead.
                    self.semantic_losses.append(0.0)

                    # sent_enc_, _ = self_attention(sen_enc, sent_len)
                    # sent_enc_ = tf.reshape(tf.cast([sent_enc_, s1_flatten], 1), [-1, 1200])
                    # self.estimation = tf.layers.dense(sent_enc_, self.args.class_num)
                    estimation = tf.contrib.layers.fully_connected(
                        inputs=sent_enc_,
                        num_outputs=self.args.class_nums[i],
                        activation_fn=None,
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=tf.contrib.layers.l2_regularizer(
                            scale=self.args.l2_reg),
                        biases_initializer=tf.constant_initializer(1e-04),
                        scope="FC")
                    pred_probs = tf.contrib.layers.softmax(estimation)
                    logits = tf.cast(tf.argmax(pred_probs, -1), tf.int32)
                    self.estimation_list.append(estimation)
                    self.pred_probs_list.append(pred_probs)
                    self.logits_list.append(logits)
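# Hedged refactoring sketch (not used by the code above): the cosine-similarity
# "distance loss" that build_model computes inline could be factored into a
# helper like the one below; `cosine_match_loss` is an illustrative name and is
# not part of the original model.
def cosine_match_loss(self, enc_a, enc_b, target, eps=0.01):
    # enc_a, enc_b: [batch, dim] sentence encodings; target: [batch] int labels
    # (1 = the pair should be similar, 0 = dissimilar).
    a_norm = tf.sqrt(tf.reduce_sum(tf.square(enc_a), 1, True))
    b_norm = tf.sqrt(tf.reduce_sum(tf.square(enc_b), 1, True))
    prod = tf.reduce_sum(tf.multiply(enc_a, enc_b), 1, True)
    cos_sim = tf.truediv(prod, tf.multiply(a_norm, b_norm) + eps)  # [batch, 1]
    logits = tf.concat([tf.abs(1 - cos_sim), cos_sim], 1)  # [dissimilar, similar]
    return tf.losses.sparse_softmax_cross_entropy(labels=target, logits=logits)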
def build_model(self):
    self.estimation_list = []
    self.pred_probs_list = []
    self.logits_list = []
    self.semantic_losses = []
    self.label_losses = []
    self.args.filters = [3, 4, 5]
    self.args.filter_num = 300
    # with tf.device('/device:GPU:%s' % self.gpu_id):
    # for i in range(len(self.args.class_nums)):
    for i in range(len(self.args.class_nums)):
        with tf.variable_scope(name_or_scope=self.scope + '_enc_%s' % i):
            # sent_token = tf.layers.dropout(self.sent_token, 0.1)
            # sent_word_re = tf.layers.dropout(self.sent_word_re, 0.1)
            s1_emb = self.build_emb(self.sent_token,
                                    vocab_size=self.args.vocab_size,
                                    reuse=False,
                                    name='emb_%s' % i)
            s1_emb_re = self.build_emb(self.sent_word_re,
                                       vocab_size=self.args.vocab_size,
                                       reuse=True,
                                       name='emb_%s' % i)
            s1_emb_char = self.build_emb(self.sent_char,
                                         vocab_size=self.args.vocab_size,
                                         reuse=True,
                                         name='emb_%s' % i)
            s1_emb_re_char = self.build_emb(self.sent_word_re_char,
                                            vocab_size=self.args.vocab_size,
                                            reuse=True,
                                            name='emb_%s' % i)
            s1_emb_neg = self.build_emb(self.sent_token_neg,
                                        vocab_size=self.args.vocab_size,
                                        reuse=True,
                                        name='emb_%s' % i)
            s1_emb_char_neg = self.build_emb(self.sent_char_neg,
                                             vocab_size=self.args.vocab_size,
                                             reuse=True,
                                             name='emb_%s' % i)
            sent_token_emb = s1_emb
            sent_char_emb = s1_emb_char
            sent_token_len = self.sent_len
            sent_char_len = self.sent_len_char
            # s1_emb = s1_emb_re
            # sent_len = self.sent_len_re
            # sent_token = self.sent_word_re
            input_mask_token = tf.sequence_mask(sent_token_len,
                                                self.seq_len,
                                                dtype=tf.float32)
            input_mask_token = tf.cast(
                tf.expand_dims(input_mask_token, axis=-1), tf.float32)
            input_mask_char = tf.sequence_mask(sent_char_len,
                                               self.seq_len,
                                               dtype=tf.float32)
            input_mask_char = tf.cast(
                tf.expand_dims(input_mask_char, axis=-1), tf.float32)
            label_emb = tf.Variable(tf.random_uniform(
                shape=(self.args.class_nums[i], self.args.emb_size),
                maxval=1.0,
                minval=-1.0,
                dtype=tf.float32),
                trainable=True)
            with tf.variable_scope(name_or_scope='_%s' % i):
                sent_enc_token = self.transformer_encoder(sent_token_emb,
                                                          sent_token_emb,
                                                          name='self_trans',
                                                          reuse=False,
                                                          num_blocks=2)
                # sent_enc_token = tf.multiply(sent_enc_token, tf.expand_dims(self.key_emb, 2))
                # sent_enc_token = mean_pool(sent_enc_token, sent_token_len)
                sent_enc_token_att, _ = self_attention(sent_enc_token,
                                                       sent_token_len,
                                                       scope='s_0')
                print('###', sent_enc_token)
                sent_enc_token_label_att = self.label_sent_attention(
                    sent_enc_token, label_emb, input_mask_token)
                sent_enc_char = self.transformer_encoder(sent_char_emb,
                                                         sent_char_emb,
                                                         name='self_trans_char',
                                                         reuse=False,
                                                         num_blocks=2)
                # sent_enc_char = mean_pool(sent_enc_char, sent_char_len)
                sent_enc_char_att, _ = self_attention(sent_enc_char,
                                                      sent_char_len,
                                                      scope='s_1')
                sent_enc_char_label_att = self.label_sent_attention(
                    sent_enc_char, label_emb, input_mask_char)
                # sent_enc_ = tf.concat([sent_enc_token_att, sent_enc_char_att], -1)
                sent_enc_ = tf.concat(
                    [sent_enc_token_label_att, sent_enc_char_label_att], -1)
                # sent_enc_ = sent_enc_token

                logit_label = tf.layers.dense(label_emb,
                                              self.args.class_nums[i],
                                              name='out_layers_%s' % i,
                                              reuse=False)
                class_y = tf.constant(
                    name='class_y_%s' % i,
                    shape=[self.args.class_nums[i], self.args.class_nums[i]],
                    dtype=tf.float32,
                    value=np.identity(self.args.class_nums[i]))
                label_loss = tf.losses.softmax_cross_entropy(class_y,
                                                             logits=logit_label)
                self.label_losses.append(label_loss)

                s1_emb_neg = self.transformer_encoder(s1_emb_neg,
                                                      s1_emb_neg,
                                                      name='self_trans',
                                                      reuse=True,
                                                      num_blocks=2)
                # s1_emb_neg = mean_pool(s1_emb_neg, self.sent_len_neg)
                s1_emb_neg, _ = self_attention(s1_emb_neg,
                                               self.sent_len_neg,
                                               scope='s_0',
                                               reuse=True)
                s1_emb_char_neg = self.transformer_encoder(s1_emb_char_neg,
                                                           s1_emb_char_neg,
                                                           name='self_trans_char',
                                                           reuse=True,
                                                           num_blocks=2)
                # s1_emb_char_neg = mean_pool(s1_emb_char_neg, self.sent_char_len_neg)
                s1_emb_char_neg, _ = self_attention(s1_emb_char_neg,
                                                    self.sent_char_len_neg,
                                                    scope='s_1',
                                                    reuse=True)
                sent_enc_neg = tf.concat([s1_emb_neg, s1_emb_char_neg], -1)

                # cosine-similarity loss pushing the sentence encoding and the
                # negative-sample encoding apart
                s1_emb_enc, s2_emb_enc = sent_enc_, sent_enc_neg
                query_norm = tf.sqrt(
                    tf.reduce_sum(tf.square(s1_emb_enc), 1, True))
                doc_norm = tf.sqrt(
                    tf.reduce_sum(tf.square(s2_emb_enc), 1, True))
                prod = tf.reduce_sum(tf.multiply(s1_emb_enc, s2_emb_enc), 1, True)
                norm_prod = tf.multiply(query_norm, doc_norm) + 0.01
                cos_sim = tf.truediv(prod, norm_prod)
                neg_cos_sim = tf.abs(1 - cos_sim)
                estimation_semantic = tf.concat([neg_cos_sim, cos_sim], 1)
                semantic_target = tf.zeros_like(self.target1)
                semantic_loss = tf.losses.sparse_softmax_cross_entropy(
                    labels=semantic_target, logits=estimation_semantic)
                self.semantic_losses.append(semantic_loss)
                # self.semantic_losses.extend(tf.reduce_mean(tf.zeros_like(self.target1)))

                # sent_enc_, _ = self_attention(sen_enc, sent_len)
                # sent_enc_ = tf.reshape(tf.cast([sent_enc_, s1_flatten], 1), [-1, 1200])
                # self.estimation = tf.layers.dense(sent_enc_, self.args.class_num)
                estimation = tf.contrib.layers.fully_connected(
                    inputs=sent_enc_,
                    num_outputs=self.args.class_nums[i],
                    activation_fn=None,
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    weights_regularizer=tf.contrib.layers.l2_regularizer(
                        scale=self.args.l2_reg),
                    biases_initializer=tf.constant_initializer(1e-04),
                    scope="FC")
                with tf.variable_scope(name_or_scope='out_softmax'):
                    pred_probs = tf.contrib.layers.softmax(estimation)
                    logits = tf.cast(tf.argmax(pred_probs, -1), tf.int32)
                    self.estimation_list.append(estimation)
                    self.pred_probs_list.append(pred_probs)
                    self.logits_list.append(logits)
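# Hedged training-objective sketch (illustrative): the per-task tensors
# collected above (estimation_list, label_losses, semantic_losses) would
# typically be combined into a single objective. The label placeholders beyond
# self.target1 and the 0.1 weights below are assumptions, not taken from the
# original code.
#
#   task_losses = [tf.losses.sparse_softmax_cross_entropy(labels=t, logits=e)
#                  for t, e in zip([self.target1, self.target2], self.estimation_list)]
#   total_loss = (tf.add_n(task_losses)
#                 + 0.1 * tf.add_n(self.label_losses)
#                 + 0.1 * tf.add_n(self.semantic_losses))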
def build_model(self):
    with tf.device('/device:GPU:%s' % self.gpu_id):
        s1_emb = self.build_emb(self.sent_token,
                                emb_size=self.args.vocab_size,
                                emb_dim=300,
                                name='word_emb',
                                reuse=False)
        s1_emb_seg = self.build_emb(self.sent_seg,
                                    emb_size=self.args.seg_num,
                                    emb_dim=300,
                                    name='word_emb_seg',
                                    reuse=False)
        s1_emb_poss = self.build_emb(self.sent_poss,
                                     emb_size=self.args.poss_num,
                                     emb_dim=300,
                                     name='word_emb_poss',
                                     reuse=False)
        input_mask = tf.sequence_mask(self.sent_len, self.seq_len)
        input_mask = tf.cast(tf.expand_dims(input_mask, axis=-1),
                             tf.float32)  # batch_size x seq_len x 1
        # s1_emb *= input_mask
        # s1_emb_seg *= input_mask
        # s1_emb_poss *= input_mask

        # tensor2tensor-style transformer hyper-parameters
        self.args.proximity_bias = False
        self.args.pos = 'emb'
        self.args.layer_prepostprocess_dropout = 0.1
        self.args.num_encoder_layers = 1
        self.args.hidden_size = self.args.emb_size
        self.args.num_heads = self.args.num_heads
        self.args.attention_dropout = 0.1
        self.args.self_attention_type = "dot_product"
        self.args.max_relative_position = 5
        self.args.max_length = self.seq_len
        self.args.attention_variables_3d = False
        self.args.layer_postprocess_sequence = "da"  # "d": dropout, "a": add previous (residual)
        self.args.layer_preprocess_sequence = "n"  # "n": layer norm
        self.args.activation_dtype = "bfloat32"
        self.args.use_target_space_embedding = False
        self.args.attention_dropout_broadcast_dims = ""
        self.args.use_pad_remover = False
        self.args.norm_type = 'layer'
        self.args.norm_epsilon = 1e-6
        self.args.layer_prepostprocess_dropout_broadcast_dims = ''
        self.args.attention_key_channels = 0
        self.args.attention_value_channels = 0
        self.args.relu_dropout = 0.2
        self.args.conv_first_kernel = 3
        self.args.ffn_layer = "dense_relu_dense"
        self.args.relu_dropout_broadcast_dims = ''
        self.args.filter_size = 512
        self.args.weight_decay = 1e-5

        # emb = tf.concat([s1_emb, s1_emb_seg, s1_emb_poss], 2)
        emb = s1_emb
        # emb += s1_emb_seg
        # emb += s1_emb_poss
        with tf.variable_scope(name_or_scope=self.scope + '_enc'):
            encoder_output = transformer_encoder_ht(emb,
                                                    target_space=None,
                                                    hparams=self.args,
                                                    features=None,
                                                    losses=None)
            # input_mask = tf.squeeze(input_mask, axis=-1)
            # v_attn = multi_dimensional_attention(
            #     encoder_output, input_mask, 'multi_dim_attn_for_%s' % "atten",
            #     1 - self.dropout, True, self.args.weight_decay, "relu")
            #
            # v_sum = tf.reduce_sum(encoder_output, 1)
            # v_ave = tf.div(v_sum, input_mask)
            # v_max = tf.reduce_max(encoder_output, 1)
            #
            # out = tf.concat([v_ave, v_max, v_attn], axis=-1)
            print("#####encoder_output", encoder_output)
            sen_enc = encoder_output
            # print("context_mask", input_mask)
            # dropout_rate = tf.cond(is_training,
            #                        lambda: config.dropout_rate,
            #                        lambda: 0.0)
            s1_trans_emb = build_transformer_emb(sent_word=self.sent_token,
                                                 sent_word_emb=emb,
                                                 text_len=self.seq_len,
                                                 args=self.args,
                                                 dropout=0.0,
                                                 name='left')
            sen_enc = transformer_encoder(s1_trans_emb,
                                          name='self_trans1',
                                          args=self.args,
                                          dropout=0.0,
                                          reuse=False,
                                          context_mask=input_mask,
                                          num_blocks=self.args.num_blocks)

            # token-level NER head with a CRF layer
            ner_sent_emb = tf.layers.dense(sen_enc, self.args.ner_num)
            self.ner_soft = tf.nn.softmax(ner_sent_emb, -1)
            # ner_pre_label = tf.argmax(self.ner_soft, -1)
            # self.ner_loss = tf.losses.sparse_softmax_cross_entropy(self.sent_ner, self.ner_soft)
            # self.ner_pre_label = ner_pre_label
            # ner_struct = ner_pre_label
            log_likelihood, trans_params = tf.contrib.crf.crf_log_likelihood(
                ner_sent_emb, self.sent_ner, self.sent_len)
            self.trans_params = trans_params  # needed for decoding
            viterbi_sequence, viterbi_score = tf.contrib.crf.crf_decode(
                ner_sent_emb, trans_params, self.sent_len)
            ner_pre_label = viterbi_sequence
            correct = tf.equal(tf.cast(ner_pre_label, tf.int32),
                               tf.cast(self.sent_ner, tf.int32))
            self.ner_pre_label = viterbi_sequence
            ner_struct = viterbi_sequence
            self.ner_accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
            self.ner_loss = tf.reduce_mean(-log_likelihood)
            ner_struct_mask = tf.cast(tf.expand_dims(ner_struct, 2), tf.float32)
            print('ner_struct', ner_struct)
            # sen_attention = self_attention(sen_enc, sequence_len)
            #
            # if self.args.GetEmbtype == 'mean_pool':
            #     sent_enc_ = mean_pool(sen_enc, self.sent_len)
            # else:
            #     sent_enc_ = last_relevant_output(sen_enc, self.sent_len)
            # sent_enc_ = tf.concat([sent_enc_, sen_attention], 1)
            # sent_enc_ = last_relevant_output(sen_enc, self.sent_len)
            sent_enc_, _ = self_attention(sen_enc, self.sent_len)
            # sent_enc_ = tf.multiply(sen_enc, ner_struct_mask)
            sent_enc_ = tf.multiply(sen_enc,
                                    tf.cast(tf.expand_dims(self.sent_ner, 2),
                                            tf.float32))
            sent_enc_ = tf.reduce_mean(sent_enc_, 1)
            # sent_enc_ = mean_pool(sen_enc, self.sent_len)
            # self.estimation = tf.layers.dense(sent_enc_, self.args.class_num)
            self.estimation = tf.contrib.layers.fully_connected(
                inputs=sent_enc_,
                num_outputs=self.args.class_num,
                activation_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer(),
                weights_regularizer=tf.contrib.layers.l2_regularizer(
                    scale=self.args.l2_reg),
                biases_initializer=tf.constant_initializer(1e-04),
                scope="FC")
            self.l2_loss = 0.0
            self.pred_probs = tf.contrib.layers.softmax(self.estimation)
            self.logits = tf.cast(tf.argmax(self.pred_probs, -1), tf.int32)
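# Hedged inference sketch (illustrative): crf_decode above already produces
# viterbi_sequence inside the graph. Equivalently, if the unary scores
# (ner_sent_emb) and self.trans_params are fetched with session.run, sequences
# can be decoded offline with tf.contrib.crf.viterbi_decode; `sess`, `feed`
# and `lengths` below are assumed to exist.
#
#   scores, trans = sess.run([ner_scores, self.trans_params], feed_dict=feed)
#   for score, length in zip(scores, lengths):
#       tags, _ = tf.contrib.crf.viterbi_decode(score[:length], trans)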
def build_model(self):
    self.estimation_list = []
    self.pred_probs_list = []
    self.logits_list = []
    self.semantic_losses = []
    # one classification head per task in class_nums
    for i in range(len(self.args.class_nums)):
        with tf.variable_scope(name_or_scope=self.scope + '_enc'):
            s1_emb = self.build_emb(self.sent_token,
                                    vocab_size=self.args.vocab_size,
                                    reuse=False,
                                    name='emb_%s' % i)
            s1_emb_re = self.build_emb(self.sent_word_re,
                                       vocab_size=self.args.vocab_size,
                                       reuse=True,
                                       name='emb_%s' % i)
            s1_emb_char = self.build_emb(self.sent_char,
                                         vocab_size=self.args.vocab_size,
                                         reuse=True,
                                         name='emb_%s' % i)
            s1_emb_re_char = self.build_emb(self.sent_word_re_char,
                                            vocab_size=self.args.vocab_size,
                                            reuse=True,
                                            name='emb_%s' % i)
            s1_emb_neg = self.build_emb(self.sent_token_neg,
                                        vocab_size=self.args.vocab_size,
                                        reuse=True,
                                        name='emb_%s' % i)
            s1_emb_char_neg = self.build_emb(self.sent_char_neg,
                                             vocab_size=self.args.vocab_size,
                                             reuse=True,
                                             name='emb_%s' % i)
            # NOTE: both branches currently select the original-sentence inputs.
            if i != 0:
                sent_token_emb = s1_emb
                sent_char_emb = s1_emb_char
                sent_token_len = self.sent_len
                sent_char_len = self.sent_len_char
                # s1_emb = s1_emb_re
                # sent_len = self.sent_len_re
                # sent_token = self.sent_word_re
                # input_mask = tf.sequence_mask(self.sent_len_re, self.seq_len, dtype=tf.float32)
                # input_mask = tf.cast(tf.expand_dims(input_mask, axis=-1),
                #                      tf.float32)  # batch_size x seq_len x 1
            else:
                sent_token_emb = s1_emb
                sent_char_emb = s1_emb_char
                sent_token_len = self.sent_len
                sent_char_len = self.sent_len_char
                # sent_len = self.sent_len
                # sent_token = self.sent_token
                # input_mask = tf.sequence_mask(self.sent_len, self.seq_len, dtype=tf.float32)
                # input_mask = tf.cast(tf.expand_dims(input_mask, axis=-1),
                #                      tf.float32)  # batch_size x seq_len x 1
            with tf.variable_scope(name_or_scope='_%s' % i):
                sent_enc_token = self.transformer_encoder(sent_token_emb,
                                                          sent_token_emb,
                                                          name='self_trans',
                                                          reuse=False,
                                                          num_blocks=2)
                # sent_enc_token = tf.multiply(sent_enc_token, tf.expand_dims(self.key_emb, 2))
                # sent_enc_token = mean_pool(sent_enc_token, sent_token_len)
                sent_enc_token, _ = self_attention(sent_enc_token,
                                                   sent_token_len,
                                                   scope='s_0')
                sent_enc_char = self.transformer_encoder(sent_char_emb,
                                                         sent_char_emb,
                                                         name='self_trans_char',
                                                         reuse=False,
                                                         num_blocks=2)
                # sent_enc_char = mean_pool(sent_enc_char, sent_char_len)
                sent_enc_char, _ = self_attention(sent_enc_char,
                                                  sent_char_len,
                                                  scope='s_1')
                sent_enc_ = tf.concat([sent_enc_token, sent_enc_char], -1)
                # sent_enc_ = sent_enc_token

                s1_emb_neg = self.transformer_encoder(s1_emb_neg,
                                                      s1_emb_neg,
                                                      name='self_trans',
                                                      reuse=True,
                                                      num_blocks=2)
                # s1_emb_neg = mean_pool(s1_emb_neg, self.sent_len_neg)
                s1_emb_neg, _ = self_attention(s1_emb_neg,
                                               self.sent_len_neg,
                                               scope='s_0',
                                               reuse=True)
                s1_emb_char_neg = self.transformer_encoder(s1_emb_char_neg,
                                                           s1_emb_char_neg,
                                                           name='self_trans_char',
                                                           reuse=True,
                                                           num_blocks=2)
                # s1_emb_char_neg = mean_pool(s1_emb_char_neg, self.sent_char_len_neg)
                s1_emb_char_neg, _ = self_attention(s1_emb_char_neg,
                                                    self.sent_char_len_neg,
                                                    scope='s_1',
                                                    reuse=True)
                sent_enc_neg = tf.concat([s1_emb_neg, s1_emb_char_neg], -1)

                # cosine similarity between the sentence encoding and the
                # negative-sample encoding
                s1_emb_enc, s2_emb_enc = sent_enc_, sent_enc_neg
                query_norm = tf.sqrt(tf.reduce_sum(tf.square(s1_emb_enc), 1, True))
                doc_norm = tf.sqrt(tf.reduce_sum(tf.square(s2_emb_enc), 1, True))
                prod = tf.reduce_sum(tf.multiply(s1_emb_enc, s2_emb_enc), 1, True)
                norm_prod = tf.multiply(query_norm, doc_norm) + 0.01
                cos_sim = tf.truediv(prod, norm_prod)
                neg_cos_sim = tf.abs(1 - cos_sim)
                with tf.variable_scope(name_or_scope='semantic_out'):
                    estimation_semantic = tf.concat([neg_cos_sim, cos_sim], 1)
                    semantic_target = tf.zeros_like(self.target1)
                    semantic_loss = tf.losses.sparse_softmax_cross_entropy(
                        labels=semantic_target, logits=estimation_semantic)
                    self.semantic_losses.append(semantic_loss)
                    # self.semantic_losses.extend(tf.reduce_mean(tf.zeros_like(self.target1)))

                # sent_enc_, _ = self_attention(sen_enc, sent_len)
                # sent_enc_ = tf.reshape(tf.cast([sent_enc_, s1_flatten], 1), [-1, 1200])
                # self.estimation = tf.layers.dense(sent_enc_, self.args.class_num)
                estimation = tf.contrib.layers.fully_connected(
                    inputs=sent_enc_,
                    num_outputs=self.args.class_nums[i],
                    activation_fn=None,
                    weights_initializer=tf.contrib.layers.xavier_initializer(),
                    weights_regularizer=tf.contrib.layers.l2_regularizer(
                        scale=self.args.l2_reg),
                    biases_initializer=tf.constant_initializer(1e-04),
                    scope="FC")
                pred_probs = tf.contrib.layers.softmax(estimation)
                logits = tf.cast(tf.argmax(pred_probs, -1), tf.int32)
                self.estimation_list.append(estimation)
                self.pred_probs_list.append(pred_probs)
                self.logits_list.append(logits)
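# Hedged evaluation sketch (illustrative): per-task accuracy can be computed
# from logits_list, assuming a matching integer label tensor per task
# (self.target1 is used for task 0 here; label tensors for the other tasks are
# assumptions).
#
#   accuracy_0 = tf.reduce_mean(
#       tf.cast(tf.equal(self.logits_list[0], self.target1), tf.float32))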