def att_match(mid, pat, mid_mask, pat_mask, hidden, keep_prob, is_train):
    # Attention-pool the middle-context and pattern sequences, then compare the pooled vectors with cosine similarity.
    mid_d = dropout(mid, keep_prob=keep_prob, is_train=is_train)
    pat_d = dropout(pat, keep_prob=keep_prob, is_train=is_train)
    mid_a = attention(mid_d, hidden, mask=mid_mask)
    pat_a = attention(pat_d, hidden, mask=pat_mask)
    mid_v = tf.reduce_sum(tf.expand_dims(mid_a, axis=2) * mid, axis=1)
    pat_v = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat, axis=1)
    pat_v_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)
    sur_sim = cosine(mid_v, pat_v_d)
    pat_sim = cosine(pat_v, pat_v_d)
    return sur_sim, pat_sim
def lstm_match(mid, pat, mid_mask, pat_mask, mid_len, pat_len, hidden, keep_prob, is_train):
    # Same as att_match, but encode both sequences with a Cudnn_RNN before attention pooling.
    rnn = Cudnn_RNN(num_layers=1, num_units=hidden // 2)
    mid, _ = rnn(mid, seq_len=mid_len, concat_layers=False)
    pat, _ = rnn(pat, seq_len=pat_len, concat_layers=False)
    mid_d = dropout(mid, keep_prob=keep_prob, is_train=is_train)
    pat_d = dropout(pat, keep_prob=keep_prob, is_train=is_train)
    mid_a = attention(mid_d, hidden, mask=mid_mask)
    pat_a = attention(pat_d, hidden, mask=pat_mask)
    mid_v = tf.reduce_sum(tf.expand_dims(mid_a, axis=2) * mid, axis=1)
    pat_v = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat, axis=1)
    pat_v_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)
    sur_sim = cosine(mid_v, pat_v_d)
    pat_sim = cosine(pat_v, pat_v_d)
    return sur_sim, pat_sim
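# The two matchers above rely on a `cosine` helper defined elsewhere in this repo (it also accepts a
# `weighted` flag in FIND_module below). The function here is only a minimal sketch, under the
# assumption that `cosine` scores every pooled vector in `a` against every pooled vector in `b`;
# the name `_cosine_sketch` is illustrative and not part of the original codebase.
def _cosine_sketch(a, b, eps=1e-8):
    # a: [m, d], b: [n, d] -> pairwise cosine similarities of shape [m, n]
    a_norm = tf.nn.l2_normalize(a, axis=1, epsilon=eps)
    b_norm = tf.nn.l2_normalize(b, axis=1, epsilon=eps)
    return tf.matmul(a_norm, b_norm, transpose_b=True)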
def ready(self):
    config = self.config
    N, PL, QL, CL, d, dc, dg = (config.batch_size, self.c_maxlen, self.q_maxlen,
                                config.char_limit, config.hidden, config.char_dim,
                                config.char_hidden)
    gru = cudnn_gru if config.use_cudnn else native_gru
    gi = []
    att_vP = []

    for i in range(config.max_para):
        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.pr_ch), [N * PL, CL, dc])
                    # self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3, num_units=d, batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                                   keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)
            # att is the v_P
            att_vP.append(att)

        """
        with tf.variable_scope("match"):
            self_att = dot_attention(
                att, att, mask=self.c_mask, hidden=d,
                keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)
        """

    with tf.variable_scope("pointer"):
        # r_Q:
        init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                    keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1],
                          keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        logits1, logits2 = pointer(init, att, d, self.c_mask)

    with tf.variable_scope("predict"):
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        outer = tf.matrix_band_part(outer, 0, 15)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
        losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits1, labels=self.y1)
        losses2 = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits2, labels=self.y2)
        self.loss = tf.reduce_mean(losses + losses2)
        # print losses
        # condition = tf.greater(self.loss, 11)
        # self.yp1 = tf.where(condition, tf.Print(self.yp1, [self.yp1], message="Yp1:"), self.yp1)
        # self.yp2 = tf.where(condition, tf.Print(self.yp2, [self.yp2], message="Yp2:"), self.yp1)

    for i in range(config.max_para):
        # Passage ranking
tf.variable_scope("passage-ranking-attention"): vj_P = dropout(att, keep_prob=keep_prob, is_train=is_train) r_Q = dropout(init, keep_prob=keep_prob, is_train=is_train) r_P = attention(r_Q, vj_P, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) #rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=pr_att.get_shape( #).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) #att_rp = rnn(qc_att, seq_len=self.c_len) # Wg concatenate = tf.concat([init, att_rp], axis=2) g = tf.nn.tanh( dense(concatenate, hidden=d, use_bias=False, scope="g")) g_ = dense(g, 1, use_bias=False, scope="g_") gi.append(g_) gi_ = tf.convert_to_tensor(gi) gi = tf.nn.softmax(gi_) self.pr_loss = tf.nn.softmax_cross_entropy_with_logits(logits=gi, labels=self.pr)
def ready(self):
    config = self.config
    d = config.hidden
    batch_size = tf.shape(self.sent)[0]

    sent_mask = tf.cast(self.sent, tf.bool)
    sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
    sent_maxlen = tf.reduce_max(sent_len)
    sent_mask = tf.slice(sent_mask, [0, 0], [batch_size, sent_maxlen])
    sent = tf.slice(self.sent, [0, 0], [batch_size, sent_maxlen])

    mid_mask = tf.cast(self.mid, tf.bool)
    mid_len = tf.reduce_sum(tf.cast(mid_mask, tf.int32), axis=1)
    mid_maxlen = tf.reduce_max(mid_len)
    mid_mask = tf.slice(mid_mask, [0, 0], [batch_size, mid_maxlen])
    mid = tf.slice(self.mid, [0, 0], [batch_size, mid_maxlen])

    pat_mask = tf.cast(self.pats, tf.bool)
    pat_len = tf.reduce_sum(tf.cast(pat_mask, tf.int32), axis=1)

    with tf.variable_scope("embedding"):
        sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
        mid_emb = tf.nn.embedding_lookup(self.word_mat, mid)
        sent_emb = dropout(sent_emb, keep_prob=config.word_keep_prob,
                           is_train=self.is_train, mode="embedding")
        pat_emb = tf.nn.embedding_lookup(self.word_mat, self.pats)

    with tf.variable_scope("encoder"):
        rnn = Cudnn_RNN(num_layers=2, num_units=d // 2)
        cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
        pat, _ = rnn(pat_emb, seq_len=pat_len, concat_layers=False)
        cont_d = dropout(cont, keep_prob=config.keep_prob, is_train=self.is_train)
        pat_d = dropout(pat, keep_prob=config.keep_prob, is_train=self.is_train)

    with tf.variable_scope("attention"):
        att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
        pat_a = self.pat_a = attention(pat_d, config.att_hidden, mask=pat_mask)

    with tf.variable_scope("sim"):
        sim, pat_sim = att_match(mid_emb, pat_emb, mid_mask, pat_mask, d,
                                 keep_prob=config.keep_prob, is_train=self.is_train)

        neg_idxs = tf.matmul(self.rels, tf.transpose(self.rels, [1, 0]))
        pat_pos = tf.square(tf.maximum(config.tau - pat_sim, 0.))
        pat_pos = tf.reduce_max(pat_pos - (1 - neg_idxs) * 1e30, axis=1)
        pat_neg = tf.square(tf.maximum(pat_sim, 0.))
        pat_neg = tf.reduce_max(pat_neg - 1e30 * neg_idxs, axis=1)
        l_sim = tf.reduce_sum(self.weight * (pat_pos + pat_neg), axis=0)

    with tf.variable_scope("pred"):
        att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d, axis=1)
        pat2_d = tf.reduce_sum(tf.expand_dims(pat_a, axis=2) * pat_d, axis=1)

        logit = self.logit = dense(att2_d, config.num_class, use_bias=False)
        pred = tf.nn.softmax(logit)
        l_a = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logit[:config.batch_size], labels=self.rel[:config.batch_size]), axis=0)

        xsim = tf.stop_gradient(sim[config.batch_size:])
        pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
        bound = tf.reduce_max(xsim, axis=1)
        weight = tf.nn.softmax(10 * bound)
        l_u = tf.reduce_sum(weight * tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logit[config.batch_size:], labels=pseudo_rel), axis=0)

        logit = dense(pat2_d, config.num_class, use_bias=False)
        l_pat = self.pat_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logit, labels=self.rels), axis=0)

    self.max_val = tf.reduce_sum(pred * -log(pred), axis=1)
    self.pred = tf.argmax(pred, axis=1)
    self.loss = l_a + config.alpha * l_pat + config.beta * l_sim + config.gamma * l_u
    self.sim_pred = tf.argmax(tf.gather(self.rels, tf.argmax(self.sim, axis=1)), axis=1)
    self.sim_max_val = tf.reduce_max(self.sim, axis=1)
    self.gold = tf.argmax(self.rel, axis=1)
    self.max_logit = tf.reduce_max(self.logit, axis=1)
def FIND_module(sent, pats, word_mat, config, is_train, rnn, scope='Find_module'):
    # sent: [batch, maxlength_sent]  pats: [num_pats, maxlength_pat]
    # word_mat: word embeddings, so lookups give [batch, maxlength_sent, dim]
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        keep_prob = config.keep_prob
        d = config.hidden
        batch_size = tf.shape(sent)[0]
        maxlength_sent = tf.shape(sent)[1]
        dim = tf.shape(word_mat)[1]
        num_pats = tf.shape(pats)[0]

        sent_mask = tf.cast(sent, tf.bool)
        pat_mask = tf.cast(pats, tf.bool)
        pat_len = tf.reduce_sum(tf.cast(pat_mask, tf.int32), axis=1)

        with tf.variable_scope('embedding'):
            sent_emb = tf.nn.embedding_lookup(word_mat, sent)
            sent_emb_d = dropout(sent_emb, keep_prob=config.word_keep_prob,
                                 is_train=is_train, mode="embedding")
            pat_emb = tf.nn.embedding_lookup(word_mat, pats)
            pat_emb_d = dropout(pat_emb, keep_prob=config.word_keep_prob,
                                is_train=is_train, mode='embedding')

        with tf.variable_scope('stack'):
            pad = tf.zeros([batch_size, 1, dim], tf.float32)
            sent_emb_pad = tf.concat([pad, sent_emb, pad], axis=1)
            sent_emb_stack_2 = tf.reshape(sent_emb_pad, [batch_size, maxlength_sent + 2, 1, dim])
            sent_emb_stack_2 = tf.concat([sent_emb_stack_2[:, 0:-1, :], sent_emb_stack_2[:, 1:, :]], axis=2)
            sent_emb_stack_2 = tf.reshape(sent_emb_stack_2, [batch_size * (maxlength_sent + 1), 2, dim])

            sent_emb_pad2 = tf.concat([pad, pad, sent_emb, pad, pad], axis=1)
            sent_emb_stack_3 = tf.reshape(sent_emb_pad2, [batch_size, maxlength_sent + 4, 1, dim])
            sent_emb_stack_3 = tf.concat([sent_emb_stack_3[:, 0:-2, :], sent_emb_stack_3[:, 1:-1, :],
                                          sent_emb_stack_3[:, 2:, :]], axis=2)
            sent_emb_stack_3 = tf.reshape(sent_emb_stack_3, [batch_size * (maxlength_sent + 2), 3, dim])

            sent_emb_stack_1 = tf.reshape(sent_emb, [batch_size * maxlength_sent, 1, dim])

        with tf.variable_scope('stack_d'):
            pad = tf.zeros([batch_size, 1, dim], tf.float32)
            sent_emb_pad_d = tf.concat([pad, sent_emb_d, pad], axis=1)
            sent_emb_stack_2_d = tf.reshape(sent_emb_pad_d, [batch_size, maxlength_sent + 2, 1, dim])
            sent_emb_stack_2_d = tf.concat([sent_emb_stack_2_d[:, 0:-1, :], sent_emb_stack_2_d[:, 1:, :]], axis=2)
            sent_emb_stack_2_d = tf.reshape(sent_emb_stack_2_d, [batch_size * (maxlength_sent + 1), 2, dim])

            sent_emb_pad2_d = tf.concat([pad, pad, sent_emb_d, pad, pad], axis=1)
            sent_emb_stack_3_d = tf.reshape(sent_emb_pad2_d, [batch_size, maxlength_sent + 4, 1, dim])
            sent_emb_stack_3_d = tf.concat([sent_emb_stack_3_d[:, 0:-2, :], sent_emb_stack_3_d[:, 1:-1, :],
                                            sent_emb_stack_3_d[:, 2:, :]], axis=2)
            sent_emb_stack_3_d = tf.reshape(sent_emb_stack_3_d, [batch_size * (maxlength_sent + 2), 3, dim])

            sent_emb_stack_1_d = tf.reshape(sent_emb_d, [batch_size * maxlength_sent, 1, dim])

        with tf.variable_scope("encoder"):
            with tf.variable_scope('encode_pat'):
                pat, _ = rnn(pat_emb, seq_len=pat_len, concat_layers=False)  # [num_pats, d]
                pat_d = dropout(pat, keep_prob=config.keep_prob, is_train=is_train)
            with tf.variable_scope('encode_sent'):
                cont_stack_3, _ = rnn(sent_emb_stack_3,
                                      seq_len=3 * tf.ones([batch_size * (maxlength_sent + 2)], tf.int32),
                                      concat_layers=False)
                cont_stack_2, _ = rnn(sent_emb_stack_2,
                                      seq_len=2 * tf.ones([batch_size * (maxlength_sent + 1)], tf.int32),
                                      concat_layers=False)  # [batch_size*(maxlength_sent+1), d]
                cont_stack_1, _ = rnn(sent_emb_stack_1,
                                      seq_len=tf.ones([batch_size * maxlength_sent], tf.int32),
                                      concat_layers=False)  # [batch_size*maxlength_sent, d]
                cont_stack_3_d = dropout(cont_stack_3, keep_prob=keep_prob, is_train=is_train)
                cont_stack_2_d = dropout(cont_stack_2, keep_prob=keep_prob, is_train=is_train)
                cont_stack_1_d = dropout(cont_stack_1, keep_prob=keep_prob, is_train=is_train)

        with tf.variable_scope('attention'):
            pat_d_a = attention(pat_d, config.att_hidden, mask=pat_mask)
            cont_stack_2_d_a = attention(cont_stack_2_d, config.att_hidden)
            cont_stack_3_d_a = attention(cont_stack_3_d, config.att_hidden)
            cont_stack_3_att = tf.reduce_sum(tf.expand_dims(cont_stack_3_d_a, axis=2) * cont_stack_3, axis=1)
            cont_stack_2_att = tf.reduce_sum(tf.expand_dims(cont_stack_2_d_a, axis=2) * cont_stack_2, axis=1)
            pat_d_att = tf.reduce_sum(tf.expand_dims(pat_d_a, axis=2) * pat_d, axis=1)
            pat_att = tf.reduce_sum(tf.expand_dims(pat_d_a, axis=2) * pat, axis=1)
            cont_stack_1_att = tf.squeeze(cont_stack_1)

        with tf.variable_scope('emb_attention'):
            pat_emb_d_a = attention(pat_emb_d, config.att_hidden, mask=pat_mask)
            pat_emb_d_att = tf.reduce_sum(tf.expand_dims(pat_emb_d_a, axis=2) * pat_emb_d, axis=1)
            pat_emb_att = tf.reduce_sum(tf.expand_dims(pat_emb_d_a, axis=2) * pat_emb, axis=1)
            sent_emb_stack_3_d_a = attention(sent_emb_stack_3_d, config.att_hidden)
            sent_emb_stack_3_att = tf.reduce_sum(tf.expand_dims(sent_emb_stack_3_d_a, axis=2) * sent_emb_stack_3, axis=1)
            sent_emb_stack_2_d_a = attention(sent_emb_stack_2_d, config.att_hidden)
            sent_emb_stack_2_att = tf.reduce_sum(tf.expand_dims(sent_emb_stack_2_d_a, axis=2) * sent_emb_stack_2, axis=1)
            sent_emb_stack_1_att = tf.squeeze(sent_emb_stack_1)

        with tf.variable_scope('Score'):
            scores_stack_2 = cosine(cont_stack_2_att, pat_d_att, weighted=False)
            scores_stack_1 = cosine(cont_stack_1_att, pat_d_att, weighted=False)
            scores_stack_3 = cosine(cont_stack_3_att, pat_d_att, weighted=False)
            scores_stack_3 = tf.reshape(scores_stack_3, [batch_size, 1, maxlength_sent + 2, num_pats])
            scores_stack_2 = tf.reshape(scores_stack_2, [batch_size, 1, maxlength_sent + 1, num_pats])
            scores_stack_1 = tf.reshape(scores_stack_1, [batch_size, 1, maxlength_sent, num_pats])
            scores_sim = cosine(pat_att, pat_d_att, weighted=False)

        with tf.variable_scope('emb_Score'):
            scores_stack_3_emb = cosine(sent_emb_stack_3_att, pat_emb_d_att)
            scores_stack_2_emb = cosine(sent_emb_stack_2_att, pat_emb_d_att)
            scores_stack_1_emb = cosine(sent_emb_stack_1_att, pat_emb_d_att)
            scores_stack_3_emb = tf.reshape(scores_stack_3_emb, [batch_size, 1, maxlength_sent + 2, num_pats])
            scores_stack_2_emb = tf.reshape(scores_stack_2_emb, [batch_size, 1, maxlength_sent + 1, num_pats])
            scores_stack_1_emb = tf.reshape(scores_stack_1_emb, [batch_size, 1, maxlength_sent, num_pats])

            phi = 0
            scores_stack_3 = phi * scores_stack_3_emb + (1 - phi) * scores_stack_3
            scores_stack_2 = phi * scores_stack_2_emb + (1 - phi) * scores_stack_2
            scores_stack_1 = phi * scores_stack_1_emb + (1 - phi) * scores_stack_1

            scores = tf.concat([scores_stack_3[:, :, 0:-2, :], scores_stack_3[:, :, 1:-1, :],
                                scores_stack_3[:, :, 2:, :], scores_stack_2[:, :, 0:-1, :],
                                scores_stack_2[:, :, 1:, :], scores_stack_1], axis=1)
            scores = tf.reshape(scores, [batch_size, 6, maxlength_sent, num_pats])
            scores = tf.transpose(scores, [0, 3, 1, 2])
            scores = tf.reshape(scores, [batch_size * num_pats, 6, maxlength_sent])

            scores_sim_emb = cosine(pat_emb_att, pat_emb_d_att)
            scores_sim = phi * scores_sim_emb + (1 - phi) * scores_sim

        with tf.variable_scope('SeqLabel'):
            seq = tf.layers.dense(tf.transpose(scores, [0, 2, 1]), 1)
            seq = tf.squeeze(seq)
            seq = tf.reshape(seq, [batch_size, num_pats, maxlength_sent])
            # seq = tf.reshape(tf.reduce_max(scores, axis=1), [batch_size, num_pats, maxlength_sent])
            seq = tf.transpose(seq, [0, 2, 1])
            seq = seq * tf.tile(tf.cast(tf.reshape(sent_mask, [batch_size, maxlength_sent, 1]), tf.float32),
                                [1, 1, num_pats])

        return seq, scores_sim
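# `attention(inputs, hidden, mask=None)` is imported from elsewhere in this repo. Judging from how its
# output is used above (expand_dims to [batch, length, 1], multiply with the memory, then reduce_sum
# over the time axis), it returns a [batch, length] distribution of pooling weights. The sketch below
# is only an assumed implementation of that behaviour (scoring MLP plus masked softmax); the name
# `_attention_sketch` is illustrative and not part of the original codebase.
def _attention_sketch(inputs, hidden, mask=None, scope="attention_sketch"):
    with tf.variable_scope(scope, reuse=tf.AUTO_REUSE):
        proj = tf.layers.dense(inputs, hidden, activation=tf.nn.tanh)          # [batch, length, hidden]
        score = tf.squeeze(tf.layers.dense(proj, 1, use_bias=False), axis=2)   # [batch, length]
        if mask is not None:
            score += (1.0 - tf.cast(mask, tf.float32)) * (-1e30)               # suppress padding positions
        return tf.nn.softmax(score, axis=1)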
def ready(self):
    config = self.config
    d = config.hidden
    batch_size = tf.shape(self.sent_word)[0]

    sent_mask = tf.cast(self.sent_word, tf.bool)
    sent_len = tf.reduce_sum(tf.cast(sent_mask, tf.int32), axis=1)
    sent_maxlen = config.length
    sent = self.sent_word
    pretrain_sent_mask = tf.cast(self.pretrain_sents, tf.bool)

    rnn = Cudnn_RNN(num_layers=2, num_units=d // 2, keep_prob=config.keep_prob, is_train=self.is_train)

    label_mat, _ = FIND_module(sent, self.raw_pats, self.word_mat, config, tf.constant(False, tf.bool), rnn)
    label_mat = tf.sigmoid(label_mat) * tf.tile(
        tf.reshape(tf.cast(sent_mask, tf.float32), [batch_size, sent_maxlen, 1]),
        [1, 1, self.raw_pats.get_shape()[0]])
    # label_mat = tf.cast(tf.greater(label_mat, 0.7), tf.float32)

    _, keywords_sim = FIND_module(sent, self.pats, self.word_mat, config, self.is_train, rnn)
    # keywords_sim = tf.sigmoid(keywords_sim)

    pretrain_pred_labels, _ = FIND_module(self.pretrain_sents, self.pretrain_pats, self.word_mat, config,
                                          self.is_train, rnn)
    pretrain_pred_labels = tf.transpose(pretrain_pred_labels, [0, 2, 1])
    gather_order = tf.tile(
        tf.reshape(tf.range(max(config.pretrain_size, config.pretrain_size_together)), [-1, 1]), [1, 2])
    pretrain_pred_labels = tf.gather_nd(pretrain_pred_labels, gather_order)

    self.pretrain_loss = tf.reduce_mean(
        tf.reduce_sum(
            tf.nn.weighted_cross_entropy_with_logits(
                targets=self.pretrain_labels, logits=pretrain_pred_labels,
                pos_weight=config.pos_weight) * tf.cast(pretrain_sent_mask, tf.float32),
            axis=1) / tf.reduce_sum(tf.cast(pretrain_sent_mask, tf.float32), axis=1))
    # tf.losses.mean_squared_error(labels=self.pretrain_labels, predictions=pretrain_pred_labels)
    self.prt_loss = tf.nn.weighted_cross_entropy_with_logits(
        targets=self.pretrain_labels, logits=pretrain_pred_labels,
        pos_weight=config.pos_weight) * tf.cast(pretrain_sent_mask, tf.float32)
    self.prt_pred = tf.sigmoid(pretrain_pred_labels) * tf.cast(pretrain_sent_mask, tf.float32)
    self.pretrain_pred_labels = tf.reshape(
        tf.cast(tf.greater(tf.sigmoid(pretrain_pred_labels) * tf.cast(pretrain_sent_mask, tf.float32),
                           config.pretrain_threshold), tf.int32), [-1])

    neg_idxs = tf.matmul(self.keywords_rels, tf.transpose(self.keywords_rels, [1, 0]))
    pat_pos = tf.square(tf.maximum(0.9 - keywords_sim, 0.))
    pat_pos = tf.reduce_max(pat_pos - tf.cast(1 - neg_idxs, tf.float32) * tf.constant(1e30, tf.float32), axis=1)
    pat_neg = tf.square(tf.maximum(keywords_sim, 0.))
    pat_neg = tf.reduce_max(pat_neg - tf.constant(1e30, tf.float32) * tf.cast(neg_idxs, tf.float32), axis=1)
    pat_simloss = tf.reduce_mean(pat_pos + pat_neg, axis=0)

    # clustering loss
    self.sim_loss = sim_loss = pat_simloss
    self.pretrain_loss_v2 = self.pretrain_loss + self.pretrain_alpha * self.sim_loss

    sim_raw = []
    for i, soft_labeling_function in enumerate(self.labeling_functions_soft):
        try:
            sim_raw.append(soft_labeling_function(label_mat, self.raw_keyword_dict, self.mask_mat)(
                self.phrases_input) * self.type_restrict(i))
        except Exception:
            print(i)
            sim_raw.append(tf.cast(tf.reshape(0 * self.phrases_input[:, 0], [1, -1]), tf.float32))

    # each element: [1, batch_size]; after concat + transpose: [batch_size, num_functions]
    self.sim = sim = tf.transpose(tf.concat(sim_raw, axis=0), [1, 0])

    with tf.variable_scope("classifier"):
        sent_emb = tf.nn.embedding_lookup(self.word_mat, sent)
        sent_emb = dropout(sent_emb, keep_prob=config.word_keep_prob, is_train=self.is_train, mode="embedding")
        rnn = Cudnn_RNN(num_layers=2, num_units=d // 2, keep_prob=config.keep_prob, is_train=self.is_train)
        cont, _ = rnn(sent_emb, seq_len=sent_len, concat_layers=False)
        cont_d = dropout(cont, keep_prob=config.keep_prob, is_train=self.is_train)
        att_a = attention(cont_d, config.att_hidden, mask=sent_mask)
        att2_d = tf.reduce_sum(tf.expand_dims(att_a, axis=2) * cont_d, axis=1)
        logit = dense(att2_d, config.num_class, use_bias=False)
        pred = tf.nn.softmax(logit)

    with tf.variable_scope("pred"):
        if not self.pseudo:
            sent_loss = self.sent_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(logits=logit, labels=self.rel), axis=0)
        else:
            self.hard_train_loss = sent_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=logit[:config.batch_size], labels=self.rel[:config.batch_size]), axis=0)

            lsim = sim[:config.batch_size]
            index_tensor = tf.reshape(tf.constant(np.arange(config.batch_size), tf.int32), [config.batch_size, 1])
            select_tensor = tf.reshape(self.hard_match_func_idx, [config.batch_size, 1])
            probs = tf.reshape(tf.gather_nd(lsim, tf.concat([index_tensor, select_tensor], axis=1)),
                               [config.batch_size, 1])
            self.labeled_loss = labeled_loss = tf.reduce_mean(tf.square(1 - probs))

            xsim = tf.stop_gradient(sim[config.batch_size:])
            pseudo_rel = tf.gather(self.rels, tf.argmax(xsim, axis=1))
            bound = tf.reduce_max(xsim, axis=1)
            weight = tf.nn.softmax(10.0 * bound)
            self.unlabeled_loss = unlabeled_loss = tf.reduce_sum(
                weight * tf.nn.softmax_cross_entropy_with_logits_v2(
                    logits=logit[config.batch_size:], labels=pseudo_rel), axis=0)

            sent_loss = self.sent_loss = (sent_loss + self.gamma * unlabeled_loss
                                          + self.alpha * self.pretrain_loss)  # + self.alpha * labeled_loss

    # compute entropy to infer no_relation
    self.max_val = entropy = tf.reduce_sum(pred * -log(pred), axis=1)
    # pred is used at test time
    self.pred = tf.argmax(pred, axis=1)
    self.loss = sent_loss + self.beta * sim_loss
    # prediction from the similarity model
    self.sim_pred = tf.argmax(tf.gather(self.rels, tf.argmax(self.sim, axis=1)), axis=1)
    self.sim_max_val = tf.reduce_max(self.sim, axis=1)
    # true label
    self.gold = tf.argmax(self.rel, axis=1)
    self.entropy = tf.reduce_mean(entropy, axis=0)
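# The entropy computed in `ready` above ("compute entropy to infer no_relation") suggests a
# post-processing step where low-confidence predictions fall back to no_relation. The helper below is
# a hypothetical sketch of that step; `threshold` and `no_relation_id` are assumed values, not taken
# from this repo, and it would operate on the numpy arrays returned by sess.run([self.pred, self.max_val]).
def entropy_fallback(pred, entropy, threshold=1.0, no_relation_id=0):
    # keep the argmax prediction only when the output distribution is sharp enough (low entropy)
    return np.where(entropy > threshold, no_relation_id, pred)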