def main():
    while True:
        start()
        decision = input('Write your decision, sir: ')
        try:
            decision = int(decision)
        except ValueError:
            print('You should write the number of a command, sir')
            continue
        if not decision:
            print('See you next time, sir')
            break
        elif decision > 4:
            print('Check the list of commands again, sir')
            continue
        try:
            arg_1 = input('first arg: ')
            arg_2 = input('second arg: ')
            valid_2(arg_1, arg_2)
            valid(arg_1, arg_2)
        except NoSpaces as err:
            print(f'{err}')
        except IncorrectInputError as err:
            print(f'{err}')
        else:
            if decision == 1:
                summ(arg_1, arg_2)
            elif decision == 2:
                diff(arg_1, arg_2)
            elif decision == 3:
                mult(arg_1, arg_2)
            elif decision == 4:
                try:
                    division(arg_1, arg_2)
                except ZeroDivisionError as err:
                    print(f'second num is zero - {err}!!!')
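The loop above relies on helpers (`start`, `valid`, `valid_2`, `summ`, `diff`, `mult`, `division`) and two custom exceptions defined elsewhere in the module. A minimal sketch of what they might look like, only so the menu loop can be exercised standalone; the names are taken from the calls above, but the bodies and messages are illustrative assumptions, not the original implementations.

class IncorrectInputError(Exception):
    """Raised when an argument cannot be parsed as a number (assumed behaviour)."""

class NoSpaces(Exception):
    """Raised when an argument contains spaces (assumed behaviour)."""

def start():
    print('1 - sum, 2 - difference, 3 - product, 4 - division, 0 - exit')

def valid(arg_1, arg_2):
    # both arguments must be numeric
    for arg in (arg_1, arg_2):
        try:
            float(arg)
        except ValueError:
            raise IncorrectInputError(f'"{arg}" is not a number')

def valid_2(arg_1, arg_2):
    # arguments must not contain spaces
    if ' ' in arg_1 or ' ' in arg_2:
        raise NoSpaces('arguments must not contain spaces')

def summ(a, b): print(float(a) + float(b))
def diff(a, b): print(float(a) - float(b))
def mult(a, b): print(float(a) * float(b))
def division(a, b): print(float(a) / float(b))  # raises ZeroDivisionError when b == 0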
def ready(self):
    config = self.config
    N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, \
        config.char_limit, config.hidden, config.char_dim, config.char_hidden
    gru = cudnn_gru if config.use_cudnn else native_gru
    gi = []
    att_vP = []

    for i in range(config.max_para):
        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.pr_ch), [N * PL, CL, dc])
                # self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(ch_emb, keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb, keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3, num_units=d, batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                                   keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)  # att is the v_P
            att_vP.append(att)

        """
        with tf.variable_scope("match"):
            self_att = dot_attention(
                att, att, mask=self.c_mask, hidden=d,
                keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)
        """

        with tf.variable_scope("pointer"):
            # r_Q:
            init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            logits1, logits2 = pointer(init, att, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits1, labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits2, labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)
            # print losses
            # condition = tf.greater(self.loss, 11)
            # self.yp1 = tf.where(condition, tf.Print(self.yp1, [self.yp1], message="Yp1:"), self.yp1)
            # self.yp2 = tf.where(condition, tf.Print(self.yp2, [self.yp2], message="Yp2:"), self.yp1)

    for i in range(config.max_para):
        # Passage ranking
        with tf.variable_scope("passage-ranking-attention"):
            # keep_prob / is_train were referenced as bare names here;
            # using config.keep_prob / self.is_train as everywhere else
            vj_P = dropout(att, keep_prob=config.keep_prob, is_train=self.is_train)
            r_Q = dropout(init, keep_prob=config.keep_prob, is_train=self.is_train)
            r_P = attention(r_Q, vj_P, mask=self.c_mask, hidden=d,
                            keep_prob=config.keep_prob, is_train=self.is_train)
            # rnn = gru(num_layers=1, num_units=d, batch_size=N,
            #           input_size=pr_att.get_shape().as_list()[-1],
            #           keep_prob=config.keep_prob, is_train=self.is_train)
            # att_rp = rnn(qc_att, seq_len=self.c_len)
            # Wg
            # was tf.concat([init, att_rp], axis=2): att_rp exists only in the
            # commented-out lines above, so r_P is concatenated along axis=1 instead
            concatenate = tf.concat([init, r_P], axis=1)
            g = tf.nn.tanh(dense(concatenate, hidden=d, use_bias=False, scope="g"))
            g_ = dense(g, 1, use_bias=False, scope="g_")
            gi.append(g_)

    gi_ = tf.convert_to_tensor(gi)
    gi = tf.nn.softmax(gi_)
    # note: gi has already been through a softmax; softmax_cross_entropy_with_logits
    # expects the raw scores (gi_)
    self.pr_loss = tf.nn.softmax_cross_entropy_with_logits(
        logits=gi, labels=self.pr)
def ready(self):
    config = self.config
    N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, \
        config.char_limit, config.hidden, config.char_dim, config.char_hidden
    gru = cudnn_gru if config.use_cudnn else native_gru

    with tf.variable_scope('emb'):
        with tf.variable_scope('char'):
            ch_emb = tf.reshape(tf.nn.embedding_lookup(
                self.char_mat, self.ch), [N * PL, CL, dc])
            qh_emb = tf.reshape(tf.nn.embedding_lookup(
                self.char_mat, self.qh), [N * QL, CL, dc])
            ch_emb = dropout(
                ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            qh_emb = dropout(
                qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            cell_fw = tf.contrib.rnn.GRUCell(dg)
            cell_bw = tf.contrib.rnn.GRUCell(dg)
            _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
            ch_emb = tf.concat([state_fw, state_bw], axis=1)
            _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
            qh_emb = tf.concat([state_fw, state_bw], axis=1)
            qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
            ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

        with tf.name_scope('word'):
            c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
            q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

        c_emb = tf.concat([c_emb, ch_emb], axis=2)
        q_emb = tf.concat([q_emb, qh_emb], axis=2)

    with tf.variable_scope('encoding'):
        rnn = gru(num_layers=3, num_units=d, batch_size=N,
                  input_size=c_emb.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        c = rnn(c_emb, seq_len=self.c_len)
        q = rnn(q_emb, seq_len=self.q_len)

    with tf.variable_scope('attention'):
        qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                               keep_prob=config.keep_prob, is_train=self.is_train)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=qc_att.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        att = rnn(qc_att, seq_len=self.c_len)

    with tf.variable_scope('match'):
        self_att = dot_attention(
            att, att, mask=self.c_mask, hidden=d,
            keep_prob=config.keep_prob, is_train=self.is_train)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=self_att.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        match = rnn(self_att, seq_len=self.c_len)

    with tf.variable_scope('pointer'):
        init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                    keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1],
                          keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        logits1, logits2 = pointer(init, match, d, self.c_mask)

    with tf.variable_scope('predict'):
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        outer = tf.matrix_band_part(outer, 0, 15)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
        losses = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits1, labels=tf.stop_gradient(self.y1))
        losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=logits2, labels=tf.stop_gradient(self.y2))
        self.loss = tf.reduce_mean(losses + losses2)
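For reference, the `predict` scope above selects the answer span through an outer product of the softmaxed start and end distributions, with `tf.matrix_band_part(outer, 0, 15)` zeroing every (start, end) pair where end < start or end - start > 15. A small NumPy sketch of that same selection rule, on toy distributions that are not part of the model:

import numpy as np

def best_span(p_start, p_end, max_len=15):
    """Pick (start, end) maximizing p_start[i] * p_end[j]
    subject to i <= j <= i + max_len, mirroring the
    matrix_band_part(outer, 0, 15) trick in the predict scope."""
    outer = np.outer(p_start, p_end)                          # outer[i, j] = p_start[i] * p_end[j]
    outer = np.triu(outer) - np.triu(outer, k=max_len + 1)    # keep the band 0..max_len above the diagonal
    start = int(np.argmax(outer.max(axis=1)))                 # analogue of yp1
    end = int(np.argmax(outer.max(axis=0)))                   # analogue of yp2
    return start, end, outer[start, end]

# toy distributions over a 6-token context
p1 = np.array([0.1, 0.6, 0.1, 0.1, 0.05, 0.05])
p2 = np.array([0.05, 0.1, 0.55, 0.2, 0.05, 0.05])
print(best_span(p1, p2))   # -> (1, 2, ~0.33)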
def ptrspan(self): config = self.config N, QL, CL, d, dc, dg = config.batch_size, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru SN = self.k W = config.glove_dim d = config.hidden print('embedding part') with tf.name_scope("word"): para_emb = tf.nn.embedding_lookup(self.word_mat, self.para_slice) c_emb = self.sentence_slice q_emb = self.q_slice with tf.name_scope("para_encode"): para_emb_linear = tf.layers.dense( para_emb, d, use_bias=False, kernel_initializer=tf.ones_initializer(), trainable=self.is_train, name='para_emb_line') q_emb_linear = tf.layers.dense( q_emb, d, use_bias=False, kernel_initializer=tf.ones_initializer(), trainable=self.is_train, name='q_emb_line') align_pq = tf.matmul(para_emb_linear, tf.transpose(q_emb_linear, [0, 2, 1])) pq_mask = tf.tile(tf.expand_dims(self.q_mask, axis=1), [1, self.para_maxlen, 1]) align_pq = tf.nn.softmax(softmax_mask(align_pq, pq_mask)) align_para_emb = tf.matmul(align_pq, q_emb_linear) para_emb_concat = tf.concat([ para_emb, align_para_emb, self.para_e_slice, self.para_t_slice ], axis=2) self.para_emb = para_emb_concat print('encode-part') # c_emb = self.sentence_slice c_emb_sen = tf.unstack(c_emb, axis=1) sentence_len = tf.unstack(self.sentence_len, axis=1) c_s = [] with tf.variable_scope("sentence_encoding"): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb_sen[0].get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) print('passage-encoder') for i in range(SN): c_s_emb = rnn(c_emb_sen[i], seq_len=sentence_len[i], concat_layers=False) c_s.append(c_s_emb) para_gru = rnn(para_emb_concat, seq_len=self.para_len, concat_layers=False) with tf.variable_scope("q_encoding"): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=q_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) q = rnn(q_emb, seq_len=self.q_len, concat_layers=False) # c_s_h = [] # with tf.variable_scope("highway_encoding",reuse = tf.AUTO_REUSE): # highway = Highway(hidden_size=2*d,is_train=self.is_train) # for i in range(SN): # c_s_highway = highway(c_s[i]) # c_s_h.append(c_s_highway) # para_gru = highway(para_gru) # q = highway(q) # c_s = c_s_h print('qc_att') self.c_s = c_s self.para_gru = para_gru qc_att = [] sen_mask = tf.unstack(self.sentence_mask, axis=1) with tf.variable_scope("sentence_attention", reuse=tf.AUTO_REUSE): for i in range(SN): qc_att_sample = dot_attention(c_s[i], q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) qc_att.append(qc_att_sample) para_att = dot_attention(para_gru, q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) att_s = [] with tf.variable_scope("sentence_qcatt_rnn"): rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att[0].get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) for i in range(SN): att_s_single = rnn(qc_att[i], seq_len=sentence_len[i]) att_s.append(att_s_single) para_s = rnn(para_att, seq_len=self.para_len) self.sentence_att = qc_att self.para_att = para_att self_att = [] with tf.variable_scope("sentence_cpattention", reuse=tf.AUTO_REUSE): for i in range(SN): self_att_single = dot_attention(att_s[i], para_s, mask=self.para_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) self_att.append(self_att_single) with tf.variable_scope("para_selfattn"): # self.para_enc_slice, mask = self.para_enc_mask_slice, para_self_att = dot_attention(para_s, 
para_s, mask=self.para_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) self.sentence_selfatt = self_att self.para_selfatt = para_self_att match = [] with tf.variable_scope("sentence_cp_rnn"): rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att[0].get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) for i in range(SN): match_single = rnn(self_att[i], seq_len=sentence_len[i]) match.append(match_single) para_match = rnn(para_self_att, seq_len=self.para_len) self.match = match dense_prob = [] dense_con = [] with tf.variable_scope("dense_prob", reuse=tf.AUTO_REUSE): for i in range(SN): sentence_con = tf.concat([c_s[i], att_s[i], match[i]], axis=2) prob = dense_summ(sentence_con, d, mask=sen_mask[i], keep_prob=config.keep_prob, is_train=self.is_train) dense_prob.append(prob) dense_con.append(sentence_con) # with tf.variable_scope("para_prob"): para_con = tf.concat([para_gru, para_s, para_match], axis=2) para_prob = dense_summ(para_con, d, mask=self.para_mask, keep_prob=config.keep_prob, is_train=self.is_train) dense_prob.append(para_prob) dense_prob = tf.concat(dense_prob, axis=1) self.topk = tf.nn.softmax(dense_prob) batch_nums = tf.range(0, limit=N) batch_nums = tf.expand_dims(batch_nums, 1) batch_nums = tf.tile(batch_nums, [1, self.sentence_maxlen]) lo_shape = tf.constant([N, config.para_limit]) sentence_index_slice = tf.unstack(self.sentence_index_slice, axis=1) # how to ensure the probability # sentence1,sentence2,setence3,q,para =?*4 lo1 = [] lo2 = [] with tf.variable_scope("sentence_pointer", reuse=tf.AUTO_REUSE): self.init = summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.keep_prob, is_train=self.is_train) pointer = ptr_net_span(batch=N, hidden=self.init.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) indice_test = [] lo1_test = [] lo2_test = [] present = [] present_inp = [] for i in range(SN): logits1, logits2, inp1, inp2 = pointer(self.init, dense_con[i], d, sen_mask[i]) logits1 = logits1 * tf.cast(sen_mask[i], tf.float32) logits2 = logits2 * tf.cast(sen_mask[i], tf.float32) indice = tf.stack([batch_nums, sentence_index_slice[i]], axis=2) inp = tf.stack([inp1, inp2], axis=1) present.append(inp) present_inp.append(inp2) lo1_test.append(logits1) lo2_test.append(logits2) indice_test.append(indice) self.lo1 = lo1_test[0] self.lo2 = lo1_test[1] self.lo3 = lo1_test[2] lo1 = [ tf.slice(tf.scatter_nd(in1, in2, lo_shape), [0, 0], [N, self.para_maxlen]) for (in1, in2) in zip(indice_test, lo1_test) ] lo2 = [ tf.slice(tf.scatter_nd(in1, in2, lo_shape), [0, 0], [N, self.para_maxlen]) for (in1, in2) in zip(indice_test, lo2_test) ] with tf.variable_scope("para_pointer"): para_pointer = ptr_net_span( batch=N, hidden=self.init.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) para_lo1, para_lo2, inp1, inp2 = para_pointer( self.init, para_match, d, self.para_mask) present_para = tf.stack([inp1, inp2], axis=1) para_lo1 = softmax_mask(para_lo1, self.para_mask) para_lo2 = softmax_mask(para_lo2, self.para_mask) present.append(tf.tile(present_para, [1, 1, 3])) present_inp.append(inp2) lo1.append(para_lo1) lo2.append(para_lo2) self.lo4 = para_lo2 self.present = tf.stack(present, axis=2) out_lo1 = tf.stack(lo1, axis=1) out_lo2 = tf.stack(lo2, axis=1) out_lo1 = (tf.expand_dims(self.topk, axis=2)) * out_lo1 out_logits1 = tf.reduce_sum(out_lo1, axis=1) # out_logits1 = tf.slice(out_logits1, [0, 0], [N, self.para_maxlen]) # out_logits1 = softmax_mask(out_logits1, self.para_mask) 
out_lo2 = (tf.expand_dims(self.topk, axis=2)) * out_lo2 out_logits2 = tf.reduce_sum(out_lo2, axis=1) # out_logits2 = tf.slice(out_logits2, [0, 0], [N, self.para_maxlen]) # out_logits2 = softmax_mask(out_logits2, self.para_mask) self.out_lo1 = out_lo1 self.out_lo2 = out_logits1 # out_logits1 = tf.nn.softmax(out_logits1) # out_logits2 = tf.nn.softmax(out_logits2) outer = tf.matmul( tf.expand_dims(tf.nn.softmax(out_logits1), axis=2), tf.expand_dims(tf.nn.softmax(out_logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) with tf.variable_scope("predict"): self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) losses = tf.nn.softmax_cross_entropy_with_logits_v2( logits=out_logits1, labels=tf.stop_gradient(self.y1_slice)) losses2 = tf.nn.softmax_cross_entropy_with_logits_v2( logits=out_logits2, labels=tf.stop_gradient(self.y2_slice)) prob_y1 = tf.expand_dims(tf.reduce_max(tf.reduce_max(outer, axis=2), axis=1), axis=1) prob_y2 = tf.expand_dims(tf.reduce_max(tf.reduce_max(outer, axis=1), axis=1), axis=1) prob = tf.concat([prob_y1, prob_y2], axis=1) lossRL = -tf.log(prob) * self.reward_Diff self.out1 = losses self.out2 = losses2 loss = tf.concat([ tf.expand_dims(losses, axis=1), tf.expand_dims(losses2, axis=1) ], axis=1) final_reward = loss * self.reward_Diff self.loss3 = tf.reduce_mean((losses + losses2)) lam = config.lam self.loss_span = tf.reduce_mean(final_reward)
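In the sentence pointer above, per-sentence logits are mapped back to paragraph-level positions with `tf.scatter_nd`, using (batch index, paragraph offset) index pairs, before being weighted by the sentence-selection distribution `self.topk`. A NumPy sketch of that scatter step with toy shapes; the names and sizes here are illustrative, not the model's:

import numpy as np

N, sent_len, para_limit = 2, 3, 8   # batch, tokens per sentence, paragraph length

# per-sentence start logits and the paragraph positions each token maps to
logits = np.array([[0.2, 0.9, 0.1],
                   [0.5, 0.3, 0.7]])
positions = np.array([[3, 4, 5],     # sentence of example 0 starts at offset 3
                      [0, 1, 2]])    # sentence of example 1 starts at offset 0

# equivalent of tf.scatter_nd(tf.stack([batch_nums, positions], -1), logits, [N, para_limit])
para_logits = np.zeros((N, para_limit))
batch_nums = np.repeat(np.arange(N)[:, None], sent_len, axis=1)
para_logits[batch_nums, positions] = logits
print(para_logits)
# [[0.  0.  0.  0.2 0.9 0.1 0.  0. ]
#  [0.5 0.3 0.7 0.  0.  0.  0.  0. ]]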
def ready(self): config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru max_para = tf.reduce_max(self.passage_count) self.cell_fw = tf.contrib.rnn.GRUCell(dg) self.cell_bw = tf.contrib.rnn.GRUCell(dg) vp_concat = tf.zeros([N, 1, 300], tf.float32) clen_concat = tf.zeros([N, 1], tf.int32) c_mask_concat = tf.cast(tf.zeros([N, 1]), tf.bool) y1_concat = y2_concat = tf.zeros([N, 1]) seq_mask_concat = tf.cast(tf.zeros([N, 1]), tf.bool) # maybe seq mask is = c_mask q = tf.zeros([N, 1, 1]) for i in range(config.max_para): i_ = tf.constant(i) #print_out(i) def vp(): att, c_len, c_mask, y1, y2, seq_mask = self.get_vp(i) #print(att) #print(c_len) #print(c_mask) #print(y1) #print(y2) #print(seq_mask) #print(q) c_len = tf.reshape(c_len, [N, 1]) att, c_len, c_mask, y1, y2, seq_mask = tf.cond( tf.equal(i_, tf.constant(0)), lambda: (att, c_len, c_mask, y1, y2, seq_mask), lambda: ( tf.concat([vp_concat, att], axis=1), tf.concat([clen_concat, c_len], axis=1), tf.concat([c_mask_concat, c_mask], axis=1), tf.concat([y1_concat, y1], axis=1), tf.concat([y2_concat, y2], axis=1), tf.concat([seq_mask_concat, seq_mask], axis=1), )) return att, c_len, c_mask, y1, y2, seq_mask def dummy(): return vp_concat, clen_concat, c_mask_concat, y1_concat, y2_concat, seq_mask_concat vp_concat, clen_concat, c_mask_concat, y1_concat, y2_concat, seq_mask_concat \ = tf.cond(i_ < max_para, vp, dummy) vp_mask_count = tf.reduce_sum(clen_concat, axis=1) # max count w.r.t original concatenated context (self.c_len) vpmccl = vp_mask_max_count_c_like = tf.reduce_max(vp_mask_count) # max count w.r.t concatenated vp (self.att_vP) ##### not used: vp_mask_max_count = tf.reduce_max(tf.reduce_max(clen_concat)) vp_final_pad_meta = vp_mask_max_count_c_like - vp_mask_count # dont know why this diff happens, but it does diff = tf.shape(self.c_mask)[-1] - vp_mask_max_count_c_like vp_final_pad_seq = tf.sequence_mask(vp_final_pad_meta + diff) seq_mask_concat = tf.concat([seq_mask_concat, vp_final_pad_seq], axis=1) pad_length = tf.reduce_max(vp_final_pad_meta) + diff paddings = tf.convert_to_tensor([[0, 0], [0, pad_length], [0, 0]]) new_vp = tf.pad(vp_concat, paddings, "CONSTANT") new_vp = tf.reshape(tf.boolean_mask(new_vp, seq_mask_concat), [N, vpmccl + diff, 2 * config.hidden]) """ new_vp = tf.Print(new_vp,["vp_mask_max_count_c_like",vp_mask_max_count_c_like, "vp_final_pad_meta",vp_final_pad_meta, "vp_concat",tf.shape(vp_concat),"new_vp",tf.shape(new_vp), "c_mask",tf.shape(self.c_mask),"seq_mask_concat",tf.shape(seq_mask_concat), "clen_concat",clen_concat,"c_mask_last",self.c_mask[:,-1], "vp_mask_count",vp_mask_count,"c_len",self.c_len], summarize=N*10,message="SHORT") """ #self.c_mask = tf.concat([self.c_mask,vp_final_pad_seq],axis=1) with tf.variable_scope("pointer"): # r_Q: init = summ(self.q_enc[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) #logits1, logits2 = pointer(init, new_vp, d, self.c_mask) logits1, logits2 = pointer(init, new_vp, d, self.c_mask) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = 
tf.argmax(tf.reduce_max(outer, axis=1), axis=1) losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=self.y1) losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=self.y2) self.loss = tf.reduce_mean(losses + losses2)
def ready(self):
    config = self.config
    N, PL, QL, d = config.batch_size, self.c_maxlen, self.q_maxlen, config.hidden
    gru = cudnn_gru if config.use_cudnn else native_gru

    with tf.name_scope("embedding"):
        with tf.name_scope("title"):
            t_emb = tf.nn.embedding_lookup(self.word_mat, self.t)
        with tf.name_scope("word"):
            c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
            q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)
        # c_emb = tf.concat([c_emb, ch_emb], axis=2)
        # q_emb = tf.concat([q_emb, qh_emb], axis=2)

    with tf.variable_scope("encoding"):
        rnn = gru(num_layers=3, num_units=d, batch_size=N,
                  input_size=c_emb.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        c = rnn(c_emb, seq_len=self.c_len)
        q = rnn(q_emb, seq_len=self.q_len)

    with tf.variable_scope("attention"):
        qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                               keep_prob=config.keep_prob, is_train=self.is_train)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=qc_att.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        att = rnn(qc_att, seq_len=self.c_len)

    with tf.variable_scope("match"):
        self_att = dot_attention(
            att, att, mask=self.c_mask, hidden=d,
            keep_prob=config.keep_prob, is_train=self.is_train)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=self_att.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        match = rnn(self_att, seq_len=self.c_len)

    with tf.variable_scope("pointer"):
        init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                    keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1],
                          keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        logits1, logits2 = pointer(init, match, d, self.c_mask)

    # answer prediction
    with tf.variable_scope("predict"):
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        # restrict the length of the answer span
        # outer = tf.matrix_band_part(outer, 0, 15)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
        losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits1, labels=self.y1)
        losses2 = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits2, labels=self.y2)
        self.loss = tf.reduce_mean(losses + losses2)

    # document selection
    with tf.variable_scope("select"):
        # batch_size x dim
        c_cum = attention_pooling(match, init, self.c_mask, hidden=d)
        fuse = tf.concat([c_cum, init], axis=1)
        fuse = dense(fuse, hidden=d, use_bias=False, scope="fully1")
        fuse = dense(fuse, hidden=1, use_bias=False, scope="fully2")
        # batch_size x 1
        logits_s = tf.sigmoid(fuse)
        fuse = tf.squeeze(fuse)
        self.s = tf.cast(self.s, tf.float32)
        self.loss_s = tf.nn.sigmoid_cross_entropy_with_logits(
            logits=fuse, labels=self.s)
def ready(self): config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru with tf.variable_scope("emb"): with tf.variable_scope("char"): ch_emb = tf.reshape(tf.nn.embedding_lookup( self.char_mat, self.ch), [N * PL, CL, dc]) qh_emb = tf.reshape(tf.nn.embedding_lookup( self.char_mat, self.qh), [N * QL, CL, dc]) ch_emb = dropout( ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) qh_emb = dropout( qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) cell_fw = tf.contrib.rnn.GRUCell(dg) cell_bw = tf.contrib.rnn.GRUCell(dg) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32) ch_emb = tf.concat([state_fw, state_bw], axis=1) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32) qh_emb = tf.concat([state_fw, state_bw], axis=1) qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg]) ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg]) with tf.name_scope("word"): c_emb = tf.nn.embedding_lookup(self.word_mat, self.c) q_emb = tf.nn.embedding_lookup(self.word_mat, self.q) c_emb = tf.concat([c_emb, ch_emb], axis=2) q_emb = tf.concat([q_emb, qh_emb], axis=2) with tf.variable_scope("encoding"): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape( ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) c = rnn(c_emb, seq_len=self.c_len) q = rnn(q_emb, seq_len=self.q_len) with tf.variable_scope("attention"): qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape( ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) att = rnn(qc_att, seq_len=self.c_len) with tf.variable_scope("match"): self_att = dot_attention( att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape( ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) match = rnn(self_att, seq_len=self.c_len) with tf.variable_scope("pointer"): init = summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list( )[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, match, d, self.c_mask) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) losses = tf.nn.softmax_cross_entropy_with_logits( logits=logits1, labels=self.y1) losses2 = tf.nn.softmax_cross_entropy_with_logits( logits=logits2, labels=self.y2) self.loss = tf.reduce_mean(losses + losses2)
def ready(self): config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru with tf.variable_scope("emb"): with tf.variable_scope("char"): ch_emb = tf.reshape(tf.nn.embedding_lookup( self.char_mat, self.ch), [N * PL, CL, dc]) qh_emb = tf.reshape(tf.nn.embedding_lookup( self.char_mat, self.qh), [N * QL, CL, dc]) ch_emb = dropout( ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) qh_emb = dropout( qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) cell_fw = tf.contrib.rnn.GRUCell(dg) cell_bw = tf.contrib.rnn.GRUCell(dg) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32) ch_emb = tf.concat([state_fw, state_bw], axis=1) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32) qh_emb = tf.concat([state_fw, state_bw], axis=1) qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg]) ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg]) with tf.name_scope("word"): c_emb = tf.nn.embedding_lookup(self.word_mat, self.c) q_emb = tf.nn.embedding_lookup(self.word_mat, self.q) c_emb = tf.concat([c_emb, ch_emb], axis=2) q_emb = tf.concat([q_emb, qh_emb], axis=2) with tf.variable_scope("encoding"): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape( ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) c = rnn(c_emb, seq_len=self.c_len) q = rnn(q_emb, seq_len=self.q_len) with tf.variable_scope("attention"): qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape( ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) att = rnn(qc_att, seq_len=self.c_len) with tf.variable_scope("match"): self_att = dot_attention( att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape( ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) match = rnn(self_att, seq_len=self.c_len) #[10, ?,300] with tf.variable_scope("pointer"): init = summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list( )[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, match, d, self.c_mask) with tf.variable_scope("content_modeling"): logits4, c_semantics = content_model(init, match, config.hidden) with tf.variable_scope("cross_passage_attention"): self.query_num = int(config.batch_size/config.passage_num) c_semantics = tf.reshape(c_semantics, shape=[self.query_num, config.passage_num, -1]) attnc_key = tf.tile(tf.expand_dims(c_semantics, axis=2), [1, 1, config.passage_num, 1]) attnc_mem = tf.tile(tf.expand_dims(c_semantics, axis=1), [1, config.passage_num, 1, 1]) attnc_w = tf.reduce_sum(attnc_key*attnc_mem, axis=-1) attnc_mask = tf.ones([config.passage_num, config.passage_num])-tf.diag([1.0]*config.passage_num) attnc_w = tf.nn.softmax(attnc_w*attnc_mask, axis=-1) attncp = tf.reduce_sum(tf.tile(tf.expand_dims(attnc_w, axis=-1), [1, 1, 1, 2*config.hidden])*attnc_mem, axis= 2) with tf.variable_scope("pseudo_label"): self.is_select = tf.reshape(tf.squeeze(self.is_select), shape=[self.query_num, config.passage_num]) 
self.is_select = self.is_select/tf.tile(tf.reduce_sum(self.is_select, axis=-1, keepdims=True), [1, config.passage_num]) sim_matrix = attnc_w lb_matrix = tf.tile(tf.expand_dims(self.is_select, axis=1), [1, config.passage_num, 1]) self.pse_is_select = tf.reduce_sum(sim_matrix*lb_matrix, axis=-1) + tf.constant([0.00000001]*config.passage_num, dtype=tf.float32) # avoid all zero self.pse_is_select = self.pse_is_select/tf.tile(tf.reduce_sum(self.pse_is_select, axis=-1, keepdims=True), [1,config.passage_num]) alpha = 0.7 self.fuse_label = alpha*self.is_select + (1-alpha)*tf.stop_gradient(self.pse_is_select) with tf.variable_scope("predict_passage"): init = tf.reshape(init, shape=[self.query_num, config.passage_num, -1]) attn_concat = tf.concat([init, attncp, c_semantics], axis=-1) d1 = tf.layers.dense(attn_concat, 2*config.hidden, activation= tf.nn.leaky_relu, bias_initializer= tf.glorot_uniform_initializer()) #150 d2 = tf.layers.dense(d1, config.hidden, activation= tf.nn.leaky_relu, bias_initializer= tf.glorot_uniform_initializer()) #75 logits3 = tf.squeeze(tf.layers.dense(d2, 1, activation= None, bias_initializer= tf.glorot_uniform_initializer())) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 30) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) #logits3 = tf.reduce_max(tf.reduce_max(outer, axis=2), axis=1) self.is_select_p = tf.nn.sigmoid(logits3) losses = tf.nn.softmax_cross_entropy_with_logits_v2( logits=logits1, labels=tf.stop_gradient(self.y1)) losses2 = tf.nn.softmax_cross_entropy_with_logits_v2( logits=logits2, labels=tf.stop_gradient(self.y2)) weighted_losses = weighted_loss(config, 0.000001, self.y1, losses) #0.01 weighted_losses2 = weighted_loss(config, 0.000001, self.y2, losses2) #0.01 losses3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2( logits=logits3, labels=tf.stop_gradient(self.fuse_label))) in_answer_weight = tf.ones_like(self.in_answer) + 3*self.in_answer losses4 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits( logits=logits4, labels=tf.stop_gradient(self.in_answer))*in_answer_weight, axis=-1) weighted_losses4 = weighted_loss(config, 0.000001, self.in_answer, losses4) self.loss_dict = {'pos_s loss':losses, 'pos_e loss':losses2, 'select loss':losses3, 'in answer':losses4} for key, values in self.loss_dict.items(): self.loss_dict[key] = tf.reduce_mean(values) self.loss = tf.reduce_mean(weighted_losses + weighted_losses2 + losses3+ weighted_losses4)
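The `cross_passage_attention` scope above computes dot-product similarities between the per-passage summary vectors and suppresses self-attention by multiplying the score matrix with `ones - diag` before the softmax (note this zeroes the diagonal scores rather than masking them to -inf, which is what the code does). A hedged NumPy sketch of that weight computation with toy inputs:

import numpy as np

def cross_passage_weights(c_sem):
    """c_sem: [query_num, passage_num, dim] passage summaries.
    Returns softmaxed pairwise weights with the diagonal (a passage
    attending to itself) suppressed, mirroring attnc_w above."""
    P = c_sem.shape[1]
    w = np.einsum('qpd,qkd->qpk', c_sem, c_sem)   # dot products between passage summaries
    mask = np.ones((P, P)) - np.eye(P)            # zero out self-similarity, as in the TF code
    w = w * mask
    e = np.exp(w - w.max(axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)

c_sem = np.random.rand(1, 3, 4)                   # 1 query, 3 passages, toy dim 4
print(cross_passage_weights(c_sem).round(3))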
def ready(self): config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = native_rnn c_elmo_features = self.elmo(self.c_elmo) q_elmo_features = self.elmo(self.q_elmo) with tf.variable_scope("emb"): with tf.variable_scope("char"): ch_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc]) qh_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc]) ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) cell_fw = tf.contrib.rnn.GRUCell(dg) cell_bw = tf.contrib.rnn.GRUCell(dg) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32) ch_emb = tf.concat([state_fw, state_bw], axis=1) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32) qh_emb = tf.concat([state_fw, state_bw], axis=1) qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg]) ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg]) with tf.name_scope("word"): c_emb = tf.nn.embedding_lookup(self.word_mat, self.c) q_emb = tf.nn.embedding_lookup(self.word_mat, self.q) c_elmo_emb = weight_layers('embedding', c_elmo_features, l2_coef=0.0, do_layer_norm=False)['weighted_op'] tf.get_variable_scope().reuse_variables() q_elmo_emb = weight_layers('embedding', q_elmo_features, l2_coef=0.0, do_layer_norm=False)['weighted_op'] c_elmo_emb = dropout(c_elmo_emb, keep_prob=config.elmo_keep_prob, is_train=self.is_train) q_elmo_emb = dropout(q_elmo_emb, keep_prob=config.elmo_keep_prob, is_train=self.is_train) c_emb = tf.concat([c_emb, ch_emb, c_elmo_emb], axis=2) q_emb = tf.concat([q_emb, qh_emb, q_elmo_emb], axis=2) with tf.variable_scope("encoding"): rnn = gru(config.cell, num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) c = rnn(c_emb, seq_len=self.c_len) q = rnn(q_emb, seq_len=self.q_len) with tf.variable_scope("attention"): qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(config.cell, num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) att = rnn(qc_att, seq_len=self.c_len) with tf.variable_scope("match"): self_att = dot_attention(att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(config.cell, num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) match = rnn(self_att, seq_len=self.c_len) c_elmo_enc = weight_layers('encoding', c_elmo_features, l2_coef=0.0, do_layer_norm=False)['weighted_op'] tf.get_variable_scope().reuse_variables() q_elmo_enc = weight_layers('encoding', q_elmo_features, l2_coef=0.0, do_layer_norm=False)['weighted_op'] c_elmo_enc = dropout(c_elmo_enc, keep_prob=config.elmo_keep_prob, is_train=self.is_train) q_elmo_enc = dropout(q_elmo_enc, keep_prob=config.elmo_keep_prob, is_train=self.is_train) match = tf.concat([match, c_elmo_enc], -1) q = tf.concat([q, q_elmo_enc], -1) with tf.variable_scope("pointer"): init = summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1], 
keep_prob=config.ptr_keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, match, d, self.c_mask) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) losses = tf.nn.softmax_cross_entropy_with_logits_v2( logits=logits1, labels=tf.stop_gradient(self.y1)) losses2 = tf.nn.softmax_cross_entropy_with_logits_v2( logits=logits2, labels=tf.stop_gradient(self.y2)) self.loss = tf.reduce_mean(losses + losses2)
def ready(self): config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru with tf.variable_scope("emb"): with tf.variable_scope("char"): ch_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc]) qh_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc]) ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) cell_fw = tf.contrib.rnn.GRUCell(dg) cell_bw = tf.contrib.rnn.GRUCell(dg) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32) ch_emb = tf.concat([state_fw, state_bw], axis=1) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32) qh_emb = tf.concat([state_fw, state_bw], axis=1) qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg]) ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg]) with tf.name_scope("word"): c_emb = tf.nn.embedding_lookup(self.word_mat, self.c) q_emb = tf.nn.embedding_lookup(self.word_mat, self.q) c_emb = tf.concat([c_emb, ch_emb], axis=2) q_emb = tf.concat([q_emb, qh_emb], axis=2) with tf.variable_scope("encoding"): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) c = rnn(c_emb, seq_len=self.c_len) q = rnn(q_emb, seq_len=self.q_len) with tf.variable_scope("attention"): qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train, name_scope="attention_layer") rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) att = rnn(qc_att, seq_len=self.c_len) tf.summary.histogram('vt_P', att) self.att_logits = tf.get_collection('Softmax_logits')[0] self.att_outputs = tf.get_collection('MatMul_outputs')[0] with tf.variable_scope("match"): self_att = dot_attention(att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train, name_scope="match_layer") rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) match = rnn(self_att, seq_len=self.c_len) tf.summary.histogram('self_match', match) self.match_logits = tf.get_collection('Softmax_logits')[1] self.match_outputs = tf.get_collection('MatMul_outputs')[1] with tf.variable_scope("pointer"): # r_Q: init = summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, match, d, self.c_mask) tf.summary.histogram('rQ_init', init) tf.summary.histogram('pointer_logits_1', logits1) tf.summary.histogram('pointer_logits_2', logits2) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=self.y1) losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, 
labels=self.y2) self.loss = tf.reduce_mean(losses + losses2) self.predict_outer_start = tf.reduce_max(outer, axis=2) self.predict_outer_end = tf.reduce_max(outer, axis=1)
def ready(self): config = self.config N, PL, QL, d = config.batch_size, self.c_maxlen, self.q_maxlen, config.hidden keep_prob, is_train = config.keep_prob, config.is_train gru = cudnn_gru if config.use_cudnn else native_gru with tf.variable_scope("emb"): with tf.name_scope("word"): c = tf.nn.embedding_lookup(self.word_mat, self.c) q = tf.nn.embedding_lookup(self.word_mat, self.q) c_emb = tf.concat([c, self.fs, self.fe], axis=2) q_emb = q with tf.variable_scope("encoding"): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) c_enc, bw_final_state_c = rnn(c_emb, seq_len=self.c_len) q_enc, bw_final_state_q = rnn(q_emb, seq_len=self.q_len) encoder_outputs = tf.concat([c_enc, q_enc], axis=1) bw_final_state = (bw_final_state_c, bw_final_state_q) with tf.variable_scope("attention"): bi_final_hidden = dropout(bw_final_state, keep_prob=keep_prob, is_train=is_train) source_sequence_length = tf.add(PL, QL) logits, sample_id, final_context_state = _build_decoder( encoder_outputs, bi_final_hidden, config, is_train, source_sequence_length, target_sequence_length, target_input, embedding_decoder) """ qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train, name_scope="attention_layer") rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape( ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) att = rnn(qc_att, seq_len=self.c_len) # att is the v_P if i==0: att_vP = att else: att_vP = tf.concat([att_vP, att], axis=1) #att = tf.Print(att,[att],message="att:") print("att:",att.get_shape().as_list()) print("att_vP:",att_vP.get_shape().as_list()) """ with tf.variable_scope("pointer"): # r_Q: init = summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) print("rQ:", init.get_shape().as_list()) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, att_vP, d, self.c_pr_mask) tf.summary.histogram('rQ_init', init) tf.summary.histogram('pointer_logits_1', logits1) tf.summary.histogram('pointer_logits_2', logits2) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=self.y1_pr) losses2 = tf.nn.softmax_cross_entropy_with_logits( logits=logits2, labels=self.y2_pr) #losses1_2 = tf.reduce_mean(losses1_2, axis=0) self.loss = tf.reduce_mean(losses + losses2) # print losses #condition = tf.greater(self.loss, 11) #self.yp1 = tf.where(condition, tf.Print(self.yp1,[self.yp1],message="Yp1:"), self.yp1) #self.yp2 = tf.where(condition, tf.Print(self.yp2,[self.yp2],message="Yp2:"), self.yp1) if config.with_passage_ranking: gi = None for i in range(config.max_para): # Passage ranking if i == 0: with tf.variable_scope("passage-ranking-attention"): #att_vP = tf.Print(att_vP,[att_vP.get_shape()],message="att_vP:") vj_P = att_vP[:, i * 400:(i + 1) * 400, :] pr_att = pr_attention( batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train, name_scope="passage_ranking_att_layer") r_P = pr_att(init, vj_P, d, self.c_mask) tf.summary.histogram('r_P_' + 
str(i), r_P) #r_P = tf.Print(r_P,[r_P],message="r_p") # Wg concatenate = tf.concat([init, r_P], axis=1) g = tf.nn.tanh( dense(concatenate, hidden=d, use_bias=False, scope="g", name_scope="dense_pr_att_layer_1")) g_ = dense(g, 1, use_bias=False, scope="g_", name_scope="dense_pr_att_layer_2") #g = tf.Print(g,[g],message="g") if i == 0: gi = tf.reshape(g_, [N, 1]) else: gi = tf.concat([gi, tf.reshape(g_, [N, 1])], axis=1) else: with tf.variable_scope("passage-ranking-attention", reuse=True): #att_vP = tf.Print(att_vP,[att_vP.get_shape()],message="att_vP:") vj_P = att_vP[:, i * 400:(i + 1) * 400, :] pr_att = pr_attention( batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train, name_scope="passage_ranking_att_layer") r_P = pr_att(init, vj_P, d, self.c_mask) tf.summary.histogram('r_P_' + str(i), r_P) #r_P = tf.Print(r_P,[r_P],message="r_p") # Wg concatenate = tf.concat([init, r_P], axis=1) g = tf.nn.tanh( dense(concatenate, hidden=d, use_bias=False, scope="g", name_scope="dense_pr_att_layer_1")) g_ = dense(g, 1, use_bias=False, scope="g_", name_scope="dense_pr_att_layer_2") #g = tf.Print(g,[g],message="g") if i == 0: gi = tf.reshape(g_, [N, 1]) else: gi = tf.concat([gi, tf.reshape(g_, [N, 1])], axis=1) tf.summary.histogram('gi', gi) #gi_ = tf.convert_to_tensor(gi,dtype=tf.float32) #self.gi = tf.nn.softmax(gi_) #self.losses3 = tf.nn.softmax_cross_entropy_with_logits( # logits=gi_, labels=tf.reshape(self.pr,[-1,1])) self.losses3 = tf.nn.softmax_cross_entropy_with_logits( logits=gi, labels=self.pr) #self.losses3 = tf.Print(self.losses3,[self.losses3,tf.reduce_max(self.losses3), # tf.reduce_max(self.pr),tf.reduce_max(gi)],message="losses3:") self.pr_loss = tf.reduce_mean(self.losses3) #self.pr_loss = tf.Print(self.pr_loss,[self.pr_loss]) self.r = tf.constant(0.8) self.e_loss1 = tf.multiply(self.r, self.loss) self.e_loss2 = tf.multiply(tf.subtract(tf.constant(1.0), self.r), self.pr_loss) self.e_loss = tf.add(self.e_loss1, self.e_loss2)
def ready(self): config = self.config N, PL, QL, CL, d, dc, dg = \ config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, \ config.char_dim, config.char_hidden gru = CudnnGRU if config.use_cudnn else NativeGRU with tf.variable_scope("emb"): with tf.variable_scope("char"): ch_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc]) qh_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc]) ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) cell_fw = tf.contrib.rnn.GRUCell(dg) cell_bw = tf.contrib.rnn.GRUCell(dg) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32) ch_emb = tf.concat([state_fw, state_bw], axis=1) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32) qh_emb = tf.concat([state_fw, state_bw], axis=1) qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg]) ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg]) with tf.name_scope("word"): c_emb = tf.nn.embedding_lookup(self.word_mat, self.c) q_emb = tf.nn.embedding_lookup(self.word_mat, self.q) c_emb = tf.concat([c_emb, ch_emb], axis=2) q_emb = tf.concat([q_emb, qh_emb], axis=2) with tf.variable_scope("encoding"): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) c = rnn(c_emb, seq_len=self.c_len) # representation of paragraph q = rnn(q_emb, seq_len=self.q_len) # representation of question with tf.variable_scope( "attention" ): # gated att rnn (using dot att from Attention is All You Need actually) qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) att = rnn(qc_att, seq_len=self.c_len) with tf.variable_scope("match"): # self-matching rnn self_att = dot_attention(att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) match = rnn(self_att, seq_len=self.c_len) with tf.variable_scope("pointer"): init = summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) pointer = PointerNet(batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, match, d, self.c_mask) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=self.y1) losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=self.y2) self.loss = tf.reduce_mean(losses + losses2)
def ready(self):
    config = self.config
    N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, \
        config.char_limit, config.hidden, config.char_dim, config.char_hidden
    gru = cudnn_gru if config.use_cudnn else native_gru

    with tf.variable_scope("emb"):
        # with tf.variable_scope("char"):
        #     ch_emb = tf.reshape(tf.nn.embedding_lookup(
        #         self.char_mat, self.ch), [N * PL, CL, dc])
        #     qh_emb = tf.reshape(tf.nn.embedding_lookup(
        #         self.char_mat, self.qh), [N * QL, CL, dc])
        #     ch_emb = dropout(
        #         ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
        #     qh_emb = dropout(
        #         qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
        #     cell_fw = tf.contrib.rnn.GRUCell(dg)
        #     cell_bw = tf.contrib.rnn.GRUCell(dg)
        #     _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
        #         cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
        #     ch_emb = tf.concat([state_fw, state_bw], axis=1)
        #     _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
        #         cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
        #     qh_emb = tf.concat([state_fw, state_bw], axis=1)
        #     qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
        #     ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

        embedding = tf.get_variable(
            'embedding', [config.vocab_size, config.embedding_size],
            initializer=tf.random_uniform_initializer(minval=-0.05, maxval=0.05))
        self.regularizer = tf.nn.l2_loss(embedding)

        with tf.name_scope("word"):
            c_emb = tf.nn.embedding_lookup(embedding, self.c)
            q_emb = tf.nn.embedding_lookup(embedding, self.q)
            c_emb = dropout(c_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            q_emb = dropout(q_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            c_emb = tf.reshape(c_emb, [N, PL, config.embedding_size])
            q_emb = tf.reshape(q_emb, [N, QL, config.embedding_size])
            # c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
            # q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)
            # c_emb = tf.concat([c_emb, ch_emb], axis=2)
            # q_emb = tf.concat([q_emb, qh_emb], axis=2)

    with tf.variable_scope("encoding"):
        # encode the inputs with a single recurrent layer
        rnn_c = gru(num_layers=1, num_units=d, batch_size=N,
                    input_size=c_emb.get_shape().as_list()[-1],
                    keep_prob=config.keep_prob, is_train=self.is_train)
        rnn_q = gru(num_layers=1, num_units=d, batch_size=N,
                    input_size=q_emb.get_shape().as_list()[-1],
                    keep_prob=config.keep_prob, is_train=self.is_train)
        c = rnn_c(c_emb, seq_len=self.c_len)
        q = rnn_q(q_emb, seq_len=self.q_len)

    with tf.variable_scope("attention"):
        M = tf.matmul(c, q, adjoint_b=True)
        M_mask = tf.to_float(
            tf.matmul(tf.cast(tf.expand_dims(self.c_mask, -1), tf.int32),
                      tf.cast(tf.expand_dims(self.q_mask, 1), tf.int32)))
        alpha = softmax(M, 1, M_mask)  # (batch_size, M, N)
        beta = softmax(M, 2, M_mask)   # (batch_size, M, N)
        # query_importance = tf.expand_dims(tf.reduce_mean(beta, reduction_indices=1), -1)
        query_importance = tf.expand_dims(
            tf.reduce_sum(beta, 1) / tf.to_float(tf.expand_dims(PL, -1)), -1)  # (batch_size, N, 1)
        s = tf.squeeze(tf.matmul(alpha, query_importance), [2])  # (batch_size, M)
        # unpacked_s = zip(tf.unstack(s, config.batch_size), tf.unstack(self.c, config.batch_size))
        # y_hat = (batch_size, config.vocab_size)  (probability of each word being the answer)
        # y_hat = tf.stack([tf.unsorted_segment_sum(attentions, sentence_ids, config.vocab_size)
        #                   for (attentions, sentence_ids) in unpacked_s])
        match = c * tf.reshape(s, [-1, PL, 1])  # (batch_size, max_c_len, dim)

    # obtain r_Q by summarizing the question encoding q
    with tf.variable_scope("pointer"):
        init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                    keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1],
                          keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        logits1, logits2 = pointer(init, match, d, self.c_mask)

    with tf.variable_scope("predict"):
        self.start_logits = tf.nn.softmax(logits1)
        self.stop_logits = tf.nn.softmax(logits2)
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        outer = tf.matrix_band_part(outer, 0, 15)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
        losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits1, labels=self.y1)
        losses2 = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits2, labels=self.y2)
        self.loss = tf.reduce_mean(
            losses + losses2) + config.l2_reg * self.regularizer
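The `attention` scope in this variant follows an attention-over-attention pattern: a context-question similarity matrix `M`, a softmax `alpha` over context positions per question word, a softmax `beta` over question positions per context word averaged into a query-importance vector, and a per-context-word score `s` that reweights `c`. A single-example NumPy sketch of that scoring, with masking omitted for brevity and purely illustrative shapes:

import numpy as np

def aoa_scores(C, Q):
    """C: [PL, dim] context encodings, Q: [QL, dim] question encodings.
    Returns one attention score per context word, following the
    alpha / beta / query_importance construction above (no masking)."""
    M = C @ Q.T                                                 # [PL, QL] word-pair similarities
    alpha = np.exp(M) / np.exp(M).sum(axis=0, keepdims=True)    # softmax over context, per question word
    beta = np.exp(M) / np.exp(M).sum(axis=1, keepdims=True)     # softmax over question, per context word
    query_importance = beta.sum(axis=0) / M.shape[0]            # average attention each question word receives
    return alpha @ query_importance                             # [PL] score per context word

C = np.random.rand(5, 4)
Q = np.random.rand(3, 4)
print(aoa_scores(C, Q).round(3))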
def ready(self):
    config = self.config
    N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, \
        config.char_limit, config.hidden, config.char_dim, config.char_hidden
    gru = cudnn_gru if config.use_cudnn else native_gru

    with tf.variable_scope("emb"):
        with tf.variable_scope("char"):
            ch_emb = tf.reshape(
                tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc])
            qh_emb = tf.reshape(
                tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc])
            ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
            cell_fw = tf.contrib.rnn.GRUCell(dg)
            cell_bw = tf.contrib.rnn.GRUCell(dg)
            _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
            ch_emb = tf.concat([state_fw, state_bw], axis=1)
            _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
            qh_emb = tf.concat([state_fw, state_bw], axis=1)
            qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
            ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

        with tf.name_scope("word"):
            c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
            q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

        c_emb = tf.concat([c_emb, ch_emb], axis=2)
        q_emb = tf.concat([q_emb, qh_emb], axis=2)

    with tf.variable_scope("encoding"):
        # encode passage and question with a 3-layer RNN
        rnn = gru(num_layers=3, num_units=d, batch_size=N,
                  input_size=c_emb.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        # outputs have shape (batch_size, max_len, hidden_dim)
        c = rnn(c_emb, seq_len=self.c_len)
        q = rnn(q_emb, seq_len=self.q_len)

    with tf.variable_scope("relation_analysis"):
        init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                    keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        g_theta_layers = [256, 128, 1]  # attention component
        md = Relation_Module(config, self.c_maxlen, self.q_maxlen, g_theta_layers)
        # r: context re-weighted by attention against the question summary
        r, alpha = md.hop_2(c, init, phase=self.is_train, activation=tf.nn.relu)
        c = r[-1]

    with tf.variable_scope("attention"):
        qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                               keep_prob=config.keep_prob, is_train=self.is_train)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=qc_att.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        att = rnn(qc_att, seq_len=self.c_len)

    with tf.variable_scope("match"):
        self_att = dot_attention(att, att, mask=self.c_mask, hidden=d,
                                 keep_prob=config.keep_prob, is_train=self.is_train)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=self_att.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        match = rnn(self_att, seq_len=self.c_len)

    # r_Q is obtained from the question encoding
    with tf.variable_scope("pointer"):
        # init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
        #             keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1],
                          keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        logits1, logits2 = pointer(init, match, d, self.c_mask)

    with tf.variable_scope("predict"):
        self.start_logits = tf.nn.softmax(logits1)
        self.stop_logits = tf.nn.softmax(logits2)
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        outer = tf.matrix_band_part(outer, 0, 15)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=self.y1)
        losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=self.y2)
        self.loss = tf.reduce_mean(losses + losses2)
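# dot_attention(...) is shared by all of these variants but defined elsewhere in the repo.
# The sketch below shows, with NumPy and for a single example, what a masked dot-product
# attention of this kind typically computes (an assumption about the helper's behaviour;
# the real implementation also applies dropout and a learned gate):
import numpy as np

def masked_dot_attention(inputs, memory, memory_mask):
    """inputs: [L_c, H] context; memory: [L_q, H] question; memory_mask: [L_q] 1/0."""
    scores = inputs @ memory.T                                   # [L_c, L_q] similarities
    scores = np.where(memory_mask[None, :] > 0, scores, -1e30)   # ignore padded question tokens
    scores -= scores.max(axis=1, keepdims=True)
    alpha = np.exp(scores)
    alpha /= alpha.sum(axis=1, keepdims=True)                    # softmax over question positions
    return np.concatenate([inputs, alpha @ memory], axis=1)      # [L_c, 2H]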
def ready(self):
    config = self.config
    N, PL, QL, CL, BL, d, dc, dg, dbpe, dbpeh = \
        config.batch_size, self.c_maxlen, self.q_maxlen, \
        config.char_limit, config.bpe_limit, config.hidden, \
        config.glove_dim if config.pretrained_char else config.char_dim, config.char_hidden, \
        config.bpe_glove_dim if config.pretrained_bpe_emb else config.bpe_dim, config.bpe_hidden
    gru = cudnn_gru if config.use_cudnn else native_gru

    with tf.variable_scope("emb"):
        if config.use_char:
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

        if config.use_bpe:
            with tf.variable_scope("bpe"):
                cb_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.bpe_mat, self.cb), [N * PL, BL, dbpe])
                qb_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.bpe_mat, self.qb), [N * QL, BL, dbpe])
                cb_emb = dropout(cb_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qb_emb = dropout(qb_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                cell_fw = tf.contrib.rnn.GRUCell(dbpeh)
                cell_bw = tf.contrib.rnn.GRUCell(dbpeh)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, cb_emb, self.cb_len, dtype=tf.float32)
                cb_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qb_emb, self.qb_len, dtype=tf.float32)
                qb_emb = tf.concat([state_fw, state_bw], axis=1)
                qb_emb = tf.reshape(qb_emb, [N, QL, 2 * dbpeh])
                cb_emb = tf.reshape(cb_emb, [N, PL, 2 * dbpeh])

        with tf.name_scope("word"):
            c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
            q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

        if config.use_char:
            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)
        if config.use_bpe:
            c_emb = tf.concat([c_emb, cb_emb], axis=2)
            q_emb = tf.concat([q_emb, qb_emb], axis=2)
        if config.use_pos:
            cp_emb = tf.nn.embedding_lookup(self.pos_mat, self.cp)
            qp_emb = tf.nn.embedding_lookup(self.pos_mat, self.qp)
            c_emb = tf.concat([c_emb, cp_emb], axis=2)
            q_emb = tf.concat([q_emb, qp_emb], axis=2)

    with tf.variable_scope("encoding"):
        rnn = gru(num_layers=3, num_units=d, batch_size=N,
                  input_size=c_emb.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        c = rnn(c_emb, seq_len=self.c_len)
        q = rnn(q_emb, seq_len=self.q_len)

    with tf.variable_scope("attention"):
        qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                               keep_prob=config.keep_prob, is_train=self.is_train)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=qc_att.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        att = rnn(qc_att, seq_len=self.c_len)

    with tf.variable_scope("match"):
        self_att = dot_attention(att, att, mask=self.c_mask, hidden=d,
                                 keep_prob=config.keep_prob, is_train=self.is_train)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=self_att.get_shape().as_list()[-1],
                  keep_prob=config.keep_prob, is_train=self.is_train)
        match = rnn(self_att, seq_len=self.c_len)

    with tf.variable_scope("pointer"):
        init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                    keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1],
                          keep_prob=config.ptr_keep_prob, is_train=self.is_train)
        logits1, logits2 = pointer(init, match, d, self.c_mask)

    with tf.variable_scope("predict"):
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        outer = tf.matrix_band_part(outer, 0, 15)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=self.y1)
        losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=self.y2)
        self.loss = tf.reduce_mean(losses + losses2)
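# In the variant above the word vectors are always used, while character, BPE and POS
# features are appended only when the corresponding config flag is set. A small,
# framework-free sketch of that conditional feature concatenation (illustrative only;
# build_token_features is a hypothetical helper):
import numpy as np

def build_token_features(word_emb, char_emb=None, bpe_emb=None, pos_emb=None):
    """Concatenate whichever optional feature blocks are enabled along the last axis."""
    feats = [word_emb]
    for extra in (char_emb, bpe_emb, pos_emb):   # mirrors use_char / use_bpe / use_pos
        if extra is not None:
            feats.append(extra)
    return np.concatenate(feats, axis=-1)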
def ready(self): config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru gi = [] att_vP = [] for i in range(config.max_para): print(i) with tf.variable_scope("emb" + str(i)): with tf.variable_scope("char" + str(i)): #CL = tf.Print(CL,[CL],message="CL:") #PL = tf.Print(PL,[PL],message="PL:") #self.ch_pr = tf.Print(self.ch_pr,[self.ch_pr.get_shape()],message="ch_pr:") self.ch_pr_ = self.ch_pr[:, i * 400:(i + 1) * 400, :] print(self.ch_pr_.get_shape()) #self.c_pr = tf.reshape(self.c_pr, [N, 12, PL]) #print(self.ch.get_shape()) #print(self.ch_pr.get_shape()) #print(self.c.get_shape()) #print(self.c_pr.get_shape()) #self.ch_pr = tf.Print(self.ch_pr,[self.ch_pr[:,2:,:]],message="ch_pr") ch_emb = tf.reshape(tf.nn.embedding_lookup(\ self.char_mat, self.ch_pr_), [N * PL, CL, dc]) # self.char_mat, self.ch), [N * PL, CL, dc]) qh_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc]) ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) #ch_emb = tf.Print(ch_emb,[ch_emb],message="ch_emb") #qh_emb = tf.Print(qh_emb,[qh_emb],message="qh_emb") qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) cell_fw = tf.contrib.rnn.GRUCell(dg) cell_bw = tf.contrib.rnn.GRUCell(dg) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32) ch_emb = tf.concat([state_fw, state_bw], axis=1) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32) #state_fw = tf.Print(state_fw,[state_fw],message="state_fw") #state_bw = tf.Print(state_bw,[state_bw],message="state_bw") qh_emb = tf.concat([state_fw, state_bw], axis=1) qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg]) ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg]) #ch_emb = tf.Print(ch_emb,[ch_emb],message="ch_emb") with tf.name_scope("word" + str(i)): c_emb = tf.nn.embedding_lookup( self.word_mat, self.c_pr[:, i * 400:(i + 1) * 400]) q_emb = tf.nn.embedding_lookup(self.word_mat, self.q) c_emb = tf.concat([c_emb, ch_emb], axis=2) q_emb = tf.concat([q_emb, qh_emb], axis=2) with tf.variable_scope("encoding" + str(i)): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) c = rnn(c_emb, seq_len=self.c_len) q = rnn(q_emb, seq_len=self.q_len) with tf.variable_scope("attention" + str(i)): qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) att = rnn(qc_att, seq_len=self.c_len) # att is the v_P if i == 0: att_vP = att else: att_vP = tf.concat([att_vP, att], axis=1) #att = tf.Print(att,[att],message="att:") print("att:", att.get_shape().as_list()) print("att_vP:", att_vP.get_shape().as_list()) #att_vP = tf.Print(att_vP,[tf.shape(att_vP)],message="att_vP:") """ with tf.variable_scope("match"): self_att = dot_attention( att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape( ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) match = rnn(self_att, seq_len=self.c_len) """ with tf.variable_scope("pointer"): # r_Q: init = 
summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) print("rQ:", init.get_shape().as_list()) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, att, d, self.c_mask) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=self.y1) losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=self.y2) #losses1_2 = tf.reduce_mean(losses1_2, axis=0) self.loss = tf.reduce_mean(losses + losses2) # print losses #condition = tf.greater(self.loss, 11) #self.yp1 = tf.where(condition, tf.Print(self.yp1,[self.yp1],message="Yp1:"), self.yp1) #self.yp2 = tf.where(condition, tf.Print(self.yp2,[self.yp2],message="Yp2:"), self.yp1) if config.with_passage_ranking: gi = None for i in range(config.max_para): # Passage ranking with tf.variable_scope("passage-ranking-attention" + str(i)): #att_vP = tf.Print(att_vP,[att_vP.get_shape()],message="att_vP:") vj_P = att_vP[:, i * 400:(i + 1) * 400, :] pr_att = pr_attention( batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) r_P = pr_att(init, vj_P, d, self.c_mask) #r_P = tf.Print(r_P,[r_P],message="r_p") # Wg concatenate = tf.concat([init, r_P], axis=1) g = tf.nn.tanh( dense(concatenate, hidden=d, use_bias=False, scope="g" + str(i))) g_ = dense(g, 1, use_bias=False, scope="g_" + str(i)) #g = tf.Print(g,[g],message="g") if i == 0: gi = tf.reshape(g_, [N, 1]) else: gi = tf.concat([gi, tf.reshape(g_, [N, 1])], axis=1) #gi_ = tf.convert_to_tensor(gi,dtype=tf.float32) #self.gi = tf.nn.softmax(gi_) #self.losses3 = tf.nn.softmax_cross_entropy_with_logits( # logits=gi_, labels=tf.reshape(self.pr,[-1,1])) self.losses3 = tf.nn.softmax_cross_entropy_with_logits( logits=gi, labels=self.pr) #self.losses3 = tf.Print(self.losses3,[self.losses3,tf.reduce_max(self.losses3), # tf.reduce_max(self.pr),tf.reduce_max(gi)],message="losses3:") self.pr_loss = tf.reduce_mean(self.losses3) #self.pr_loss = tf.Print(self.pr_loss,[self.pr_loss]) self.r = tf.constant(0.8) self.e_loss1 = tf.multiply(self.r, self.loss) self.e_loss2 = tf.multiply(tf.subtract(tf.constant(1.0), self.r), self.pr_loss) self.e_loss = tf.add(self.e_loss1, self.e_loss2)
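# When passage ranking is enabled, the variant above mixes the span loss and the
# passage-ranking loss with a fixed weight r = 0.8 (self.e_loss). Sketch of that
# combination, with made-up numbers purely to show the arithmetic:
def joint_loss(span_loss, ranking_loss, r=0.8):
    """e_loss = r * span_loss + (1 - r) * ranking_loss, as in the block above."""
    return r * span_loss + (1.0 - r) * ranking_loss

print(joint_loss(2.5, 1.0))  # 0.8 * 2.5 + 0.2 * 1.0 = 2.2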
def ready(self): config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, \ config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn_gru else native_gru gi = [] #att_vP = [] self.cell_fw = tf.contrib.rnn.GRUCell(dg) self.cell_bw = tf.contrib.rnn.GRUCell(dg) self.rnn1 = None self.rnn2 = None self.att_vP = tf.zeros([N, 1, 2 * d]) c_pr_mask = self.c_pr_mask qtemp = tf.zeros([N, 1, 900]) # _c = concatenation cmax_c = tf.zeros([5], tf.int32) clen_c = tf.zeros([N, 5], tf.int32) """ self.rnn1 = gru(num_layers=3, num_units=d, batch_size=N, input_size=500,\ keep_prob=config.keep_prob, is_train=self.is_train) self.rnn2 = gru(num_layers=1, num_units=d, batch_size=N, input_size=1800,\ keep_prob=config.keep_prob, is_train=self.is_train) """ result, self.att_vP, q, self.answer_info, self.y1, self.y2, self.c_pr_mask, cmax_c, clen_c = \ tf.while_loop(self.condition, self.get_vP, loop_vars=[self.i,self.att_vP,qtemp, \ self.answer_info,self.y1,self.y2,c_pr_mask, cmax_c, clen_c], shape_invariants= \ [self.i.get_shape(), tf.TensorShape([N, None, 2*d]), tf.TensorShape([N, None, 900]), \ self.answer_info.get_shape(), tf.TensorShape([None, None]), tf.TensorShape([None, None]), \ tf.TensorShape([None, None]), tf.TensorShape([None]), tf.TensorShape([N, None])]) tf.summary.histogram('att_vP', self.att_vP) #att_vP = tf.Print(att_vP,[tf.shape(att_vP)],message="att_vP:") """ with tf.variable_scope("match"): self_att = dot_attention( att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape( ).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) match = rnn(self_att, seq_len=self.c_len) """ with tf.variable_scope("pointer"): # r_Q: #self.att_vP = tf.Print(self.att_vP,[tf.shape(self.att_vP),tf.shape(self.c_pr_mask)], # message="pointer:") #self.att_vP = tf.Print(self.att_vP,[tf.greater(self.att_vP,y1),tf.shape(self.c_mask)], # message="pointer:") init = summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) print("rQ:", init.get_shape().as_list()) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, self.att_vP, d, self.c_pr_mask) logits1 = tf.Print(logits1, [tf.nn.softmax(logits1)], message="logits1", summarize=100) logits2 = tf.Print(logits2, [tf.nn.softmax(logits2)], message="logits2", summarize=100) tf.summary.histogram('rQ_init', init) tf.summary.histogram('pointer_logits_1', logits1) tf.summary.histogram('pointer_logits_2', logits2) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) def condition_j(y1, y2, j, ny1, ny2, cm, cl): return tf.less(j, N) def batch_j(y1, y2, j, new_y1, new_y2, cm, cl): loop_var_i = tf.constant(0, tf.int32) #loop_var_i = tf.Print(loop_var_i,[loop_var_i],message="loop_var_i") y1, y2, j, i, new_y1, new_y2, cm, cl = tf.while_loop( condition_i, passage_i, loop_vars=[y1, y2, j, loop_var_i, new_y1, new_y2, cm, cl], shape_invariants=[ tf.TensorShape([None]), tf.TensorShape([None]), j.get_shape(), loop_var_i.get_shape(), tf.TensorShape([N]), tf.TensorShape([N]), tf.TensorShape([None]), 
tf.TensorShape([None, None]) ]) j = tf.add(j, tf.constant(1)) return y1, y2, j, new_y1, new_y2, cm, cl def passage_i(y1, y2, j, i, new_y1, new_y2, cm, cl): def c1_1(): indices1 = tf.reshape(j, [1, 1]) updates1 = tf.reshape(cm[i], [1]) shape1 = tf.reshape(N, [1]) scatter1 = tf.scatter_nd(indices1, updates1, shape1) y1_ = tf.subtract(y1, scatter1) indices2 = tf.reshape(j, [1, 1]) updates2 = tf.reshape(cl[j, i], [1]) shape2 = tf.reshape(N, [1]) scatter2 = tf.scatter_nd(indices2, updates2, shape2) new_y1_ = tf.add(new_y1, scatter2) return y1_, new_y1_ def c2_1(): indices1 = tf.reshape(j, [1, 1]) updates1 = tf.reshape(y1[j], [1]) shape1 = tf.reshape(N, [1]) scatter1 = tf.scatter_nd(indices1, updates1, shape1) new_y1_ = tf.add(new_y1, scatter1) return y1, new_y1_ def c1_2(): indices1 = tf.reshape(j, [1, 1]) updates1 = tf.reshape(cm[i], [1]) shape1 = tf.reshape(N, [1]) scatter1 = tf.scatter_nd(indices1, updates1, shape1) y2_ = tf.subtract(y2, scatter1) indices2 = tf.reshape(j, [1, 1]) updates2 = tf.reshape(cl[j, i], [1]) shape2 = tf.reshape(N, [1]) scatter2 = tf.scatter_nd(indices2, updates2, shape2) new_y2_ = tf.add(new_y2, scatter2) return y2_, new_y2_ def c2_2(): indices1 = tf.reshape(j, [1, 1]) updates1 = tf.reshape(y2[j], [1]) shape1 = tf.reshape(N, [1]) scatter1 = tf.scatter_nd(indices1, updates1, shape1) new_y2_ = tf.add(new_y2, scatter1) return y2, new_y2_ #y1,new_y1 = tf.cond(cond_i_1, c1_1, c2_1) #y2,new_y2 = tf.cond(cond_i_2, c1_2, c2_2) #i = tf.Print(i,[i],message="loop_var_i") #j = tf.Print(j,[j],message="loop_var_j") y1, new_y1 = tf.cond(y1[j] > cm[i], c1_1, c2_1) y2, new_y2 = tf.cond(y2[j] > cm[i], c1_2, c2_2) i = tf.add(i, tf.constant(1)) return y1, y2, j, i, new_y1, new_y2, cm, cl def condition_i(y1, y2, j, i, ny1, ny2, cm, cl): #self.para_count = tf.Print(self.para_count,[self.para_count[j]],message="para_count j") return tf.less(i, self.para_count[j]) new_yp1 = tf.zeros([N], tf.int32) new_yp2 = tf.zeros([N], tf.int32) #cmax_c = tf.cast(cmax_c,tf.int32) #clen_c = tf.cast(clen_c,tf.int32) loop_var_j = tf.constant(0, tf.int32) self.yp1, self.yp2 = tf.cast(self.yp1, tf.int32), tf.cast( self.yp2, tf.int32) self.yp1, self.yp2, loop_var_j, new_yp1, new_yp2, cm, cl = tf.while_loop(condition_j, batch_j, loop_vars=[self.yp1, self.yp2, loop_var_j, new_yp1, new_yp1,\ cmax_c, clen_c],\ shape_invariants=[tf.TensorShape([None]), tf.TensorShape([None]), loop_var_j.get_shape(), tf.TensorShape([N]), tf.TensorShape([N]), tf.TensorShape([None]), tf.TensorShape([None,None])]) #self.yp1 = tf.Print(self.yp1,[self.yp1],message="yp1",summarize=N) losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=self.y1) losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=self.y2) losses = tf.Print(losses, [losses], message="losses", summarize=20) losses2 = tf.Print(losses2, [losses2], message="losses2", summarize=20) #losses1_2 = tf.reduce_mean(losses1_2, axis=0) self.loss = tf.reduce_mean(losses + losses2) print(self.loss) # print losses #condition = tf.greater(self.loss, 11) #self.yp1 = tf.where(condition, tf.Print(self.yp1,[self.yp1],message="Yp1:"), self.yp1) #self.yp2 = tf.where(condition, tf.Print(self.yp2,[self.yp2],message="Yp2:"), self.yp1) if config.with_passage_ranking: gi = None for i in range(config.max_para): # Passage ranking if i == 0: with tf.variable_scope("passage-ranking-attention"): #att_vP = tf.Print(att_vP,[att_vP.get_shape()],message="att_vP:") vj_P = self.att_vP[:, i * 400:(i + 1) * 400, :] pr_att = pr_attention( batch=N, 
hidden=init.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train, name_scope="passage_ranking_att_layer") r_P = pr_att(init, vj_P, d, self.c_mask) tf.summary.histogram('r_P_' + str(i), r_P) #r_P = tf.Print(r_P,[r_P],message="r_p") # Wg concatenate = tf.concat([init, r_P], axis=1) g = tf.nn.tanh( dense(concatenate, hidden=d, use_bias=False, scope="g", name_scope="dense_pr_att_layer_1")) g_ = dense(g, 1, use_bias=False, scope="g_", name_scope="dense_pr_att_layer_2") #g = tf.Print(g,[g],message="g") if i == 0: gi = tf.reshape(g_, [N, 1]) else: gi = tf.concat([gi, tf.reshape(g_, [N, 1])], axis=1) else: with tf.variable_scope("passage-ranking-attention", reuse=True): #att_vP = tf.Print(att_vP,[att_vP.get_shape()],message="att_vP:") vj_P = self.att_vP[:, i * 400:(i + 1) * 400, :] pr_att = pr_attention( batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train, name_scope="passage_ranking_att_layer") r_P = pr_att(init, vj_P, d, self.c_mask) tf.summary.histogram('r_P_' + str(i), r_P) #r_P = tf.Print(r_P,[r_P],message="r_p") # Wg concatenate = tf.concat([init, r_P], axis=1) g = tf.nn.tanh( dense(concatenate, hidden=d, use_bias=False, scope="g", name_scope="dense_pr_att_layer_1")) g_ = dense(g, 1, use_bias=False, scope="g_", name_scope="dense_pr_att_layer_2") #g = tf.Print(g,[g],message="g") if i == 0: gi = tf.reshape(g_, [N, 1]) else: gi = tf.concat([gi, tf.reshape(g_, [N, 1])], axis=1) tf.summary.histogram('gi', gi) #gi_ = tf.convert_to_tensor(gi,dtype=tf.float32) #self.gi = tf.nn.softmax(gi_) #self.losses3 = tf.nn.softmax_cross_entropy_with_logits( # logits=gi_, labels=tf.reshape(self.pr,[-1,1])) self.losses3 = tf.nn.softmax_cross_entropy_with_logits( logits=gi, labels=self.pr) #self.losses3 = tf.Print(self.losses3,[self.losses3,tf.reduce_max(self.losses3), # tf.reduce_max(self.pr),tf.reduce_max(gi)],message="losses3:") self.pr_loss = tf.reduce_mean(self.losses3) #self.pr_loss = tf.Print(self.pr_loss,[self.pr_loss]) self.r = tf.constant(0.8) self.e_loss1 = tf.multiply(self.r, self.loss) self.e_loss2 = tf.multiply(tf.subtract(tf.constant(1.0), self.r), self.pr_loss) self.e_loss = tf.add(self.e_loss1, self.e_loss2)
def ready(self): config = self.config N, QL, CL, d, dc, dg = config.batch_size, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru SN, SL = self.c_s_maxnum, self.c_s_maxlen W = config.glove_dim print('embedding part') with tf.variable_scope("emb"): # with tf.variable_scope("char"): # ch_emb = tf.reshape(tf.nn.embedding_lookup( # self.char_mat, self.csh_slice), [N, SN * SL, CL, dc], name='char_reshape') # qh_emb = tf.reshape(tf.nn.embedding_lookup( # self.char_mat, self.qh_slice), [N, QL, CL, dc]) # ch_emb = dropout( # ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) # qh_emb = dropout( # qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) # ch_emb_char = tf.unstack(ch_emb, axis=0) # qh_emb_char = tf.unstack(qh_emb, axis=0) ''' filter_size = [3, 4, 5] att_char = [] merge_char = [] q_merge_char = [] for filter in filter_size: with tf.variable_scope("char-cnnencoder-%s" % filter): step_merge_char = [] step_att_char = [] q_step_merge_char = [] q_step_att_char = [] for i in range(2): if i==0: input_char=ch_emb else: input_char=qh_emb conv_branch_char = tf.layers.conv2d( inputs=input_char, # use as many filters as the hidden size filters=50, kernel_size=filter, use_bias=True, activation=tf.nn.relu, trainable=True, padding='SAME', name = 'conv_char_' + str(filter), reuse = tf.AUTO_REUSE, data_format='channels_last' ) if i ==0: step_att_char.append(conv_branch_char) # pool over the words to obtain: [first_dim x 1* hidden_size] pool_branch_char = tf.reduce_max(conv_branch_char, axis=2) merge_char.append(pool_branch_char) else: q_step_att_char.append(conv_branch_char) # pool over the words to obtain: [first_dim x 1* hidden_size] q_pool_branch_char = tf.reduce_max(conv_branch_char, axis=2) q_merge_char.append(q_pool_branch_char) # batch_merge = tf.stack(step_merge_char, axis=0) # merge_char.append(batch_merge) # batch_merge_q = tf.stack(q_step_merge_char, axis=0) # q_merge_char.append(batch_merge_q) ch_con = tf.concat(merge_char, axis=-1) ch_con = tf.reshape(ch_con,[N,SN,SL,150]) qh_con = tf.concat(q_merge_char,axis=-1) ''' # if(use_char): # with tf.variable_scope("char"): # ch_emb = tf.reshape(tf.nn.embedding_lookup( # self.char_mat, self.csh), [N * SN * SL, CL, dc], name='char_reshape') # qh_emb = tf.reshape(tf.nn.embedding_lookup( # self.char_mat, self.qh), [N * QL, CL, dc]) # ch_emb = dropout( # ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) # qh_emb = dropout( # qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) # cell_fw = tf.contrib.rnn.GRUCell(dg) # cell_bw = tf.contrib.rnn.GRUCell(dg) # _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( # cell_fw, cell_bw, ch_emb, self.csh_len, dtype=tf.float32) # ch_emb = tf.concat([state_fw, state_bw], axis=1) # _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( # cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32) # qh_emb = tf.concat([state_fw, state_bw], axis=1) # qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg]) # ch_emb = tf.reshape(ch_emb, [N, SN, SL, 2 * dg]) with tf.name_scope("word"): c_emb = tf.nn.embedding_lookup(self.word_mat, self.cs_slice) q_emb = tf.nn.embedding_lookup(self.word_mat, self.q_slice) with tf.name_scope("softemb"): c_emb_linear = tf.nn.relu( dense(c_emb, d, use_bias=True, scope="c_emb_linear")) q_emb_linear = tf.nn.relu( dense(q_emb, d, use_bias=True, scope="q_emb_linear")) c_emb_linear = tf.reshape( c_emb_linear, [N, self.c_s_maxnum * self.c_s_maxlen, d]) align_cq = 
tf.matmul(c_emb_linear, tf.transpose(q_emb_linear, [0, 2, 1])) cq_mask = tf.tile(tf.expand_dims(self.q_mask, axis=1), [1, self.c_s_maxnum * self.c_s_maxlen, 1]) self.align_cq = tf.nn.softmax(softmax_mask(align_cq, cq_mask)) align_c_emb = tf.matmul(self.align_cq, q_emb_linear) align_c_emb = tf.reshape( align_c_emb, [N, self.c_s_maxnum, self.c_s_maxlen, d]) c_emb = tf.concat( [c_emb, align_c_emb, self.ce_slice, self.ct_slice], axis=3) c_emb = tf.reshape( c_emb, [N, self.c_s_maxnum, self.c_s_maxlen, W + d + 3 + 19], name='c_emb_reshape') q_emb = tf.concat([q_emb, self.qt_slice], axis=2) self.c_emb = c_emb self.q_emb = q_emb # c_emb = tf.reshape(c_emb, [N,self.c_s_maxnum,self.c_s_maxlen,W+self.q_maxlen]) print('encode-part') # c_s_len = tf.unstack(self.c_s_len, axis=1) cnn_out = [] c_s_emb = tf.unstack(c_emb, axis=0) # q_s_emb = tf.expand_dims(q_emb, axis=1) # q_sample_emb = tf.unstack(q_s_emb, axis = 0) filter_size = [3, 4, 5] att = [] merge = [] q_merge = [] with tf.variable_scope("cnnencoder"): for filter in filter_size: step_merge = [] step_att = [] q_step_merge = [] q_step_att = [] with tf.variable_scope("cnnencoder-%s" % filter): for i in range(N): conv_branch = tf.layers.conv1d( inputs=c_s_emb[i], # use as many filters as the hidden size filters=100, kernel_size=[filter], use_bias=True, activation=tf.nn.relu, trainable=True, padding='SAME', name='conv_' + str(filter), reuse=tf.AUTO_REUSE) # tf.get_variable_scope().reuse_variables() step_att.append(conv_branch) # pool over the words to obtain: [first_dim x 1* hidden_size] pool_branch = tf.reduce_max(conv_branch, axis=1) pool_branch = dropout(pool_branch, keep_prob=config.keep_prob, is_train=self.is_train) step_merge.append(pool_branch) batch_merge = tf.stack(step_merge, axis=0) merge.append(batch_merge) # batch_merge_q = tf.stack(q_step_merge, axis = 0) # q_merge.append(batch_merge_q) con = tf.concat(merge, axis=-1) # q_con = tf.concat(q_merge, axis = -1) # # attention_vis = tf.stack(att, axis=0) # attention_vis = tf.reduce_mean(attention_vis, axis=0) # cnn_out.append(con) # c_sen_emb = tf.concat(con, axis = 0) with tf.variable_scope("encoding"): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=con.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) print('passage-encoder') c_s = rnn(con, seq_len=self.c_p_len) # q = rnn(q_emb, seq_len=self.q_len) with tf.variable_scope("qencode"): with tf.variable_scope("encoding"): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=q_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) q = rnn(q_emb, seq_len=self.q_len) self.q_enc = q print('qc_att') with tf.variable_scope("attention"): qc_att = dot_attention(c_s, q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) self.att_s = rnn(qc_att, seq_len=self.c_p_len) # print('pointer') with tf.variable_scope("pointer"): init = summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train, is_sentence=True) logits1 = pointer(init, self.att_s, d, self.c_p_mask) self.lo = logits1 with tf.variable_scope("predict"): self.outer = tf.nn.softmax(logits1) self.yp = tf.argmax(self.outer, axis=1) losses = tf.nn.softmax_cross_entropy_with_logits_v2( 
logits=logits1, labels=tf.stop_gradient(self.y_slice)) self.out1 = tf.nn.top_k(self.outer, config.k).values self.policy = tf.nn.top_k(self.outer, 1).values self.policy = tf.reduce_sum(tf.nn.top_k(self.outer, config.k).values, axis=-1, keepdims=True) self.policy_log_part = tf.log(self.policy) #self.loss = tf.reduce_mean(-1 * self.policy_log_part * self.reward) reward = self.advantage reward_mean, reward_var = tf.nn.moments(reward, axes=[0]) reward_std = tf.sqrt(reward_var) + 1e-6 self.reward_mean = reward_mean self.reward_var = reward_std reward = tf.div(reward - reward_mean, reward_std) self.final_reward = reward - self.baseline self.loss = tf.reduce_mean(-1 * self.policy_log_part * self.advantage)
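# The sentence-selection variant above is trained with a REINFORCE-style objective:
# the probability mass of the top-k sentences is the policy, the advantage is
# standardized across the batch (used here only for logging), and the loss is
# -log(policy) * advantage. NumPy sketch of that loss (illustrative; names mirror the block above):
import numpy as np

def policy_gradient_loss(topk_probs, advantage, baseline=0.0, eps=1e-6):
    policy = topk_probs.sum(axis=-1, keepdims=True)      # mass of the k selected sentences
    log_policy = np.log(policy)
    norm_adv = (advantage - advantage.mean(axis=0)) / (np.sqrt(advantage.var(axis=0)) + eps)
    final_reward = norm_adv - baseline                   # computed but, as above, not used in the loss
    loss = np.mean(-log_policy * advantage)              # original weights by the raw advantage
    return loss, final_reward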
def ready(self):
    N, PL, QL, CL, d, dc, dg = 64, self.c_maxlen, self.q_maxlen, char_limit, hidden, char_dim, char_hidden
    gru = cudnn_gru if use_cudnn else native_gru

    with tf.variable_scope("emb"):
        with tf.variable_scope("char"):
            ch_emb = tf.reshape(
                tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc])
            qh_emb = tf.reshape(
                tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc])
            cell_fw = tf.contrib.rnn.GRUCell(dg)
            cell_bw = tf.contrib.rnn.GRUCell(dg)
            _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
            ch_emb = tf.concat([state_fw, state_bw], axis=1)
            _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
            qh_emb = tf.concat([state_fw, state_bw], axis=1)
            qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
            ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

        with tf.name_scope("word"):
            c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
            q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

        c_emb = tf.concat([c_emb, ch_emb], axis=2)
        q_emb = tf.concat([q_emb, qh_emb], axis=2)

    with tf.variable_scope("encoding"):
        rnn = gru(num_layers=3, num_units=d, batch_size=N,
                  input_size=c_emb.get_shape().as_list()[-1])
        c = rnn(c_emb, seq_len=self.c_len)
        q = rnn(q_emb, seq_len=self.q_len)

    with tf.variable_scope("attention"):
        qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=qc_att.get_shape().as_list()[-1])
        att = rnn(qc_att, seq_len=self.c_len)

    with tf.variable_scope("match"):
        self_att = dot_attention(att, att, mask=self.c_mask, hidden=d)
        rnn = gru(num_layers=1, num_units=d, batch_size=N,
                  input_size=self_att.get_shape().as_list()[-1])
        match = rnn(self_att, seq_len=self.c_len)

    with tf.variable_scope("pointer"):
        init = summ(q[:, :, -2 * d:], d, mask=self.q_mask)
        pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1])
        logits1, logits2 = pointer(init, match, d, self.c_mask)

    with tf.variable_scope("predict"):
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        # outer = tf.matrix_band_part(outer, 0, 15)
        outer = tf.matrix_band_part(outer, 0, 12)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
def ready(self): config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru max_para = tf.reduce_max(self.passage_count) self.cell_fw = tf.contrib.rnn.GRUCell(dg) self.cell_bw = tf.contrib.rnn.GRUCell(dg) vp_concat = tf.zeros([N,1,300],tf.float32) clen_concat = tf.zeros([N,1],tf.int32) c_mask_concat = tf.cast(tf.zeros([N,1]),tf.bool) y1_concat = y2_concat = tf.zeros([N,1]) seq_mask_concat = tf.cast(tf.zeros([N,1]),tf.bool) # maybe seq mask is = c_mask q = tf.zeros([N,1,1]) for i in range(config.max_para): i_ = tf.constant(i) #print_out(i) def vp(): att, c_len, c_mask, y1, y2, seq_mask = self.get_vp(i) c_len = tf.reshape(c_len,[N,1]) att, c_len, c_mask, y1, y2, seq_mask = tf.cond( tf.equal(i_,tf.constant(0)), lambda: (att, c_len, c_mask, y1, y2, seq_mask), lambda: ( tf.concat([vp_concat, att], axis=1), tf.concat([clen_concat, c_len], axis=1), tf.concat([c_mask_concat, c_mask], axis=1), tf.concat([y1_concat, y1], axis=1), tf.concat([y2_concat, y2], axis=1), tf.concat([seq_mask_concat, seq_mask], axis=1), ) ) return att, c_len, c_mask, y1, y2, seq_mask def dummy(): return vp_concat, clen_concat, c_mask_concat, y1_concat, y2_concat, seq_mask_concat vp_concat, clen_concat, c_mask_concat, y1_concat, y2_concat, seq_mask_concat \ = tf.cond(i_ < max_para, vp, dummy) vp_mask_count = tf.reduce_sum(clen_concat, axis=1) # max count w.r.t original concatenated context (self.c_len) vpmccl = vp_mask_max_count_c_like = tf.reduce_max(vp_mask_count) # max count w.r.t concatenated vp (self.att_vP) ##### not used: vp_mask_max_count = tf.reduce_max(tf.reduce_max(clen_concat)) vp_final_pad_meta = vp_mask_max_count_c_like - vp_mask_count # dont know why this diff happens, but it does diff = tf.shape(self.c_mask)[-1] - vp_mask_max_count_c_like vp_final_pad_seq = tf.sequence_mask(vp_final_pad_meta+diff) seq_mask_concat1 = tf.concat([seq_mask_concat, vp_final_pad_seq], axis=1) pad_length = tf.reduce_max(vp_final_pad_meta)+diff paddings = tf.convert_to_tensor([[0, 0], [0, pad_length], [0, 0]]) new_vp = tf.pad(vp_concat, paddings, "CONSTANT") new_vp = tf.reshape(tf.boolean_mask(new_vp, seq_mask_concat1), [N, vpmccl+diff, 2*config.hidden] ) """ new_vp = tf.Print(new_vp,["vp_mask_max_count_c_like",vp_mask_max_count_c_like, "vp_final_pad_meta",vp_final_pad_meta, "vp_concat",tf.shape(vp_concat),"new_vp",tf.shape(new_vp), "c_mask",tf.shape(self.c_mask),"seq_mask_concat",tf.shape(seq_mask_concat), "clen_concat",clen_concat,"c_mask_last",self.c_mask[:,-1], "vp_mask_count",vp_mask_count,"c_len",self.c_len], summarize=N*10,message="SHORT") """ #self.c_mask = tf.concat([self.c_mask,vp_final_pad_seq],axis=1) with tf.variable_scope("pointer"): # r_Q: init = summ(self.q_enc[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list( )[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) #logits1, logits2 = pointer(init, new_vp, d, self.c_mask) logits1, logits2 = pointer(init, new_vp, d, self.c_mask) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1) self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1) losses = tf.nn.softmax_cross_entropy_with_logits( logits=logits1, 
labels=self.y1) losses2 = tf.nn.softmax_cross_entropy_with_logits( logits=logits2, labels=self.y2) self.loss = tf.reduce_mean(losses + losses2) #losses = tf.nn.softmax_cross_entropy_with_logits_v2( # logits=logits1, labels=tf.stop_gradient(self.y1)) #losses2 = tf.nn.softmax_cross_entropy_with_logits_v2( # logits=logits2, labels=tf.stop_gradient(self.y2)) #self.loss = tf.reduce_mean(losses + losses2) c_max = tf.reduce_max(clen_concat, axis=0) print(c_max) g_concat = tf.zeros([N,1]) count = tf.constant(0) if config.with_passage_ranking: with tf.variable_scope("passage_ranking"): for i in range(config.max_para): i_ = tf.constant(i) def passage_ranking(): global count print(c_max[i]) if i==0: #vp = tf.slice(vp_concat,[0,0,0],[N,c_max[i],]) c_max1 = tf.Print(c_max,[c_max],message="C_MAX") vp = vp_concat[:,:c_max[i],:] mask = seq_mask_concat[:,:c_max[i]] count = c_max[i] else: vp = vp_concat[:,count:count+c_max[i],:] mask = seq_mask_concat[:,count:count+c_max[i]] count += c_max[i] #g = pr_attention(init, vp, mask=mask, hidden=d, # keep_prob=config.keep_prob, is_train=self.is_train, name_scope="rP_attention") #g = tf.reshape(g,[N,1]) g = summ2(vp, init, max_para, d, mask, keep_prob=config.keep_prob, is_train=self.is_train, scope="summ") if i==0: return g,count return tf.concat([g_concat,g],axis=1),count def dummy(): return g_concat,count g_concat,count = tf.cond(i_ < max_para, passage_ranking,dummy) self.losses3 = tf.nn.softmax_cross_entropy_with_logits( logits=g_concat, labels=self.passage_ranking) #self.losses3 = tf.Print(self.losses3,[self.losses3,tf.reduce_max(self.losses3), # tf.reduce_max(self.pr),tf.reduce_max(gi)],message="losses3:") self.pr_loss = tf.reduce_mean(self.losses3) #self.pr_loss = tf.Print(self.pr_loss,[self.pr_loss]) r = tf.constant(0.8) one_minus_r = tf.constant(0.2) self.ee_loss1 = tf.multiply(r,self.loss) self.ee_loss2 = tf.multiply(one_minus_r,self.pr_loss) self.ee_loss = tf.add(self.ee_loss1, self.ee_loss2)
def ready(self): config = self.config N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden gru = cudnn_gru if config.use_cudnn else native_gru with tf.variable_scope("emb"): with tf.variable_scope("char"): ch_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc]) qh_emb = tf.reshape( tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc]) ch_emb = dropout(ch_emb, keep_prob=config.keep_prob, is_train=self.is_train) qh_emb = dropout(qh_emb, keep_prob=config.keep_prob, is_train=self.is_train) cell_fw = tf.contrib.rnn.GRUCell(dg) cell_bw = tf.contrib.rnn.GRUCell(dg) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32) ch_emb = tf.concat([state_fw, state_bw], axis=1) _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn( cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32) qh_emb = tf.concat([state_fw, state_bw], axis=1) qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg]) ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg]) with tf.name_scope("word"): c_emb = tf.nn.embedding_lookup(self.word_mat, self.c) q_emb = tf.nn.embedding_lookup(self.word_mat, self.q) self.c_emb = c_emb = tf.concat([c_emb, ch_emb], axis=2) q_emb = tf.concat([q_emb, qh_emb], axis=2) bad_c_emb = tf.stop_gradient(c_emb) bad_q_emb = tf.stop_gradient(q_emb) with tf.variable_scope("encoding"): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=bad_c_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) self.c_rnn = rnn(bad_c_emb, seq_len=self.c_len) self.q_rnn = rnn(bad_q_emb, seq_len=self.q_len) badptr_c = tf.stop_gradient(self.c_rnn) badptr_q = tf.stop_gradient(self.q_rnn) old_rnn = rnn with tf.variable_scope("badptr_attention"): qc_att, self.badptr_qc_att = dot_attention( badptr_c, badptr_q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train, give=True) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) self.att = [rnn(qc_att, seq_len=self.c_len)] self.att += [self.att[-1][:, -1, :]] with tf.variable_scope("badptr_dense"): for _ in range(3): self.att += [ tf.nn.dropout(tf.keras.layers.Dense(300)(self.att[-1]), keep_prob=config.keep_prob) ] with tf.variable_scope("badptr"): init = self.att[-1] pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, self.att[0], d, self.c_mask) with tf.variable_scope("badptr_predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.bad_yp1_distrib = tf.reduce_max(outer, axis=2) self.bad_yp2_distrib = tf.reduce_max(outer, axis=1) self.bad_yp1 = tf.argmax(self.bad_yp1_distrib, axis=1) self.bad_yp2 = tf.argmax(self.bad_yp2_distrib, axis=1) losses = tf.nn.softmax_cross_entropy_with_logits_v2( logits=logits1, labels=tf.stop_gradient(self.bad_y1)) losses2 = tf.nn.softmax_cross_entropy_with_logits_v2( logits=logits2, labels=tf.stop_gradient(self.bad_y2)) self.loss = tf.reduce_mean(losses + losses2) # recompute c with bitmask left = tf.sequence_mask(self.bad_yp1, tf.shape(c_emb)[1]) right = tf.logical_not( tf.sequence_mask(self.bad_yp2 + 1, tf.shape(c_emb)[1])) self.combo = combo = tf.logical_or(left, right) ### FOR TESTING ### ## self.combo = 
combo = tf.cast(tf.ones_like(combo), tf.bool) def adjust(c_emb_combo): c_emb, combo = c_emb_combo foo = c_emb bar = tf.boolean_mask(foo, combo) return tf.cond( tf.logical_and(tf.equal(combo[0], False), tf.equal(combo[1], True)), false_fn=lambda: tf.pad( bar, [[0, tf.shape(foo)[0] - tf.shape(bar)[0]], [0, 0]]), true_fn=lambda: foo) self.c_emb_new = c_emb_new = tf.map_fn(adjust, (c_emb, combo), dtype=(tf.float32)) self.c_len = tf.reduce_sum(tf.cast( tf.logical_and(self.c_mask, self.combo), tf.int32), axis=-1) self.c_mask = tf.sequence_mask( tf.reduce_sum(tf.cast(tf.logical_and(self.c_mask, self.combo), tf.int32), axis=-1), tf.shape(self.c_mask)[1]) with tf.variable_scope("encoding", reuse=True): rnn = gru(num_layers=3, num_units=d, batch_size=N, input_size=c_emb.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train, super_hacky_reload=True) #### SEQ LEN HAS TO BE FIXED!!!! #### c = rnn(c_emb_new, seq_len=self.c_len) q = rnn(q_emb, seq_len=self.q_len) self.c_ck = c self.q_ck = c ### MAKE SURE THESE ARE RUN!!! ### print('RUN ASSIGN TRICK OPS (model.assign_trick_ops)!!') self.assign_trick_ops = [] for i in range(len(rnn.init_fw)): self.assign_trick_ops += [ tf.assign(rnn.init_fw[i], old_rnn.init_fw[i]) ] self.assign_trick_ops += [ tf.assign(rnn.init_bw[i], old_rnn.init_bw[i]) ] with tf.variable_scope("attention"): qc_att, self.qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train, give=True) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) att = rnn(qc_att, seq_len=self.c_len) self.att_ck = att with tf.variable_scope("match"): self_att = dot_attention(att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train) rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape().as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train) match = rnn(self_att, seq_len=self.c_len) self.match_ck = match with tf.variable_scope("pointer"): init = summ(q[:, :, -2 * d:], d, mask=self.q_mask, keep_prob=config.ptr_keep_prob, is_train=self.is_train) pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train) logits1, logits2 = pointer(init, match, d, self.c_mask) with tf.variable_scope("predict"): outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2), tf.expand_dims(tf.nn.softmax(logits2), axis=1)) outer = tf.matrix_band_part(outer, 0, 15) self.yp1_distrib = tf.reduce_max(outer, axis=2) self.yp2_distrib = tf.reduce_max(outer, axis=1) self.yp1 = tf.argmax(self.yp1_distrib, axis=1) self.yp2 = tf.argmax(self.yp2_distrib, axis=1)
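# The "badptr" variant above first predicts a throw-away span, then drops the tokens
# inside that span (logical_or of the two sequence masks) and left-packs the remaining
# context before re-encoding it. A NumPy sketch of that crop-and-pad step for one
# example (illustrative only; the graph version works on whole batches with tf.map_fn):
import numpy as np

def crop_span(c_emb, start, end):
    """Remove positions start..end inclusive, left-pack the rest, pad back to length L."""
    L, H = c_emb.shape
    keep = np.ones(L, dtype=bool)
    keep[start:end + 1] = False
    kept = c_emb[keep]
    return np.concatenate([kept, np.zeros((L - kept.shape[0], H), dtype=c_emb.dtype)], axis=0)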
def ready(self):
    config = self.config
    N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, \
        config.char_limit, config.hidden, config.char_dim, config.char_hidden

    with tf.variable_scope("emb"):
        with tf.variable_scope("char"):
            ch_emb = tf.reshape(
                tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc])
            qh_emb = tf.reshape(
                tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc])
            _, qh_emb = stacked_gru(qh_emb, dg, num_layers=1, seq_len=self.qh_len,
                                    keep_prob=self.keep_prob, is_train=self.is_train)
            tf.get_variable_scope().reuse_variables()
            _, ch_emb = stacked_gru(ch_emb, dg, num_layers=1, seq_len=self.ch_len,
                                    keep_prob=self.keep_prob, is_train=self.is_train)
            qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
            ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

        with tf.name_scope("word"):
            c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
            q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

        c_emb = tf.concat([c_emb, ch_emb], axis=2)
        q_emb = tf.concat([q_emb, qh_emb], axis=2)

    with tf.variable_scope("encoding"):
        c, _ = stacked_gru(c_emb, d, batch=N, num_layers=3, seq_len=self.c_len,
                           keep_prob=self.keep_prob, is_train=self.is_train)
        tf.get_variable_scope().reuse_variables()
        q, _ = stacked_gru(q_emb, d, batch=N, num_layers=3, seq_len=self.q_len,
                           keep_prob=self.keep_prob, is_train=self.is_train)

    with tf.variable_scope("attention"):
        qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                               keep_prob=self.keep_prob, is_train=self.is_train)
        att, _ = stacked_gru(qc_att, d, num_layers=1, seq_len=self.c_len,
                             keep_prob=self.keep_prob, is_train=self.is_train)

    with tf.variable_scope("match"):
        self_att = dot_attention(att, att, mask=self.c_mask, hidden=d,
                                 keep_prob=self.keep_prob, is_train=self.is_train)
        match, _ = stacked_gru(self_att, d, num_layers=1, seq_len=self.c_len,
                               keep_prob=self.keep_prob, is_train=self.is_train)

    with tf.variable_scope("pointer"):
        init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                    keep_prob=self.ptr_keep_prob, is_train=self.is_train)
        d_match = dropout(match, keep_prob=self.ptr_keep_prob, is_train=self.is_train)
        hidden = init.get_shape().as_list()[-1]
        cell_fw = GRUCell(hidden)
        cell_bw = GRUCell(hidden)
        with tf.variable_scope("fw"):
            inp, logits1_fw = pointer(d_match, init, d, mask=self.c_mask)
            _, state = cell_fw(inp, init)
            tf.get_variable_scope().reuse_variables()
            _, logits2_fw = pointer(d_match, state, d, mask=self.c_mask)
        with tf.variable_scope("bw"):
            inp, logits2_bw = pointer(d_match, init, d, mask=self.c_mask)
            _, state = cell_bw(inp, init)
            tf.get_variable_scope().reuse_variables()
            _, logits1_bw = pointer(d_match, state, d, mask=self.c_mask)
        logits1 = (logits1_fw + logits1_bw) / 2.
        logits2 = (logits2_fw + logits2_bw) / 2.

    with tf.variable_scope("predict"):
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        outer = tf.matrix_band_part(outer, 0, 15)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1, labels=self.y1)
        losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2, labels=self.y2)
        self.loss = tf.reduce_mean(losses + losses2)
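# summ(...) builds the pointer network's initial state r_Q from the question encoding in
# every variant above. A rough, framework-free sketch of that kind of attention pooling
# (assumed behaviour of the helper; the real scorer is a small trained layer, replaced
# here by a fixed random projection purely for illustration):
import numpy as np

def attention_pool(memory, mask, seed=0):
    """memory: [L_q, H] question states; mask: [L_q] 1/0. Returns an [H] summary vector."""
    w = np.random.default_rng(seed).normal(size=memory.shape[-1])
    scores = np.where(mask > 0, memory @ w, -1e30)   # score each position, ignore padding
    scores -= scores.max()
    alpha = np.exp(scores)
    alpha /= alpha.sum()                             # softmax over question positions
    return alpha @ memory                            # weighted sum = r_Q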