예제 #1
0
def main():
    """Run the interactive calculator loop until the user enters command 0.

    Every pass calls ``start()`` (defined elsewhere; presumably prints the
    command list -- confirm), reads a command number and then:

    * non-numeric input -> prints a hint and asks again;
    * ``0``             -> prints a farewell and leaves the loop;
    * anything outside ``1..4`` -> prints a hint and asks again;
    * ``1..4``          -> reads two arguments, validates them with
      ``valid_2`` and ``valid`` and dispatches to ``summ``, ``diff``,
      ``mult`` or ``division`` respectively.

    ``NoSpaces`` / ``IncorrectInputError`` raised while reading or validating
    the arguments are printed and the loop restarts; a ``ZeroDivisionError``
    from ``division`` is reported instead of propagating.
    """
    while True:
        start()
        desision = input('Write your desision sir : ')
        try:
            desision = int(desision)
        except ValueError:
            print('u should wrine number of comand sir')
            continue

        if not desision:
            print('See you next time sir')
            break
        # Reject every command outside 1..4.  The previous check only caught
        # values above 4, so a negative number asked for both arguments and
        # then silently did nothing.
        if not 1 <= desision <= 4:
            print('check list of comands again, sir')
            continue

        try:
            arg_1 = input('first arg: ')
            arg_2 = input('second arg: ')
            valid_2(arg_1, arg_2)
            valid(arg_1, arg_2)
        except (NoSpaces, IncorrectInputError) as err:
            # Both validation errors are reported the same way.
            print(f'{err}')
            continue

        if desision == 1:
            # Argument order is (arg_2, arg_1) here, unlike the other
            # operations; kept exactly as the original call.
            summ(arg_2, arg_1)
        elif desision == 2:
            diff(arg_1, arg_2)
        elif desision == 3:
            mult(arg_1, arg_2)
        else:  # desision == 4, guaranteed by the range check above
            try:
                division(arg_1, arg_2)
            except ZeroDivisionError as err:
                print(f'second num  is zero - {err}!!!')
예제 #2
0
    def ready(self):
        """Build the per-passage reader graph and the passage-ranking loss.

        For each of ``config.max_para`` passes the method builds, in order,
        the ``emb`` (char + word), ``encoding``, ``attention``, ``pointer``
        and ``predict`` layers, appending the attention output ``att`` to
        ``att_vP``.  A second loop then builds a ``passage-ranking-attention``
        head per collected passage representation and stores the ranking loss.

        Attributes written: ``self.yp1``, ``self.yp2`` and ``self.loss`` (set
        inside the first loop, so the values from the last pass remain) and
        ``self.pr_loss``.

        Fixes relative to the previous revision:

        * the ranking loop read the undefined names ``keep_prob``,
          ``is_train`` and ``att_rp`` (``NameError`` at graph-build time); it
          now uses ``config.keep_prob``, ``self.is_train`` and the attention
          result ``r_P`` that was computed but never used;
        * the ranking loop now reads ``att_vP[i]`` instead of reusing the
          ``att`` left over from the last pass of the first loop;
        * ``softmax_cross_entropy_with_logits`` receives the raw scores
          instead of an already-softmaxed tensor (the op applies softmax
          itself and expects unscaled logits).

        NOTE(review): every pass of the first loop reads the same inputs
        (``self.pr_ch``, ``self.qh``, ``self.c``, ``self.q``) and re-enters
        the same variable scopes without ``reuse``; confirm how per-passage
        inputs and variable sharing are meant to work.
        NOTE(review): ``r_P`` replacing ``att_rp`` and the ``axis=2`` concat
        with ``init`` depend on the output rank of ``attention``/``summ``,
        which are defined elsewhere -- verify the shapes.
        """
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        gi = []      # per-passage ranking scores
        att_vP = []  # per-passage attention outputs (v_P)
        for _ in range(config.max_para):
            with tf.variable_scope("emb"):
                with tf.variable_scope("char"):
                    ch_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.char_mat, self.pr_ch),
                        [N * PL, CL, dc])
                    qh_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.char_mat, self.qh),
                        [N * QL, CL, dc])
                    ch_emb = dropout(ch_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    qh_emb = dropout(qh_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    cell_fw = tf.contrib.rnn.GRUCell(dg)
                    cell_bw = tf.contrib.rnn.GRUCell(dg)
                    # The same cell pair encodes context and question chars;
                    # only the final states are kept.
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        ch_emb,
                        self.ch_len,
                        dtype=tf.float32)
                    ch_emb = tf.concat([state_fw, state_bw], axis=1)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        qh_emb,
                        self.qh_len,
                        dtype=tf.float32)
                    qh_emb = tf.concat([state_fw, state_bw], axis=1)
                    qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                    ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

                with tf.name_scope("word"):
                    c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                    q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

                c_emb = tf.concat([c_emb, ch_emb], axis=2)
                q_emb = tf.concat([q_emb, qh_emb], axis=2)

            with tf.variable_scope("encoding"):
                rnn = gru(num_layers=3,
                          num_units=d,
                          batch_size=N,
                          input_size=c_emb.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                c = rnn(c_emb, seq_len=self.c_len)
                q = rnn(q_emb, seq_len=self.q_len)

            with tf.variable_scope("attention"):
                qc_att = dot_attention(c,
                                       q,
                                       mask=self.q_mask,
                                       hidden=d,
                                       keep_prob=config.keep_prob,
                                       is_train=self.is_train)
                rnn = gru(num_layers=1,
                          num_units=d,
                          batch_size=N,
                          input_size=qc_att.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                att = rnn(qc_att, seq_len=self.c_len)
                # att is the v_P
                att_vP.append(att)

            # The self-matching ("match") layer is disabled in this variant;
            # the pointer below reads `att` directly.
            with tf.variable_scope("pointer"):
                # r_Q:
                init = summ(q[:, :, -2 * d:],
                            d,
                            mask=self.q_mask,
                            keep_prob=config.ptr_keep_prob,
                            is_train=self.is_train)

                pointer = ptr_net(batch=N,
                                  hidden=init.get_shape().as_list()[-1],
                                  keep_prob=config.ptr_keep_prob,
                                  is_train=self.is_train)
                logits1, logits2 = pointer(init, att, d, self.c_mask)

            with tf.variable_scope("predict"):
                outer = tf.matmul(
                    tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                    tf.expand_dims(tf.nn.softmax(logits2), axis=1))
                # Keep only entries with 0 <= col - row <= 15.
                outer = tf.matrix_band_part(outer, 0, 15)
                self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
                self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
                losses = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits1, labels=self.y1)
                losses2 = tf.nn.softmax_cross_entropy_with_logits(
                    logits=logits2, labels=self.y2)
                self.loss = tf.reduce_mean(losses + losses2)

        for i in range(config.max_para):
            # Passage ranking
            with tf.variable_scope("passage-ranking-attention"):
                vj_P = dropout(att_vP[i],
                               keep_prob=config.keep_prob,
                               is_train=self.is_train)
                r_Q = dropout(init,
                              keep_prob=config.keep_prob,
                              is_train=self.is_train)
                r_P = attention(r_Q,
                                vj_P,
                                mask=self.c_mask,
                                hidden=d,
                                keep_prob=config.keep_prob,
                                is_train=self.is_train)

                # Wg
                concatenate = tf.concat([init, r_P], axis=2)
                g = tf.nn.tanh(
                    dense(concatenate, hidden=d, use_bias=False, scope="g"))
                g_ = dense(g, 1, use_bias=False, scope="g_")
                gi.append(g_)
        gi_ = tf.convert_to_tensor(gi)
        # Pass raw scores: the cross-entropy op applies softmax internally.
        self.pr_loss = tf.nn.softmax_cross_entropy_with_logits(logits=gi_,
                                                               labels=self.pr)
예제 #3
0
파일: model.py 프로젝트: mryimings/Nucleus
    def ready(self):
        """Assemble the model graph and its training loss.

        Layers are created in order under the scopes ``emb`` (character and
        word embeddings), ``encoding``, ``attention``, ``match``, ``pointer``
        and ``predict``.  On return ``self.yp1`` and ``self.yp2`` hold the
        argmax positions taken from the banded outer product of the two
        softmaxed pointer outputs, and ``self.loss`` holds the mean of the
        summed cross-entropy losses against ``self.y1`` / ``self.y2``.
        """
        cfg = self.config
        batch = cfg.batch_size
        ctx_len = self.c_maxlen
        ques_len = self.q_maxlen
        char_limit = cfg.char_limit
        hidden = cfg.hidden
        char_dim = cfg.char_dim
        char_hidden = cfg.char_hidden
        make_gru = cudnn_gru if cfg.use_cudnn else native_gru

        with tf.variable_scope('emb'):
            with tf.variable_scope('char'):
                # Character lookups, flattened so every word is one sequence.
                ctx_lookup = tf.nn.embedding_lookup(self.char_mat, self.ch)
                ctx_chars = tf.reshape(
                    ctx_lookup, [batch * ctx_len, char_limit, char_dim])
                ques_lookup = tf.nn.embedding_lookup(self.char_mat, self.qh)
                ques_chars = tf.reshape(
                    ques_lookup, [batch * ques_len, char_limit, char_dim])

                ctx_chars = dropout(ctx_chars,
                                    keep_prob=cfg.keep_prob,
                                    is_train=self.is_train)
                ques_chars = dropout(ques_chars,
                                     keep_prob=cfg.keep_prob,
                                     is_train=self.is_train)

                # One cell pair is shared by the context and question passes.
                fw_cell = tf.contrib.rnn.GRUCell(char_hidden)
                bw_cell = tf.contrib.rnn.GRUCell(char_hidden)

                _, ctx_states = tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, ctx_chars, self.ch_len,
                    dtype=tf.float32)
                ctx_fw, ctx_bw = ctx_states
                ctx_chars = tf.concat([ctx_fw, ctx_bw], axis=1)

                _, ques_states = tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, ques_chars, self.qh_len,
                    dtype=tf.float32)
                ques_fw, ques_bw = ques_states
                ques_chars = tf.concat([ques_fw, ques_bw], axis=1)

                ques_chars = tf.reshape(
                    ques_chars, [batch, ques_len, 2 * char_hidden])
                ctx_chars = tf.reshape(
                    ctx_chars, [batch, ctx_len, 2 * char_hidden])

            with tf.name_scope('word'):
                ctx_words = tf.nn.embedding_lookup(self.word_mat, self.c)
                ques_words = tf.nn.embedding_lookup(self.word_mat, self.q)

            ctx_emb = tf.concat([ctx_words, ctx_chars], axis=2)
            ques_emb = tf.concat([ques_words, ques_chars], axis=2)

        with tf.variable_scope('encoding'):
            enc_in = ctx_emb.get_shape().as_list()[-1]
            encoder = make_gru(num_layers=3,
                               num_units=hidden,
                               batch_size=batch,
                               input_size=enc_in,
                               keep_prob=cfg.keep_prob,
                               is_train=self.is_train)
            ctx_enc = encoder(ctx_emb, seq_len=self.c_len)
            ques_enc = encoder(ques_emb, seq_len=self.q_len)

        with tf.variable_scope('attention'):
            ctx_ques_att = dot_attention(ctx_enc,
                                         ques_enc,
                                         mask=self.q_mask,
                                         hidden=hidden,
                                         keep_prob=cfg.keep_prob,
                                         is_train=self.is_train)
            att_in = ctx_ques_att.get_shape().as_list()[-1]
            att_rnn = make_gru(num_layers=1,
                               num_units=hidden,
                               batch_size=batch,
                               input_size=att_in,
                               keep_prob=cfg.keep_prob,
                               is_train=self.is_train)
            attended = att_rnn(ctx_ques_att, seq_len=self.c_len)

        with tf.variable_scope('match'):
            ctx_self_att = dot_attention(attended,
                                         attended,
                                         mask=self.c_mask,
                                         hidden=hidden,
                                         keep_prob=cfg.keep_prob,
                                         is_train=self.is_train)
            match_in = ctx_self_att.get_shape().as_list()[-1]
            match_rnn = make_gru(num_layers=1,
                                 num_units=hidden,
                                 batch_size=batch,
                                 input_size=match_in,
                                 keep_prob=cfg.keep_prob,
                                 is_train=self.is_train)
            matched = match_rnn(ctx_self_att, seq_len=self.c_len)

        with tf.variable_scope('pointer'):
            # Initial pointer state, built from the last 2 * hidden channels
            # of the question encoding.
            ptr_init = summ(ques_enc[:, :, -2 * hidden:],
                            hidden,
                            mask=self.q_mask,
                            keep_prob=cfg.ptr_keep_prob,
                            is_train=self.is_train)
            ptr_hidden = ptr_init.get_shape().as_list()[-1]
            span_pointer = ptr_net(batch=batch,
                                   hidden=ptr_hidden,
                                   keep_prob=cfg.ptr_keep_prob,
                                   is_train=self.is_train)
            logits1, logits2 = span_pointer(
                ptr_init, matched, hidden, self.c_mask)

        with tf.variable_scope('predict'):
            first_col = tf.expand_dims(tf.nn.softmax(logits1), axis=2)
            second_row = tf.expand_dims(tf.nn.softmax(logits2), axis=1)
            joint = tf.matmul(first_col, second_row)
            # Keep only entries with 0 <= col - row <= 15.
            joint = tf.matrix_band_part(joint, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(joint, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(joint, axis=1), axis=1)

            first_labels = tf.stop_gradient(self.y1)
            first_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=first_labels)
            second_labels = tf.stop_gradient(self.y2)
            second_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=second_labels)
            self.loss = tf.reduce_mean(first_loss + second_loss)
예제 #4
0
    def ptrspan(self):
        """Build the sentence- and paragraph-level pointer-span graph.

        The method wires up, in order: a question-aligned paragraph embedding,
        a shared GRU encoder for ``self.k`` sentence slices plus the whole
        paragraph, a question encoder, question attention, sentence-to-
        paragraph attention, paragraph self-attention, a ``dense_summ`` score
        per sentence/paragraph (softmaxed into ``self.topk``), one pointer
        network shared by the sentences and one for the paragraph, and finally
        the prediction / loss ops.

        Attributes written: ``para_emb``, ``c_s``, ``para_gru``,
        ``sentence_att``, ``para_att``, ``sentence_selfatt``, ``para_selfatt``,
        ``match``, ``topk``, ``init``, ``lo1``..``lo4``, ``present``,
        ``out_lo1``, ``out_lo2``, ``yp1``, ``yp2``, ``out1``, ``out2``,
        ``loss3`` and ``loss_span``.

        NOTE(review): the hard-coded indices ``lo1_test[0..2]`` and the tile
        factor ``3`` below look like they assume ``self.k == 3`` -- confirm.
        """
        config = self.config
        # NOTE(review): QL, CL, dc and dg are unpacked here but never read in
        # this method.
        N, QL, CL, d, dc, dg = config.batch_size, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden

        gru = cudnn_gru if config.use_cudnn else native_gru
        # SN: number of sentence slices iterated over below.
        SN = self.k
        # NOTE(review): W is never read, and d repeats the value already
        # unpacked from config.hidden above.
        W = config.glove_dim
        d = config.hidden

        print('embedding part')

        with tf.name_scope("word"):
            # Only the paragraph goes through the embedding lookup; sentence
            # and question inputs are taken from the slices as they are.
            para_emb = tf.nn.embedding_lookup(self.word_mat, self.para_slice)
            c_emb = self.sentence_slice
            q_emb = self.q_slice

        with tf.name_scope("para_encode"):

            # Bias-free projections of paragraph and question to d units,
            # kernels initialised to ones.
            # NOTE(review): `trainable` is fed from self.is_train -- confirm
            # that this is a Python bool and not a tensor.
            para_emb_linear = tf.layers.dense(
                para_emb,
                d,
                use_bias=False,
                kernel_initializer=tf.ones_initializer(),
                trainable=self.is_train,
                name='para_emb_line')
            q_emb_linear = tf.layers.dense(
                q_emb,
                d,
                use_bias=False,
                kernel_initializer=tf.ones_initializer(),
                trainable=self.is_train,
                name='q_emb_line')
            # Paragraph-to-question alignment scores from the two projections.
            align_pq = tf.matmul(para_emb_linear,
                                 tf.transpose(q_emb_linear, [0, 2, 1]))
            # q_mask gets a new axis 1 and is tiled para_maxlen times along it.
            pq_mask = tf.tile(tf.expand_dims(self.q_mask, axis=1),
                              [1, self.para_maxlen, 1])
            align_pq = tf.nn.softmax(softmax_mask(align_pq, pq_mask))
            align_para_emb = tf.matmul(align_pq, q_emb_linear)
            # Final paragraph input: raw embedding, aligned question
            # embedding and the two extra per-token slices.
            para_emb_concat = tf.concat([
                para_emb, align_para_emb, self.para_e_slice, self.para_t_slice
            ],
                                        axis=2)
            self.para_emb = para_emb_concat

        print('encode-part')
        # c_emb = self.sentence_slice

        # Split the sentence tensors along axis 1 into per-sentence lists.
        c_emb_sen = tf.unstack(c_emb, axis=1)
        sentence_len = tf.unstack(self.sentence_len, axis=1)
        c_s = []
        with tf.variable_scope("sentence_encoding"):
            # One 3-layer encoder object is applied to every sentence and to
            # the paragraph.
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb_sen[0].get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)

            print('passage-encoder')
            for i in range(SN):
                c_s_emb = rnn(c_emb_sen[i],
                              seq_len=sentence_len[i],
                              concat_layers=False)

                c_s.append(c_s_emb)
            # NOTE(review): input_size above comes from the sentence input
            # while para_emb_concat is a wider concat -- confirm the encoder
            # accepts both widths.
            para_gru = rnn(para_emb_concat,
                           seq_len=self.para_len,
                           concat_layers=False)

        with tf.variable_scope("q_encoding"):
            # Separate 3-layer encoder for the question.
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=q_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            q = rnn(q_emb, seq_len=self.q_len, concat_layers=False)

        # Disabled highway-encoding experiment:
        # c_s_h = []
        # with tf.variable_scope("highway_encoding",reuse = tf.AUTO_REUSE):
        #     highway = Highway(hidden_size=2*d,is_train=self.is_train)
        #     for i in range(SN):
        #         c_s_highway = highway(c_s[i])
        #         c_s_h.append(c_s_highway)
        #     para_gru = highway(para_gru)
        #     q = highway(q)
        # c_s = c_s_h

        print('qc_att')
        self.c_s = c_s
        self.para_gru = para_gru
        qc_att = []
        sen_mask = tf.unstack(self.sentence_mask, axis=1)
        # AUTO_REUSE lets every dot_attention call in this scope share
        # variables.
        with tf.variable_scope("sentence_attention", reuse=tf.AUTO_REUSE):
            for i in range(SN):
                qc_att_sample = dot_attention(c_s[i],
                                              q,
                                              mask=self.q_mask,
                                              hidden=d,
                                              keep_prob=config.keep_prob,
                                              is_train=self.is_train)
                qc_att.append(qc_att_sample)

            para_att = dot_attention(para_gru,
                                     q,
                                     mask=self.q_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)

        att_s = []
        with tf.variable_scope("sentence_qcatt_rnn"):
            # Single-layer GRU applied to each question-attended sentence and
            # to the question-attended paragraph.
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att[0].get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            for i in range(SN):
                att_s_single = rnn(qc_att[i], seq_len=sentence_len[i])
                att_s.append(att_s_single)
            para_s = rnn(para_att, seq_len=self.para_len)

        self.sentence_att = qc_att
        self.para_att = para_att

        self_att = []

        # Each sentence attends over the paragraph representation para_s.
        with tf.variable_scope("sentence_cpattention", reuse=tf.AUTO_REUSE):
            for i in range(SN):
                self_att_single = dot_attention(att_s[i],
                                                para_s,
                                                mask=self.para_mask,
                                                hidden=d,
                                                keep_prob=config.keep_prob,
                                                is_train=self.is_train)
                self_att.append(self_att_single)

        with tf.variable_scope("para_selfattn"):
            # self.para_enc_slice, mask = self.para_enc_mask_slice,
            # The paragraph attends over itself.
            para_self_att = dot_attention(para_s,
                                          para_s,
                                          mask=self.para_mask,
                                          hidden=d,
                                          keep_prob=config.keep_prob,
                                          is_train=self.is_train)

        self.sentence_selfatt = self_att
        self.para_selfatt = para_self_att

        match = []
        with tf.variable_scope("sentence_cp_rnn"):
            # Single-layer GRU over the attention outputs above, shared by
            # the sentences and the paragraph.
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att[0].get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            for i in range(SN):
                match_single = rnn(self_att[i], seq_len=sentence_len[i])
                match.append(match_single)
            para_match = rnn(para_self_att, seq_len=self.para_len)
        self.match = match

        dense_prob = []
        dense_con = []
        with tf.variable_scope("dense_prob", reuse=tf.AUTO_REUSE):
            for i in range(SN):
                # Per-sentence feature: encoder output, question-attended
                # output and paragraph-matched output, concatenated.
                sentence_con = tf.concat([c_s[i], att_s[i], match[i]], axis=2)
                prob = dense_summ(sentence_con,
                                  d,
                                  mask=sen_mask[i],
                                  keep_prob=config.keep_prob,
                                  is_train=self.is_train)
                dense_prob.append(prob)
                dense_con.append(sentence_con)
            # with tf.variable_scope("para_prob"):
            para_con = tf.concat([para_gru, para_s, para_match], axis=2)
            para_prob = dense_summ(para_con,
                                   d,
                                   mask=self.para_mask,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train)
            # The paragraph score is appended after the SN sentence scores.
            dense_prob.append(para_prob)
            dense_prob = tf.concat(dense_prob, axis=1)
            # Softmax over the concatenated sentence + paragraph scores;
            # used below to weight the pointer outputs.
            self.topk = tf.nn.softmax(dense_prob)

        # Batch index repeated sentence_maxlen times per row; paired with
        # sentence_index_slice below to form scatter_nd indices.
        batch_nums = tf.range(0, limit=N)
        batch_nums = tf.expand_dims(batch_nums, 1)
        batch_nums = tf.tile(batch_nums, [1, self.sentence_maxlen])
        # Target shape for scattering sentence logits.
        lo_shape = tf.constant([N, config.para_limit])

        sentence_index_slice = tf.unstack(self.sentence_index_slice, axis=1)
        # how to ensure the probability
        # sentence1,sentence2,setence3,q,para =?*4

        lo1 = []
        lo2 = []
        with tf.variable_scope("sentence_pointer", reuse=tf.AUTO_REUSE):

            # Initial pointer state, built from the last 2 * d channels of
            # the question encoding.
            self.init = summ(q[:, :, -2 * d:],
                             d,
                             mask=self.q_mask,
                             keep_prob=config.keep_prob,
                             is_train=self.is_train)
            pointer = ptr_net_span(batch=N,
                                   hidden=self.init.get_shape().as_list()[-1],
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train)
            indice_test = []
            lo1_test = []
            lo2_test = []
            present = []
            present_inp = []

            for i in range(SN):
                logits1, logits2, inp1, inp2 = pointer(self.init, dense_con[i],
                                                       d, sen_mask[i])
                # Multiply the logits by the sentence mask (cast to float).
                logits1 = logits1 * tf.cast(sen_mask[i], tf.float32)
                logits2 = logits2 * tf.cast(sen_mask[i], tf.float32)
                # (batch index, sentence_index_slice value) pairs stacked on
                # axis 2, used as scatter_nd indices below.
                indice = tf.stack([batch_nums, sentence_index_slice[i]],
                                  axis=2)
                inp = tf.stack([inp1, inp2], axis=1)
                present.append(inp)
                present_inp.append(inp2)
                lo1_test.append(logits1)
                lo2_test.append(logits2)
                indice_test.append(indice)

            # NOTE(review): lo1..lo3 all come from lo1_test (the first
            # pointer output of sentences 0..2); requires SN >= 3.
            self.lo1 = lo1_test[0]
            self.lo2 = lo1_test[1]
            self.lo3 = lo1_test[2]

            # Scatter each sentence's logits into a [N, para_limit] tensor
            # using the index pairs, then keep the first para_maxlen columns.
            lo1 = [
                tf.slice(tf.scatter_nd(in1, in2, lo_shape), [0, 0],
                         [N, self.para_maxlen])
                for (in1, in2) in zip(indice_test, lo1_test)
            ]
            lo2 = [
                tf.slice(tf.scatter_nd(in1, in2, lo_shape), [0, 0],
                         [N, self.para_maxlen])
                for (in1, in2) in zip(indice_test, lo2_test)
            ]

            with tf.variable_scope("para_pointer"):
                # Separate pointer network for the whole paragraph.
                para_pointer = ptr_net_span(
                    batch=N,
                    hidden=self.init.get_shape().as_list()[-1],
                    keep_prob=config.keep_prob,
                    is_train=self.is_train)
                para_lo1, para_lo2, inp1, inp2 = para_pointer(
                    self.init, para_match, d, self.para_mask)
                present_para = tf.stack([inp1, inp2], axis=1)
                # Unlike the sentence logits (multiplied by their mask), the
                # paragraph logits go through softmax_mask.
                para_lo1 = softmax_mask(para_lo1, self.para_mask)
                para_lo2 = softmax_mask(para_lo2, self.para_mask)
            # NOTE(review): the tile factor 3 is hard-coded; presumably it
            # widens present_para to match the sentence entries -- confirm.
            present.append(tf.tile(present_para, [1, 1, 3]))
            present_inp.append(inp2)
            lo1.append(para_lo1)
            lo2.append(para_lo2)
            self.lo4 = para_lo2
            self.present = tf.stack(present, axis=2)
            # Stack sentence + paragraph outputs on axis 1, weight them by
            # self.topk and sum over that axis.
            out_lo1 = tf.stack(lo1, axis=1)
            out_lo2 = tf.stack(lo2, axis=1)
            out_lo1 = (tf.expand_dims(self.topk, axis=2)) * out_lo1
            out_logits1 = tf.reduce_sum(out_lo1, axis=1)
            # out_logits1 = tf.slice(out_logits1, [0, 0], [N, self.para_maxlen])
            # out_logits1 = softmax_mask(out_logits1, self.para_mask)
            out_lo2 = (tf.expand_dims(self.topk, axis=2)) * out_lo2
            out_logits2 = tf.reduce_sum(out_lo2, axis=1)
            # out_logits2 = tf.slice(out_logits2, [0, 0], [N, self.para_maxlen])
            # out_logits2 = softmax_mask(out_logits2, self.para_mask)

            # NOTE(review): self.out_lo2 stores out_logits1 (the summed first
            # output), not anything derived from out_lo2 -- confirm intent.
            self.out_lo1 = out_lo1
            self.out_lo2 = out_logits1

            # out_logits1 = tf.nn.softmax(out_logits1)
            # out_logits2 = tf.nn.softmax(out_logits2)
            outer = tf.matmul(
                tf.expand_dims(tf.nn.softmax(out_logits1), axis=2),
                tf.expand_dims(tf.nn.softmax(out_logits2), axis=1))
            # Keep only entries with 0 <= col - row <= 15.
            outer = tf.matrix_band_part(outer, 0, 15)

        with tf.variable_scope("predict"):

            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=out_logits1, labels=tf.stop_gradient(self.y1_slice))
            losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=out_logits2, labels=tf.stop_gradient(self.y2_slice))
            # Largest entry of `outer` per example, computed via each axis
            # in turn.
            prob_y1 = tf.expand_dims(tf.reduce_max(tf.reduce_max(outer,
                                                                 axis=2),
                                                   axis=1),
                                     axis=1)
            prob_y2 = tf.expand_dims(tf.reduce_max(tf.reduce_max(outer,
                                                                 axis=1),
                                                   axis=1),
                                     axis=1)
            prob = tf.concat([prob_y1, prob_y2], axis=1)
            # NOTE(review): lossRL is built but never read afterwards.
            lossRL = -tf.log(prob) * self.reward_Diff
            self.out1 = losses

            self.out2 = losses2
            # The two per-example losses side by side, scaled by
            # self.reward_Diff.
            loss = tf.concat([
                tf.expand_dims(losses, axis=1),
                tf.expand_dims(losses2, axis=1)
            ],
                             axis=1)
            final_reward = loss * self.reward_Diff
            self.loss3 = tf.reduce_mean((losses + losses2))
            # NOTE(review): lam is read from config but never used.
            lam = config.lam
            self.loss_span = tf.reduce_mean(final_reward)
예제 #5
0
    def ready(self):
        """Concatenate per-passage outputs and build the pointer / loss ops.

        For every index below ``config.max_para`` a ``tf.cond`` either takes
        the outputs of ``self.get_vp(i)`` (when ``i`` is below the runtime
        maximum of ``self.passage_count``) and joins them to the running
        tensors along axis 1, or passes the running tensors through unchanged.
        The joined tensor is then padded, filtered with the accumulated
        sequence mask and reshaped before it is fed to the pointer network.

        Attributes written: ``cell_fw``, ``cell_bw``, ``yp1``, ``yp2`` and
        ``loss``.  The method reads ``self.q_enc``, which is not set here --
        presumably by ``get_vp`` or an earlier build step; verify.

        NOTE(review): PL, QL, CL, dc and ``gru`` are assigned but never read
        in this method.
        """
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        # Runtime upper bound on how many passages are actually present.
        max_para = tf.reduce_max(self.passage_count)
        # Stored on self rather than used here; presumably read by get_vp --
        # confirm.
        self.cell_fw = tf.contrib.rnn.GRUCell(dg)
        self.cell_bw = tf.contrib.rnn.GRUCell(dg)

        # Zero-filled starting values for the running tensors.
        # NOTE(review): the feature width 300 is hard-coded while the reshape
        # further down uses 2 * config.hidden -- confirm they agree.
        vp_concat = tf.zeros([N, 1, 300], tf.float32)
        clen_concat = tf.zeros([N, 1], tf.int32)
        c_mask_concat = tf.cast(tf.zeros([N, 1]), tf.bool)
        y1_concat = y2_concat = tf.zeros([N, 1])
        seq_mask_concat = tf.cast(tf.zeros([N, 1]),
                                  tf.bool)  # maybe seq mask is = c_mask
        # NOTE(review): q is never read after this assignment.
        q = tf.zeros([N, 1, 1])
        for i in range(config.max_para):
            i_ = tf.constant(i)

            #print_out(i)
            def vp():
                # Branch taken when passage i exists: fetch its outputs and
                # either use them as-is (i == 0) or append them to the
                # running tensors along axis 1.
                att, c_len, c_mask, y1, y2, seq_mask = self.get_vp(i)
                #print(att)
                #print(c_len)
                #print(c_mask)
                #print(y1)
                #print(y2)
                #print(seq_mask)
                #print(q)
                c_len = tf.reshape(c_len, [N, 1])
                att, c_len, c_mask, y1, y2, seq_mask = tf.cond(
                    tf.equal(i_, tf.constant(0)), lambda:
                    (att, c_len, c_mask, y1, y2, seq_mask), lambda: (
                        tf.concat([vp_concat, att], axis=1),
                        tf.concat([clen_concat, c_len], axis=1),
                        tf.concat([c_mask_concat, c_mask], axis=1),
                        tf.concat([y1_concat, y1], axis=1),
                        tf.concat([y2_concat, y2], axis=1),
                        tf.concat([seq_mask_concat, seq_mask], axis=1),
                    ))
                return att, c_len, c_mask, y1, y2, seq_mask

            def dummy():
                # Branch taken when passage i does not exist: keep the
                # running tensors unchanged.
                return vp_concat, clen_concat, c_mask_concat, y1_concat, y2_concat, seq_mask_concat

            vp_concat, clen_concat, c_mask_concat, y1_concat, y2_concat, seq_mask_concat \
             = tf.cond(i_ < max_para, vp, dummy)

        # Per-example sum of the collected lengths.
        vp_mask_count = tf.reduce_sum(clen_concat, axis=1)

        # max count w.r.t original concatenated context (self.c_len)
        vpmccl = vp_mask_max_count_c_like = tf.reduce_max(vp_mask_count)
        # max count w.r.t concatenated vp (self.att_vP)
        ##### not used:
        vp_mask_max_count = tf.reduce_max(tf.reduce_max(clen_concat))

        # Per-example shortfall relative to the longest example in the batch.
        vp_final_pad_meta = vp_mask_max_count_c_like - vp_mask_count

        # dont know why this diff happens, but it does
        diff = tf.shape(self.c_mask)[-1] - vp_mask_max_count_c_like

        # Extend the sequence mask so every row selects the same number of
        # positions (its own entries plus padding).
        vp_final_pad_seq = tf.sequence_mask(vp_final_pad_meta + diff)
        seq_mask_concat = tf.concat([seq_mask_concat, vp_final_pad_seq],
                                    axis=1)

        # Zero-pad the joined tensor along axis 1 to match the extended mask.
        pad_length = tf.reduce_max(vp_final_pad_meta) + diff
        paddings = tf.convert_to_tensor([[0, 0], [0, pad_length], [0, 0]])
        new_vp = tf.pad(vp_concat, paddings, "CONSTANT")

        # Keep only the masked positions and restore the batch layout.
        new_vp = tf.reshape(tf.boolean_mask(new_vp, seq_mask_concat),
                            [N, vpmccl + diff, 2 * config.hidden])
        """
		new_vp = tf.Print(new_vp,["vp_mask_max_count_c_like",vp_mask_max_count_c_like,
			"vp_final_pad_meta",vp_final_pad_meta,
			"vp_concat",tf.shape(vp_concat),"new_vp",tf.shape(new_vp),
			"c_mask",tf.shape(self.c_mask),"seq_mask_concat",tf.shape(seq_mask_concat),
			"clen_concat",clen_concat,"c_mask_last",self.c_mask[:,-1],
			"vp_mask_count",vp_mask_count,"c_len",self.c_len],
			summarize=N*10,message="SHORT")
		"""

        #self.c_mask = tf.concat([self.c_mask,vp_final_pad_seq],axis=1)
        with tf.variable_scope("pointer"):
            # r_Q:
            # Initial pointer state, built from the last 2 * d channels of
            # self.q_enc.
            init = summ(self.q_enc[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)

            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            #logits1, logits2 = pointer(init, new_vp, d, self.c_mask)
            logits1, logits2 = pointer(init, new_vp, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            # Keep only entries with 0 <= col - row <= 15.
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            # NOTE(review): labels come from self.y1 / self.y2; the
            # y1_concat / y2_concat tensors accumulated above are not used
            # for the loss.
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)
예제 #6
0
    def ready(self):
        """Build the span-prediction graph plus a document-selection head.

        Passage and question word embeddings are encoded with a 3-layer
        recurrent encoder, passed through question-passage attention and
        passage self-matching, and fed to a pointer network that yields
        start/end logits.  A separate "select" head pools the matched passage
        against the question summary and scores it with two dense layers.

        Sets ``self.yp1`` / ``self.yp2`` (argmax start/end indices),
        ``self.loss`` (mean of the summed start and end cross-entropies) and
        ``self.loss_s`` (sigmoid cross-entropy of the selection logit, left
        un-reduced).  ``self.s`` is overwritten with its float32 cast.
        """
        config = self.config
        N, PL, QL, d = config.batch_size, self.c_maxlen, self.q_maxlen, config.hidden
        # Pick the recurrent-layer factory once; both share one call signature.
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.name_scope("embedding"):
            with tf.name_scope("title"):
                # The title embedding is looked up but not consumed below.
                t_emb = tf.nn.embedding_lookup(self.word_mat, self.t)

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

        # Only word embeddings are used here; no character-level features are
        # concatenated onto c_emb / q_emb.

        with tf.variable_scope("encoding"):
            # One encoder instance is applied to both passage and question.
            rnn = gru(num_layers=3, num_units=d, batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            # Passage attends over the question (masked by q_mask).
            qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                                   keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope("match"):
            # Passage attends over itself (masked by c_mask).
            self_att = dot_attention(
                att, att, mask=self.c_mask, hidden=d,
                keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        with tf.variable_scope("pointer"):
            # Summarise the trailing 2*d channels of the question encoding into
            # the initial pointer state.
            init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        # Answer span prediction.
        with tf.variable_scope("predict"):
            # Outer product of start and end distributions: entry (i, j) is
            # p_start(i) * p_end(j).
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            # No band restriction on the answer span is applied in this
            # variant (the matrix_band_part call is disabled).
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits1, labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits2, labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)

        # Document selection.
        with tf.variable_scope("select"):
            # Pool the matched passage against the question summary
            # (original shape note: [batch_size, dim]).
            c_cum = attention_pooling(match, init, self.c_mask, hidden=d)
            fuse = tf.concat([c_cum, init], axis=1)
            fuse = dense(fuse, hidden=d, use_bias=False, scope="fully1")
            fuse = dense(fuse, hidden=1, use_bias=False, scope="fully2")
            # Original shape note: fuse is [batch_size, 1] at this point.
            # The sigmoid is not consumed in this method; the op is kept so the
            # graph contents stay as before.
            logits_s = tf.sigmoid(fuse)
            # Drop only the trailing unit axis.  A bare tf.squeeze removes every
            # size-1 axis, so with a batch size of 1 it would also remove the
            # batch axis and the logits would no longer line up with the labels.
            fuse = tf.squeeze(fuse, axis=1)
            self.s = tf.cast(self.s, tf.float32)
            # Left un-reduced; no mean is taken here.
            self.loss_s = tf.nn.sigmoid_cross_entropy_with_logits(logits=fuse, labels=self.s)
예제 #7
0
파일: model.py 프로젝트: Mars-Wei/R-Net
    def ready(self):
        """Wire up the full model graph and its training loss.

        Stages, each in its own variable scope: character-level bi-GRU
        features joined with word embeddings ("emb"), a 3-layer recurrent
        encoder applied to passage and question ("encoding"),
        question-to-passage attention ("attention"), passage self-matching
        ("match"), a pointer network producing start/end logits ("pointer")
        and the span prediction plus loss ("predict").

        Sets ``self.yp1`` / ``self.yp2`` (predicted start/end indices) and
        ``self.loss``; returns nothing.
        """
        cfg = self.config
        batch = cfg.batch_size
        para_len = self.c_maxlen
        ques_len = self.q_maxlen
        char_len = cfg.char_limit
        hidden = cfg.hidden
        char_dim = cfg.char_dim
        char_hidden = cfg.char_hidden
        if cfg.use_cudnn:
            make_gru = cudnn_gru
        else:
            make_gru = native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                # Look up character vectors, then fold (example, token) into a
                # single leading axis so each token is its own char sequence.
                para_chars = tf.nn.embedding_lookup(self.char_mat, self.ch)
                para_chars = tf.reshape(
                    para_chars, [batch * para_len, char_len, char_dim])
                ques_chars = tf.nn.embedding_lookup(self.char_mat, self.qh)
                ques_chars = tf.reshape(
                    ques_chars, [batch * ques_len, char_len, char_dim])

                # Dropout settings shared by every stage before the pointer.
                noise = {"keep_prob": cfg.keep_prob, "is_train": self.is_train}
                para_chars = dropout(para_chars, **noise)
                ques_chars = dropout(ques_chars, **noise)

                # The same cell pair is used for passage and question tokens.
                fw_cell = tf.contrib.rnn.GRUCell(char_hidden)
                bw_cell = tf.contrib.rnn.GRUCell(char_hidden)

                _, final_states = tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, para_chars, self.ch_len,
                    dtype=tf.float32)
                fw_final, bw_final = final_states
                para_chars = tf.concat([fw_final, bw_final], axis=1)

                _, final_states = tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, ques_chars, self.qh_len,
                    dtype=tf.float32)
                fw_final, bw_final = final_states
                ques_chars = tf.concat([fw_final, bw_final], axis=1)

                # Unfold back to one row of char features per token.
                ques_chars = tf.reshape(
                    ques_chars, [batch, ques_len, 2 * char_hidden])
                para_chars = tf.reshape(
                    para_chars, [batch, para_len, 2 * char_hidden])

            with tf.name_scope("word"):
                para_words = tf.nn.embedding_lookup(self.word_mat, self.c)
                ques_words = tf.nn.embedding_lookup(self.word_mat, self.q)

            para_in = tf.concat([para_words, para_chars], axis=2)
            ques_in = tf.concat([ques_words, ques_chars], axis=2)

        with tf.variable_scope("encoding"):
            in_width = para_in.get_shape().as_list()[-1]
            encoder = make_gru(num_layers=3, num_units=hidden,
                               batch_size=batch, input_size=in_width, **noise)
            para_enc = encoder(para_in, seq_len=self.c_len)
            ques_enc = encoder(ques_in, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            # Passage attends over the question.
            gated = dot_attention(para_enc, ques_enc, mask=self.q_mask,
                                  hidden=hidden, **noise)
            in_width = gated.get_shape().as_list()[-1]
            att_rnn = make_gru(num_layers=1, num_units=hidden,
                               batch_size=batch, input_size=in_width, **noise)
            attended = att_rnn(gated, seq_len=self.c_len)

        with tf.variable_scope("match"):
            # Passage attends over itself.
            self_gated = dot_attention(attended, attended, mask=self.c_mask,
                                       hidden=hidden, **noise)
            in_width = self_gated.get_shape().as_list()[-1]
            match_rnn = make_gru(num_layers=1, num_units=hidden,
                                 batch_size=batch, input_size=in_width,
                                 **noise)
            matched = match_rnn(self_gated, seq_len=self.c_len)

        with tf.variable_scope("pointer"):
            # Trailing 2*hidden channels of the question encoding seed the
            # pointer network.
            ques_tail = ques_enc[:, :, -2 * hidden:]
            ptr_noise = {"keep_prob": cfg.ptr_keep_prob,
                         "is_train": self.is_train}
            init_state = summ(ques_tail, hidden, mask=self.q_mask,
                              **ptr_noise)
            state_width = init_state.get_shape().as_list()[-1]
            pointer = ptr_net(batch=batch, hidden=state_width, **ptr_noise)
            start_logits, end_logits = pointer(
                init_state, matched, hidden, self.c_mask)

        with tf.variable_scope("predict"):
            # joint[b, i, j] = p_start(i) * p_end(j)
            start_col = tf.expand_dims(tf.nn.softmax(start_logits), axis=2)
            end_row = tf.expand_dims(tf.nn.softmax(end_logits), axis=1)
            joint = tf.matmul(start_col, end_row)
            # Keep the main diagonal and the 15 diagonals above it only.
            joint = tf.matrix_band_part(joint, 0, 15)
            best_per_start = tf.reduce_max(joint, axis=2)
            self.yp1 = tf.argmax(best_per_start, axis=1)
            best_per_end = tf.reduce_max(joint, axis=1)
            self.yp2 = tf.argmax(best_per_end, axis=1)

            start_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=start_logits, labels=self.y1)
            end_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=end_logits, labels=self.y2)
            self.loss = tf.reduce_mean(start_loss + end_loss)
예제 #8
0
    def ready(self):
        """Build the multi-passage graph: span, passage-selection and in-answer heads.

        On top of the encoder / attention / self-matching / pointer stack this
        method adds a content model, dot-product attention across the passages
        of one query, a pseudo-label that mixes the given selection labels
        with attention-propagated ones, and a dense passage-selection head.

        The batch is treated as ``query_num`` groups of ``config.passage_num``
        passages (``query_num = batch_size / passage_num``).

        Sets ``self.query_num``, ``self.pse_is_select``, ``self.fuse_label``,
        ``self.yp1`` / ``self.yp2``, ``self.is_select_p``, ``self.loss_dict``
        (mean of each individual loss) and ``self.loss``.  ``self.is_select``
        is overwritten with its reshaped, row-normalised version.
        """
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                # Fold (example, token) into one axis so each token's
                # characters form a separate sequence for the char RNN.
                ch_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.ch), [N * PL, CL, dc])
                qh_emb = tf.reshape(tf.nn.embedding_lookup(
                    self.char_mat, self.qh), [N * QL, CL, dc])
                ch_emb = dropout(
                    ch_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                qh_emb = dropout(
                    qh_emb, keep_prob=config.keep_prob, is_train=self.is_train)
                # The same cell pair is used for passage and question tokens.
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                # Unfold back to one char-feature vector per token.
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3, num_units=d, batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
                                   keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope("match"):
            self_att = dot_attention(
                att, att, mask=self.c_mask, hidden=d,
                keep_prob=config.keep_prob, is_train=self.is_train)
            rnn = gru(num_layers=1, num_units=d, batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob, is_train=self.is_train)
            # Original author's shape note: [10, ?, 300] -- TODO confirm.
            match = rnn(self_att, seq_len=self.c_len)

        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:], d, mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            pointer = ptr_net(batch=N, hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob, is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("content_modeling"):
            # logits4 feeds the in-answer loss below; c_semantics is the
            # per-passage vector used for cross-passage attention.
            logits4, c_semantics = content_model(init, match, config.hidden)

        with tf.variable_scope("cross_passage_attention"):
            self.query_num = int(config.batch_size/config.passage_num)
            # Group passages by query: [query_num, passage_num, features].
            c_semantics = tf.reshape(c_semantics, shape=[self.query_num, config.passage_num, -1])
            # Pairwise dot products between the passages of one query.
            attnc_key = tf.tile(tf.expand_dims(c_semantics, axis=2), [1, 1, config.passage_num, 1])
            attnc_mem = tf.tile(tf.expand_dims(c_semantics, axis=1), [1, config.passage_num, 1, 1])
            attnc_w = tf.reduce_sum(attnc_key*attnc_mem, axis=-1)
            # Ones everywhere except the diagonal (a passage paired with itself).
            attnc_mask = tf.ones([config.passage_num, config.passage_num])-tf.diag([1.0]*config.passage_num)
            # NOTE(review): multiplying by the mask sets the diagonal scores to
            # 0 before the softmax, and a score of 0 still receives non-zero
            # softmax weight -- confirm this is the intended masking.
            attnc_w = tf.nn.softmax(attnc_w*attnc_mask, axis=-1)
            # Attention-weighted sum over the other passages' vectors; the tile
            # width assumes c_semantics has 2*config.hidden features.
            attncp = tf.reduce_sum(tf.tile(tf.expand_dims(attnc_w, axis=-1), [1, 1, 1, 2*config.hidden])*attnc_mem, axis=2)

        with tf.variable_scope("pseudo_label"):
            # The explicit reshape fixes the final shape, so the bare squeeze
            # here cannot change the result.
            self.is_select = tf.reshape(tf.squeeze(self.is_select), shape=[self.query_num, config.passage_num])
            # Normalise each query's labels to sum to 1.
            # NOTE(review): a query with no selected passage divides by zero
            # here -- confirm the data never contains such a row.
            self.is_select = self.is_select/tf.tile(tf.reduce_sum(self.is_select, axis=-1, keepdims=True), [1, config.passage_num])
            sim_matrix = attnc_w
            lb_matrix = tf.tile(tf.expand_dims(self.is_select, axis=1), [1, config.passage_num, 1])
            # Propagate labels through the attention weights; the tiny constant
            # keeps the row sum non-zero ("avoid all zero").
            self.pse_is_select = tf.reduce_sum(sim_matrix*lb_matrix, axis=-1) + tf.constant([0.00000001]*config.passage_num, dtype=tf.float32)
            self.pse_is_select = self.pse_is_select/tf.tile(tf.reduce_sum(self.pse_is_select, axis=-1, keepdims=True), [1, config.passage_num])
            # Mix given and pseudo labels; no gradient flows into the pseudo
            # label.
            alpha = 0.7
            self.fuse_label = alpha*self.is_select + (1-alpha)*tf.stop_gradient(self.pse_is_select)

        with tf.variable_scope("predict_passage"):
            init = tf.reshape(init, shape=[self.query_num, config.passage_num, -1])
            attn_concat = tf.concat([init, attncp, c_semantics], axis=-1)
            # Original author's width notes on the two hidden layers: 150, 75.
            d1 = tf.layers.dense(attn_concat, 2*config.hidden, activation=tf.nn.leaky_relu, bias_initializer=tf.glorot_uniform_initializer())
            d2 = tf.layers.dense(d1, config.hidden, activation=tf.nn.leaky_relu, bias_initializer=tf.glorot_uniform_initializer())
            # Squeeze only the trailing unit axis of the 1-unit dense output.
            # A bare tf.squeeze would also drop the query or passage axis
            # whenever query_num or passage_num is 1, so the logits would stop
            # matching the [query_num, passage_num] labels.
            logits3 = tf.squeeze(
                tf.layers.dense(d2, 1, activation=None, bias_initializer=tf.glorot_uniform_initializer()),
                axis=-1)

        with tf.variable_scope("predict"):
            # outer[b, i, j] = p_start(i) * p_end(j)
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            # Keep the main diagonal and the 30 diagonals above it only.
            outer = tf.matrix_band_part(outer, 0, 30)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            self.is_select_p = tf.nn.sigmoid(logits3)

            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=tf.stop_gradient(self.y1))
            losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=tf.stop_gradient(self.y2))

            # weighted_loss is defined elsewhere; its second argument was
            # previously 0.01 according to the original inline notes.
            weighted_losses = weighted_loss(config, 0.000001, self.y1, losses)
            weighted_losses2 = weighted_loss(config, 0.000001, self.y2, losses2)

            # Passage selection: softmax cross-entropy over the passages of a
            # query against the fused label.
            losses3 = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits3, labels=tf.stop_gradient(self.fuse_label)))

            # Weight is 1 + 3 * in_answer (4 where in_answer is 1 -- assumes a
            # 0/1 indicator, TODO confirm).
            in_answer_weight = tf.ones_like(self.in_answer) + 3*self.in_answer

            losses4 = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
                logits=logits4, labels=tf.stop_gradient(self.in_answer))*in_answer_weight, axis=-1)

            weighted_losses4 = weighted_loss(config, 0.000001, self.in_answer, losses4)

            # Un-weighted losses, each reduced to its mean, for reporting.
            self.loss_dict = {'pos_s loss':losses, 'pos_e loss':losses2, 'select loss':losses3, 'in answer':losses4}
            for key, values in self.loss_dict.items():
                self.loss_dict[key] = tf.reduce_mean(values)

            self.loss = tf.reduce_mean(weighted_losses + weighted_losses2 + losses3+ weighted_losses4)
예제 #9
0
    def ready(self):
        """Build the ELMo-augmented model graph and its training loss.

        ELMo features are mixed in twice via ``weight_layers``: once
        ("embedding") alongside the word and char embeddings, and once
        ("encoding") concatenated onto the self-matched passage and the
        encoded question.  Sets ``self.yp1`` / ``self.yp2`` and ``self.loss``.

        Statement order matters here: each ``reuse_variables()`` call switches
        the enclosing variable scope to reuse mode for everything after it.
        """
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        # Recurrent-layer factory; called below with config.cell as its first
        # argument.
        gru = native_rnn

        # ELMo outputs for passage and question, consumed by both
        # weight_layers mixes below.
        c_elmo_features = self.elmo(self.c_elmo)
        q_elmo_features = self.elmo(self.q_elmo)

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                # Fold (example, token) into one axis so each token's
                # characters form a separate sequence for the char RNN.
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])
                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                # The same cell pair is used for passage and question tokens.
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                # Unfold back to one char-feature vector per token.
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            # Input-level ELMo mix.  The scope is switched to reuse mode
            # between the two calls so the question call can pick up the
            # variables created by the passage call.
            c_elmo_emb = weight_layers('embedding',
                                       c_elmo_features,
                                       l2_coef=0.0,
                                       do_layer_norm=False)['weighted_op']
            tf.get_variable_scope().reuse_variables()
            q_elmo_emb = weight_layers('embedding',
                                       q_elmo_features,
                                       l2_coef=0.0,
                                       do_layer_norm=False)['weighted_op']

            # ELMo features get their own dropout rate (elmo_keep_prob).
            c_elmo_emb = dropout(c_elmo_emb,
                                 keep_prob=config.elmo_keep_prob,
                                 is_train=self.is_train)
            q_elmo_emb = dropout(q_elmo_emb,
                                 keep_prob=config.elmo_keep_prob,
                                 is_train=self.is_train)

            c_emb = tf.concat([c_emb, ch_emb, c_elmo_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb, q_elmo_emb], axis=2)

        with tf.variable_scope("encoding"):
            # One encoder instance is applied to both passage and question.
            rnn = gru(config.cell,
                      num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            # Passage attends over the question (masked by q_mask).
            qc_att = dot_attention(c,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train)
            rnn = gru(config.cell,
                      num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope("match"):
            # Passage attends over itself (masked by c_mask).
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
            rnn = gru(config.cell,
                      num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

            # Output-level ELMo mix, with the same create-then-reuse pattern
            # as in the "emb" scope.
            c_elmo_enc = weight_layers('encoding',
                                       c_elmo_features,
                                       l2_coef=0.0,
                                       do_layer_norm=False)['weighted_op']
            tf.get_variable_scope().reuse_variables()
            q_elmo_enc = weight_layers('encoding',
                                       q_elmo_features,
                                       l2_coef=0.0,
                                       do_layer_norm=False)['weighted_op']

            c_elmo_enc = dropout(c_elmo_enc,
                                 keep_prob=config.elmo_keep_prob,
                                 is_train=self.is_train)
            q_elmo_enc = dropout(q_elmo_enc,
                                 keep_prob=config.elmo_keep_prob,
                                 is_train=self.is_train)

            # Append the ELMo mix as extra trailing channels.
            match = tf.concat([match, c_elmo_enc], -1)
            q = tf.concat([q, q_elmo_enc], -1)

        with tf.variable_scope("pointer"):
            # NOTE(review): q now ends with the ELMo channels appended above,
            # so this trailing 2*d slice is taken from the concatenated
            # tensor rather than from the encoder output alone -- confirm
            # that is intended.
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            # outer[b, i, j] = p_start(i) * p_end(j)
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            # Keep the main diagonal and the 15 diagonals above it only.
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            # Labels are wrapped in stop_gradient so no gradient flows into
            # them.
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=tf.stop_gradient(self.y1))
            losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=tf.stop_gradient(self.y2))
            self.loss = tf.reduce_mean(losses + losses2)
    def ready(self):
        """Build the model graph, its loss, and tensors exposed for inspection.

        Same encoder / attention / self-matching / pointer stack as the other
        variants, with histogram summaries added and intermediate attention
        tensors pulled out of graph collections.  Sets ``self.att_logits``,
        ``self.att_outputs``, ``self.match_logits``, ``self.match_outputs``,
        ``self.yp1`` / ``self.yp2``, ``self.loss``, ``self.predict_outer_start``
        and ``self.predict_outer_end``.
        """
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                # Fold (example, token) into one axis so each token's
                # characters form a separate sequence for the char RNN.
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])
                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                # The same cell pair is used for passage and question tokens.
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                # Unfold back to one char-feature vector per token.
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            # One encoder instance is applied to both passage and question.
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            # Passage attends over the question (masked by q_mask).
            qc_att = dot_attention(c,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train,
                                   name_scope="attention_layer")
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)
            tf.summary.histogram('vt_P', att)
            # First entries of the two collections -- presumably registered
            # by the dot_attention call above; verify in dot_attention.  The
            # index depends on how many entries were added before this point.
            self.att_logits = tf.get_collection('Softmax_logits')[0]
            self.att_outputs = tf.get_collection('MatMul_outputs')[0]

        with tf.variable_scope("match"):
            # Passage attends over itself (masked by c_mask).
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train,
                                     name_scope="match_layer")
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)
            tf.summary.histogram('self_match', match)
            # Second entries of the same collections -- presumably from the
            # self-matching dot_attention call; verify in dot_attention.
            self.match_logits = tf.get_collection('Softmax_logits')[1]
            self.match_outputs = tf.get_collection('MatMul_outputs')[1]

        with tf.variable_scope("pointer"):
            # r_Q: summary of the trailing 2*d channels of the question
            # encoding, used as the initial pointer state.
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)

            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)
            tf.summary.histogram('rQ_init', init)
            tf.summary.histogram('pointer_logits_1', logits1)
            tf.summary.histogram('pointer_logits_2', logits2)

        with tf.variable_scope("predict"):
            # outer[b, i, j] = p_start(i) * p_end(j)
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            # Keep the main diagonal and the 15 diagonals above it only.
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)
            # Per-position maxima of the banded joint matrix (the same
            # tensors yp1 / yp2 take the argmax of), exposed on self.
            self.predict_outer_start = tf.reduce_max(outer, axis=2)
            self.predict_outer_end = tf.reduce_max(outer, axis=1)
            """
    def ready(self):
        config = self.config
        N, PL, QL, d = config.batch_size, self.c_maxlen, self.q_maxlen, config.hidden
        keep_prob, is_train = config.keep_prob, config.is_train
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.name_scope("word"):
                c = tf.nn.embedding_lookup(self.word_mat, self.c)
                q = tf.nn.embedding_lookup(self.word_mat, self.q)
            c_emb = tf.concat([c, self.fs, self.fe], axis=2)
            q_emb = q

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            c_enc, bw_final_state_c = rnn(c_emb, seq_len=self.c_len)
            q_enc, bw_final_state_q = rnn(q_emb, seq_len=self.q_len)

            encoder_outputs = tf.concat([c_enc, q_enc], axis=1)
            bw_final_state = (bw_final_state_c, bw_final_state_q)

        with tf.variable_scope("attention"):
            bi_final_hidden = dropout(bw_final_state,
                                      keep_prob=keep_prob,
                                      is_train=is_train)
            source_sequence_length = tf.add(PL, QL)

            logits, sample_id, final_context_state = _build_decoder(
                encoder_outputs, bi_final_hidden, config, is_train,
                source_sequence_length, target_sequence_length, target_input,
                embedding_decoder)
            """
			
			qc_att = dot_attention(c, q, mask=self.q_mask, hidden=d,
				keep_prob=config.keep_prob, is_train=self.is_train,
				name_scope="attention_layer")
			rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=qc_att.get_shape(
			).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
			att = rnn(qc_att, seq_len=self.c_len)
			# att is the v_P
			if i==0:
				att_vP = att
			else:
				att_vP = tf.concat([att_vP, att], axis=1)
			#att = tf.Print(att,[att],message="att:")
			print("att:",att.get_shape().as_list())
			print("att_vP:",att_vP.get_shape().as_list())
			"""

        with tf.variable_scope("pointer"):

            # r_Q:
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            print("rQ:", init.get_shape().as_list())
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, att_vP, d, self.c_pr_mask)
            tf.summary.histogram('rQ_init', init)
            tf.summary.histogram('pointer_logits_1', logits1)
            tf.summary.histogram('pointer_logits_2', logits2)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1_pr)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits2, labels=self.y2_pr)
            #losses1_2 = tf.reduce_mean(losses1_2, axis=0)
            self.loss = tf.reduce_mean(losses + losses2)

            # print losses
            #condition = tf.greater(self.loss, 11)
            #self.yp1 = tf.where(condition, tf.Print(self.yp1,[self.yp1],message="Yp1:"), self.yp1)
            #self.yp2 = tf.where(condition, tf.Print(self.yp2,[self.yp2],message="Yp2:"), self.yp1)

        if config.with_passage_ranking:
            gi = None
            for i in range(config.max_para):
                # Passage ranking
                if i == 0:
                    with tf.variable_scope("passage-ranking-attention"):

                        #att_vP = tf.Print(att_vP,[att_vP.get_shape()],message="att_vP:")
                        vj_P = att_vP[:, i * 400:(i + 1) * 400, :]
                        pr_att = pr_attention(
                            batch=N,
                            hidden=init.get_shape().as_list()[-1],
                            keep_prob=config.keep_prob,
                            is_train=self.is_train,
                            name_scope="passage_ranking_att_layer")
                        r_P = pr_att(init, vj_P, d, self.c_mask)
                        tf.summary.histogram('r_P_' + str(i), r_P)
                        #r_P = tf.Print(r_P,[r_P],message="r_p")
                        # Wg
                        concatenate = tf.concat([init, r_P], axis=1)
                        g = tf.nn.tanh(
                            dense(concatenate,
                                  hidden=d,
                                  use_bias=False,
                                  scope="g",
                                  name_scope="dense_pr_att_layer_1"))
                        g_ = dense(g,
                                   1,
                                   use_bias=False,
                                   scope="g_",
                                   name_scope="dense_pr_att_layer_2")
                        #g = tf.Print(g,[g],message="g")
                        if i == 0:
                            gi = tf.reshape(g_, [N, 1])
                        else:
                            gi = tf.concat([gi, tf.reshape(g_, [N, 1])],
                                           axis=1)
                else:
                    with tf.variable_scope("passage-ranking-attention",
                                           reuse=True):
                        #att_vP = tf.Print(att_vP,[att_vP.get_shape()],message="att_vP:")
                        vj_P = att_vP[:, i * 400:(i + 1) * 400, :]
                        pr_att = pr_attention(
                            batch=N,
                            hidden=init.get_shape().as_list()[-1],
                            keep_prob=config.keep_prob,
                            is_train=self.is_train,
                            name_scope="passage_ranking_att_layer")
                        r_P = pr_att(init, vj_P, d, self.c_mask)
                        tf.summary.histogram('r_P_' + str(i), r_P)
                        #r_P = tf.Print(r_P,[r_P],message="r_p")
                        # Wg

                        concatenate = tf.concat([init, r_P], axis=1)
                        g = tf.nn.tanh(
                            dense(concatenate,
                                  hidden=d,
                                  use_bias=False,
                                  scope="g",
                                  name_scope="dense_pr_att_layer_1"))
                        g_ = dense(g,
                                   1,
                                   use_bias=False,
                                   scope="g_",
                                   name_scope="dense_pr_att_layer_2")
                        #g = tf.Print(g,[g],message="g")
                        if i == 0:
                            gi = tf.reshape(g_, [N, 1])
                        else:
                            gi = tf.concat([gi, tf.reshape(g_, [N, 1])],
                                           axis=1)
            tf.summary.histogram('gi', gi)
            #gi_ = tf.convert_to_tensor(gi,dtype=tf.float32)
            #self.gi = tf.nn.softmax(gi_)
            #self.losses3 = tf.nn.softmax_cross_entropy_with_logits(
            #			logits=gi_, labels=tf.reshape(self.pr,[-1,1]))
            self.losses3 = tf.nn.softmax_cross_entropy_with_logits(
                logits=gi, labels=self.pr)
            #self.losses3 = tf.Print(self.losses3,[self.losses3,tf.reduce_max(self.losses3),
            #	tf.reduce_max(self.pr),tf.reduce_max(gi)],message="losses3:")
            self.pr_loss = tf.reduce_mean(self.losses3)
            #self.pr_loss = tf.Print(self.pr_loss,[self.pr_loss])
            self.r = tf.constant(0.8)
            self.e_loss1 = tf.multiply(self.r, self.loss)
            self.e_loss2 = tf.multiply(tf.subtract(tf.constant(1.0), self.r),
                                       self.pr_loss)
            self.e_loss = tf.add(self.e_loss1, self.e_loss2)
예제 #12
0
    def ready(self):
        """Build the question-answering graph and its training loss.

        The graph is assembled stage by stage, each under its own scope:
        character/word embeddings ("emb"), a shared 3-layer encoder
        ("encoding"), question-aware passage attention ("attention"),
        passage self-matching ("match"), a pointer network ("pointer") and
        span prediction ("predict").

        Sets:
            self.yp1, self.yp2: argmax start / end positions taken from the
                band-limited outer product of the two softmax distributions.
            self.loss: mean of the summed start and end softmax
                cross-entropy losses.
        """
        cfg = self.config
        batch = cfg.batch_size
        para_len = self.c_maxlen
        ques_len = self.q_maxlen
        char_len = cfg.char_limit
        hidden = cfg.hidden
        char_dim = cfg.char_dim
        char_hidden = cfg.char_hidden
        if cfg.use_cudnn:
            gru = CudnnGRU
        else:
            gru = NativeGRU

        # Dropout settings shared by every stage before the pointer network.
        drop_kw = dict(keep_prob=cfg.keep_prob, is_train=self.is_train)

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                # Look up character vectors and flatten (batch, word) into a
                # single leading axis so each word is one sequence of chars.
                para_chars = tf.nn.embedding_lookup(self.char_mat, self.ch)
                para_chars = tf.reshape(
                    para_chars, [batch * para_len, char_len, char_dim])
                ques_chars = tf.nn.embedding_lookup(self.char_mat, self.qh)
                ques_chars = tf.reshape(
                    ques_chars, [batch * ques_len, char_len, char_dim])

                para_chars = dropout(para_chars, **drop_kw)
                ques_chars = dropout(ques_chars, **drop_kw)

                # The same pair of cells encodes passage and question chars.
                fw_cell = tf.contrib.rnn.GRUCell(char_hidden)
                bw_cell = tf.contrib.rnn.GRUCell(char_hidden)

                # Only the final forward/backward states are kept per word.
                _, (fw_final, bw_final) = tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, para_chars, self.ch_len,
                    dtype=tf.float32)
                para_chars = tf.concat([fw_final, bw_final], axis=1)
                _, (fw_final, bw_final) = tf.nn.bidirectional_dynamic_rnn(
                    fw_cell, bw_cell, ques_chars, self.qh_len,
                    dtype=tf.float32)
                ques_chars = tf.concat([fw_final, bw_final], axis=1)

                # Restore the (batch, words, features) layout.
                ques_chars = tf.reshape(
                    ques_chars, [batch, ques_len, 2 * char_hidden])
                para_chars = tf.reshape(
                    para_chars, [batch, para_len, 2 * char_hidden])

            with tf.name_scope("word"):
                para_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                ques_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            # Word vector followed by the char-level word representation.
            para_emb = tf.concat([para_emb, para_chars], axis=2)
            ques_emb = tf.concat([ques_emb, ques_chars], axis=2)

        with tf.variable_scope("encoding"):
            encoder = gru(num_layers=3,
                          num_units=hidden,
                          batch_size=batch,
                          input_size=para_emb.get_shape().as_list()[-1],
                          **drop_kw)
            para_enc = encoder(para_emb, seq_len=self.c_len)  # passage
            ques_enc = encoder(ques_emb, seq_len=self.q_len)  # question

        # Gated attention RNN; per the original author's note this uses the
        # dot-product attention of "Attention Is All You Need".
        with tf.variable_scope("attention"):
            ques_aware = dot_attention(para_enc,
                                       ques_enc,
                                       mask=self.q_mask,
                                       hidden=hidden,
                                       **drop_kw)
            att_rnn = gru(num_layers=1,
                          num_units=hidden,
                          batch_size=batch,
                          input_size=ques_aware.get_shape().as_list()[-1],
                          **drop_kw)
            att_out = att_rnn(ques_aware, seq_len=self.c_len)

        # Self-matching RNN: the passage attends over itself.
        with tf.variable_scope("match"):
            self_aware = dot_attention(att_out,
                                       att_out,
                                       mask=self.c_mask,
                                       hidden=hidden,
                                       **drop_kw)
            match_rnn = gru(num_layers=1,
                            num_units=hidden,
                            batch_size=batch,
                            input_size=self_aware.get_shape().as_list()[-1],
                            **drop_kw)
            match_out = match_rnn(self_aware, seq_len=self.c_len)

        with tf.variable_scope("pointer"):
            # The pointer stage has its own keep probability.
            ptr_kw = dict(keep_prob=cfg.ptr_keep_prob, is_train=self.is_train)
            # Summarise the last 2 * hidden features of the encoded question.
            ques_summary = summ(ques_enc[:, :, -2 * hidden:],
                                hidden,
                                mask=self.q_mask,
                                **ptr_kw)
            pointer = PointerNet(batch=batch,
                                 hidden=ques_summary.get_shape().as_list()[-1],
                                 **ptr_kw)
            logits1, logits2 = pointer(ques_summary, match_out, hidden,
                                       self.c_mask)

        with tf.variable_scope("predict"):
            start_col = tf.expand_dims(tf.nn.softmax(logits1), axis=2)
            end_row = tf.expand_dims(tf.nn.softmax(logits2), axis=1)
            span_scores = tf.matmul(start_col, end_row)
            # Keep only the diagonal and the 15 super-diagonals, i.e. spans
            # with end >= start and end - start <= 15.
            span_scores = tf.matrix_band_part(span_scores, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(span_scores, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(span_scores, axis=1), axis=1)

            start_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits1, labels=self.y1)
            end_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits2, labels=self.y2)
            self.loss = tf.reduce_mean(start_loss + end_loss)
예제 #13
0
    def ready(self):
        """Build a word-embedding-only attention reader and its loss.

        This variant trains its own word embedding (the character-level
        embedding path of the sibling models is disabled here), encodes
        passage and question with separate 1-layer recurrent encoders,
        weights the passage encoding by a pairwise attention score and feeds
        the result to a pointer network.

        Sets:
            self.regularizer: L2 loss of the embedding matrix.
            self.start_logits, self.stop_logits: softmax of the start / end
                logits (probabilities, despite the attribute names).
            self.yp1, self.yp2: argmax start / end positions taken from the
                band-limited outer product of the two distributions.
            self.loss: mean span cross-entropy plus
                ``config.l2_reg * self.regularizer``.
        """
        cfg = self.config
        batch = cfg.batch_size
        para_len = self.c_maxlen
        ques_len = self.q_maxlen
        # The char-related settings are still read although the char-level
        # path is disabled in this variant.
        _char_len = cfg.char_limit
        hidden = cfg.hidden
        _char_dim = cfg.char_dim
        _char_hidden = cfg.char_hidden
        if cfg.use_cudnn:
            gru = cudnn_gru
        else:
            gru = native_gru

        drop_kw = dict(keep_prob=cfg.keep_prob, is_train=self.is_train)

        with tf.variable_scope("emb"):
            # NOTE: the character-level GRU embedding and the pretrained
            # `self.word_mat` lookup used by sibling models are intentionally
            # not part of this graph; a fresh embedding is learned instead.
            emb_shape = [cfg.vocab_size, cfg.embedding_size]
            emb_init = tf.random_uniform_initializer(minval=-0.05,
                                                     maxval=0.05)
            word_table = tf.get_variable('embedding', emb_shape,
                                         initializer=emb_init)

            self.regularizer = tf.nn.l2_loss(word_table)

            with tf.name_scope("word"):
                para_emb = tf.nn.embedding_lookup(word_table, self.c)
                ques_emb = tf.nn.embedding_lookup(word_table, self.q)

            para_emb = dropout(para_emb, **drop_kw)
            ques_emb = dropout(ques_emb, **drop_kw)
            para_emb = tf.reshape(para_emb,
                                  [batch, para_len, cfg.embedding_size])
            ques_emb = tf.reshape(ques_emb,
                                  [batch, ques_len, cfg.embedding_size])

        with tf.variable_scope("encoding"):
            # One recurrent layer per input; passage and question do not
            # share an encoder in this variant.
            para_rnn = gru(num_layers=1,
                           num_units=hidden,
                           batch_size=batch,
                           input_size=para_emb.get_shape().as_list()[-1],
                           **drop_kw)
            ques_rnn = gru(num_layers=1,
                           num_units=hidden,
                           batch_size=batch,
                           input_size=ques_emb.get_shape().as_list()[-1],
                           **drop_kw)
            para_enc = para_rnn(para_emb, seq_len=self.c_len)
            ques_enc = ques_rnn(ques_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            # Pairwise passage/question scores.
            pair_scores = tf.matmul(para_enc, ques_enc, adjoint_b=True)

            # Outer product of the two masks marks valid (passage, question)
            # position pairs.
            para_mask_col = tf.cast(tf.expand_dims(self.c_mask, -1), tf.int32)
            ques_mask_row = tf.cast(tf.expand_dims(self.q_mask, 1), tf.int32)
            pair_mask = tf.to_float(tf.matmul(para_mask_col, ques_mask_row))

            # Masked softmax along axis 1 and axis 2 respectively; the
            # original author annotates both as (batch_size, M, N).
            alpha = softmax(pair_scores, 1, pair_mask)
            beta = softmax(pair_scores, 2, pair_mask)

            # Sum beta over axis 1 and divide by the passage length; the
            # original author annotates the result as (batch_size, N, 1).
            beta_total = tf.reduce_sum(beta, 1)
            para_len_f = tf.to_float(tf.expand_dims(para_len, -1))
            query_importance = tf.expand_dims(beta_total / para_len_f, -1)

            # Per-passage-position score; annotated (batch_size, M).
            pos_score = tf.squeeze(tf.matmul(alpha, query_importance), [2])

            # Scale each passage position's encoding by its score; annotated
            # (batch_size, max_c_len, dim).
            pos_weight = tf.reshape(pos_score, [-1, para_len, 1])
            weighted_para = para_enc * pos_weight

        # Obtain the question summary rQ from the encoded question.
        with tf.variable_scope("pointer"):
            ptr_kw = dict(keep_prob=cfg.ptr_keep_prob, is_train=self.is_train)
            ques_summary = summ(ques_enc[:, :, -2 * hidden:],
                                hidden,
                                mask=self.q_mask,
                                **ptr_kw)
            pointer = ptr_net(batch=batch,
                              hidden=ques_summary.get_shape().as_list()[-1],
                              **ptr_kw)
            logits1, logits2 = pointer(ques_summary, weighted_para, hidden,
                                       self.c_mask)

        with tf.variable_scope("predict"):
            self.start_logits = tf.nn.softmax(logits1)
            self.stop_logits = tf.nn.softmax(logits2)

            start_col = tf.expand_dims(tf.nn.softmax(logits1), axis=2)
            end_row = tf.expand_dims(tf.nn.softmax(logits2), axis=1)
            span_scores = tf.matmul(start_col, end_row)
            # Keep only the diagonal and the 15 super-diagonals, i.e. spans
            # with end >= start and end - start <= 15.
            span_scores = tf.matrix_band_part(span_scores, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(span_scores, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(span_scores, axis=1), axis=1)

            start_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits1, labels=self.y1)
            end_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits2, labels=self.y2)
            span_loss = tf.reduce_mean(start_loss + end_loss)
            self.loss = span_loss + cfg.l2_reg * self.regularizer
예제 #14
0
    def ready(self):
        """Build the relation-augmented reading-comprehension graph.

        Stages, each under its own scope: character/word embeddings ("emb"),
        a shared 3-layer encoder ("encoding"), a relation module that
        rewrites the passage encoding using the question summary
        ("relation_analysis"), question-aware attention ("attention"),
        passage self-matching ("match"), a pointer network ("pointer") and
        span prediction ("predict").

        Sets:
            self.start_logits, self.stop_logits: softmax of the start / end
                logits (probabilities, despite the attribute names).
            self.yp1, self.yp2: argmax start / end positions taken from the
                band-limited outer product of the two distributions.
            self.loss: mean of the summed start and end softmax
                cross-entropy losses.
        """
        config = self.config
        N, PL, QL, CL, d, dc, dg = (config.batch_size, self.c_maxlen,
                                    self.q_maxlen, config.char_limit,
                                    config.hidden, config.char_dim,
                                    config.char_hidden)
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                # Flatten (batch, word) so each word is one char sequence.
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])

                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)

                # The same pair of cells encodes passage and question chars.
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)

                # Only the final forward/backward states are kept per word.
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])
            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            # 3-layer recurrent encoder (the GRU implementation selected
            # above; the original comment called it an LSTM).
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)

            # Original author's note: outputs have size
            # (batch_size, max_len, hidden_dim).
            c = rnn(c_emb, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        # TensorFlow scope names may not contain spaces; the former
        # "relation analysis" name raised ValueError while building the graph.
        with tf.variable_scope("relation_analysis"):
            # Question summary; also used below to seed the pointer network.
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            g_theta_layers = [256, 128, 1]  # attention component
            md = Relation_Module(config, self.c_maxlen, self.q_maxlen,
                                 g_theta_layers)
            # Original author's note: r adds attention weight with the
            # question summary.  NOTE(review): presumably hop_2 returns a
            # sequence of passage representations plus attention weights;
            # confirm against Relation_Module.
            r, _alpha = md.hop_2(c,
                                 init,
                                 phase=self.is_train,
                                 activation=tf.nn.relu)
            # Continue with the last representation returned by the module.
            c = r[-1]

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        with tf.variable_scope("match"):
            # The passage attends over itself.
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        # The question summary rQ (`init`, computed in the
        # "relation_analysis" scope) is the pointer's initial state.
        with tf.variable_scope("pointer"):
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            self.start_logits = tf.nn.softmax(logits1)
            self.stop_logits = tf.nn.softmax(logits2)
            # Reuse the distributions just computed instead of adding a
            # second pair of identical softmax ops to the graph.
            outer = tf.matmul(tf.expand_dims(self.start_logits, axis=2),
                              tf.expand_dims(self.stop_logits, axis=1))
            # Keep only the diagonal and the 15 super-diagonals, i.e. spans
            # with end >= start and end - start <= 15.
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)
예제 #15
0
    def ready(self):
        """Build the reader graph with optional char, BPE and POS features.

        Word embeddings are always used; ``config.use_char``,
        ``config.use_bpe`` and ``config.use_pos`` each append an extra
        feature block to the passage and question embeddings.  The remaining
        stages are a shared 3-layer encoder ("encoding"), question-aware
        attention ("attention"), passage self-matching ("match"), a pointer
        network ("pointer") and span prediction ("predict").

        Sets:
            self.yp1, self.yp2: argmax start / end positions taken from the
                band-limited outer product of the two softmax distributions.
            self.loss: mean of the summed start and end softmax
                cross-entropy losses.
        """
        cfg = self.config
        batch = cfg.batch_size
        para_len = self.c_maxlen
        ques_len = self.q_maxlen
        char_len = cfg.char_limit
        bpe_len = cfg.bpe_limit
        hidden = cfg.hidden
        # Pretrained char / BPE vectors use the GloVe-sized dimensions.
        if cfg.pretrained_char:
            char_dim = cfg.glove_dim
        else:
            char_dim = cfg.char_dim
        char_hidden = cfg.char_hidden
        if cfg.pretrained_bpe_emb:
            bpe_dim = cfg.bpe_glove_dim
        else:
            bpe_dim = cfg.bpe_dim
        bpe_hidden = cfg.bpe_hidden
        if cfg.use_cudnn:
            gru = cudnn_gru
        else:
            gru = native_gru

        # Dropout settings shared by every stage before the pointer network.
        drop_kw = dict(keep_prob=cfg.keep_prob, is_train=self.is_train)

        with tf.variable_scope("emb"):
            if cfg.use_char:
                with tf.variable_scope("char"):
                    # Flatten (batch, word) so each word is one char sequence.
                    para_chars = tf.nn.embedding_lookup(self.char_mat,
                                                        self.ch)
                    para_chars = tf.reshape(
                        para_chars, [batch * para_len, char_len, char_dim])
                    ques_chars = tf.nn.embedding_lookup(self.char_mat,
                                                        self.qh)
                    ques_chars = tf.reshape(
                        ques_chars, [batch * ques_len, char_len, char_dim])
                    para_chars = dropout(para_chars, **drop_kw)
                    ques_chars = dropout(ques_chars, **drop_kw)

                    # One pair of cells serves both passage and question.
                    fw_cell = tf.contrib.rnn.GRUCell(char_hidden)
                    bw_cell = tf.contrib.rnn.GRUCell(char_hidden)
                    # Only the final forward/backward states are kept.
                    _, (fw_final, bw_final) = \
                        tf.nn.bidirectional_dynamic_rnn(fw_cell,
                                                        bw_cell,
                                                        para_chars,
                                                        self.ch_len,
                                                        dtype=tf.float32)
                    para_chars = tf.concat([fw_final, bw_final], axis=1)
                    _, (fw_final, bw_final) = \
                        tf.nn.bidirectional_dynamic_rnn(fw_cell,
                                                        bw_cell,
                                                        ques_chars,
                                                        self.qh_len,
                                                        dtype=tf.float32)
                    ques_chars = tf.concat([fw_final, bw_final], axis=1)
                    ques_chars = tf.reshape(
                        ques_chars, [batch, ques_len, 2 * char_hidden])
                    para_chars = tf.reshape(
                        para_chars, [batch, para_len, 2 * char_hidden])

            if cfg.use_bpe:
                with tf.variable_scope("bpe"):
                    # Same recipe as the char block, over BPE units.
                    para_bpe = tf.nn.embedding_lookup(self.bpe_mat, self.cb)
                    para_bpe = tf.reshape(
                        para_bpe, [batch * para_len, bpe_len, bpe_dim])
                    ques_bpe = tf.nn.embedding_lookup(self.bpe_mat, self.qb)
                    ques_bpe = tf.reshape(
                        ques_bpe, [batch * ques_len, bpe_len, bpe_dim])
                    para_bpe = dropout(para_bpe, **drop_kw)
                    ques_bpe = dropout(ques_bpe, **drop_kw)

                    fw_cell = tf.contrib.rnn.GRUCell(bpe_hidden)
                    bw_cell = tf.contrib.rnn.GRUCell(bpe_hidden)
                    _, (fw_final, bw_final) = \
                        tf.nn.bidirectional_dynamic_rnn(fw_cell,
                                                        bw_cell,
                                                        para_bpe,
                                                        self.cb_len,
                                                        dtype=tf.float32)
                    para_bpe = tf.concat([fw_final, bw_final], axis=1)
                    _, (fw_final, bw_final) = \
                        tf.nn.bidirectional_dynamic_rnn(fw_cell,
                                                        bw_cell,
                                                        ques_bpe,
                                                        self.qb_len,
                                                        dtype=tf.float32)
                    ques_bpe = tf.concat([fw_final, bw_final], axis=1)
                    ques_bpe = tf.reshape(
                        ques_bpe, [batch, ques_len, 2 * bpe_hidden])
                    para_bpe = tf.reshape(
                        para_bpe, [batch, para_len, 2 * bpe_hidden])

            with tf.name_scope("word"):
                para_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                ques_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            # Append each enabled feature block along the feature axis.
            if cfg.use_char:
                para_emb = tf.concat([para_emb, para_chars], axis=2)
                ques_emb = tf.concat([ques_emb, ques_chars], axis=2)

            if cfg.use_bpe:
                para_emb = tf.concat([para_emb, para_bpe], axis=2)
                ques_emb = tf.concat([ques_emb, ques_bpe], axis=2)

            if cfg.use_pos:
                para_pos = tf.nn.embedding_lookup(self.pos_mat, self.cp)
                ques_pos = tf.nn.embedding_lookup(self.pos_mat, self.qp)
                para_emb = tf.concat([para_emb, para_pos], axis=2)
                ques_emb = tf.concat([ques_emb, ques_pos], axis=2)

        with tf.variable_scope("encoding"):
            encoder = gru(num_layers=3,
                          num_units=hidden,
                          batch_size=batch,
                          input_size=para_emb.get_shape().as_list()[-1],
                          **drop_kw)
            para_enc = encoder(para_emb, seq_len=self.c_len)
            ques_enc = encoder(ques_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            ques_aware = dot_attention(para_enc,
                                       ques_enc,
                                       mask=self.q_mask,
                                       hidden=hidden,
                                       **drop_kw)
            att_rnn = gru(num_layers=1,
                          num_units=hidden,
                          batch_size=batch,
                          input_size=ques_aware.get_shape().as_list()[-1],
                          **drop_kw)
            att_out = att_rnn(ques_aware, seq_len=self.c_len)

        with tf.variable_scope("match"):
            # The passage attends over itself.
            self_aware = dot_attention(att_out,
                                       att_out,
                                       mask=self.c_mask,
                                       hidden=hidden,
                                       **drop_kw)
            match_rnn = gru(num_layers=1,
                            num_units=hidden,
                            batch_size=batch,
                            input_size=self_aware.get_shape().as_list()[-1],
                            **drop_kw)
            match_out = match_rnn(self_aware, seq_len=self.c_len)

        with tf.variable_scope("pointer"):
            # The pointer stage has its own keep probability.
            ptr_kw = dict(keep_prob=cfg.ptr_keep_prob, is_train=self.is_train)
            # Summarise the last 2 * hidden features of the encoded question.
            ques_summary = summ(ques_enc[:, :, -2 * hidden:],
                                hidden,
                                mask=self.q_mask,
                                **ptr_kw)
            pointer = ptr_net(batch=batch,
                              hidden=ques_summary.get_shape().as_list()[-1],
                              **ptr_kw)
            logits1, logits2 = pointer(ques_summary, match_out, hidden,
                                       self.c_mask)

        with tf.variable_scope("predict"):
            start_col = tf.expand_dims(tf.nn.softmax(logits1), axis=2)
            end_row = tf.expand_dims(tf.nn.softmax(logits2), axis=1)
            span_scores = tf.matmul(start_col, end_row)
            # Keep only the diagonal and the 15 super-diagonals, i.e. spans
            # with end >= start and end - start <= 15.
            span_scores = tf.matrix_band_part(span_scores, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(span_scores, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(span_scores, axis=1), axis=1)

            start_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits1, labels=self.y1)
            end_loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=logits2, labels=self.y2)
            self.loss = tf.reduce_mean(start_loss + end_loss)
    def ready(self):
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        gi = []
        att_vP = []

        for i in range(config.max_para):
            print(i)
            with tf.variable_scope("emb" + str(i)):
                with tf.variable_scope("char" + str(i)):
                    #CL = tf.Print(CL,[CL],message="CL:")
                    #PL = tf.Print(PL,[PL],message="PL:")
                    #self.ch_pr = tf.Print(self.ch_pr,[self.ch_pr.get_shape()],message="ch_pr:")
                    self.ch_pr_ = self.ch_pr[:, i * 400:(i + 1) * 400, :]
                    print(self.ch_pr_.get_shape())
                    #self.c_pr = tf.reshape(self.c_pr, [N, 12, PL])
                    #print(self.ch.get_shape())
                    #print(self.ch_pr.get_shape())
                    #print(self.c.get_shape())
                    #print(self.c_pr.get_shape())
                    #self.ch_pr = tf.Print(self.ch_pr,[self.ch_pr[:,2:,:]],message="ch_pr")
                    ch_emb = tf.reshape(tf.nn.embedding_lookup(\
                     self.char_mat, self.ch_pr_), [N * PL, CL, dc])
                    #	self.char_mat, self.ch), [N * PL, CL, dc])
                    qh_emb = tf.reshape(
                        tf.nn.embedding_lookup(self.char_mat, self.qh),
                        [N * QL, CL, dc])
                    ch_emb = dropout(ch_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    #ch_emb = tf.Print(ch_emb,[ch_emb],message="ch_emb")
                    #qh_emb = tf.Print(qh_emb,[qh_emb],message="qh_emb")
                    qh_emb = dropout(qh_emb,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
                    cell_fw = tf.contrib.rnn.GRUCell(dg)
                    cell_bw = tf.contrib.rnn.GRUCell(dg)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        ch_emb,
                        self.ch_len,
                        dtype=tf.float32)
                    ch_emb = tf.concat([state_fw, state_bw], axis=1)
                    _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                        cell_fw,
                        cell_bw,
                        qh_emb,
                        self.qh_len,
                        dtype=tf.float32)
                    #state_fw = tf.Print(state_fw,[state_fw],message="state_fw")
                    #state_bw = tf.Print(state_bw,[state_bw],message="state_bw")
                    qh_emb = tf.concat([state_fw, state_bw], axis=1)
                    qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                    ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])
                    #ch_emb = tf.Print(ch_emb,[ch_emb],message="ch_emb")
                with tf.name_scope("word" + str(i)):
                    c_emb = tf.nn.embedding_lookup(
                        self.word_mat, self.c_pr[:, i * 400:(i + 1) * 400])
                    q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

                c_emb = tf.concat([c_emb, ch_emb], axis=2)
                q_emb = tf.concat([q_emb, qh_emb], axis=2)

            with tf.variable_scope("encoding" + str(i)):
                rnn = gru(num_layers=3,
                          num_units=d,
                          batch_size=N,
                          input_size=c_emb.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                c = rnn(c_emb, seq_len=self.c_len)
                q = rnn(q_emb, seq_len=self.q_len)

            with tf.variable_scope("attention" + str(i)):
                qc_att = dot_attention(c,
                                       q,
                                       mask=self.q_mask,
                                       hidden=d,
                                       keep_prob=config.keep_prob,
                                       is_train=self.is_train)
                rnn = gru(num_layers=1,
                          num_units=d,
                          batch_size=N,
                          input_size=qc_att.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)
                att = rnn(qc_att, seq_len=self.c_len)
                # att is the v_P
                if i == 0:
                    att_vP = att
                else:
                    att_vP = tf.concat([att_vP, att], axis=1)
                #att = tf.Print(att,[att],message="att:")
                print("att:", att.get_shape().as_list())
                print("att_vP:", att_vP.get_shape().as_list())
            #att_vP = tf.Print(att_vP,[tf.shape(att_vP)],message="att_vP:")
            """
			with tf.variable_scope("match"):
				self_att = dot_attention(
					att, att, mask=self.c_mask, hidden=d, keep_prob=config.keep_prob, is_train=self.is_train)
				rnn = gru(num_layers=1, num_units=d, batch_size=N, input_size=self_att.get_shape(
				).as_list()[-1], keep_prob=config.keep_prob, is_train=self.is_train)
				match = rnn(self_att, seq_len=self.c_len)
			"""
        with tf.variable_scope("pointer"):

            # r_Q:
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            print("rQ:", init.get_shape().as_list())
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, att, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            #losses1_2 = tf.reduce_mean(losses1_2, axis=0)
            self.loss = tf.reduce_mean(losses + losses2)

            # print losses
            #condition = tf.greater(self.loss, 11)
            #self.yp1 = tf.where(condition, tf.Print(self.yp1,[self.yp1],message="Yp1:"), self.yp1)
            #self.yp2 = tf.where(condition, tf.Print(self.yp2,[self.yp2],message="Yp2:"), self.yp1)

        if config.with_passage_ranking:
            gi = None
            for i in range(config.max_para):
                # Passage ranking
                with tf.variable_scope("passage-ranking-attention" + str(i)):

                    #att_vP = tf.Print(att_vP,[att_vP.get_shape()],message="att_vP:")
                    vj_P = att_vP[:, i * 400:(i + 1) * 400, :]
                    pr_att = pr_attention(
                        batch=N,
                        hidden=init.get_shape().as_list()[-1],
                        keep_prob=config.keep_prob,
                        is_train=self.is_train)
                    r_P = pr_att(init, vj_P, d, self.c_mask)
                    #r_P = tf.Print(r_P,[r_P],message="r_p")
                    # Wg
                    concatenate = tf.concat([init, r_P], axis=1)
                    g = tf.nn.tanh(
                        dense(concatenate,
                              hidden=d,
                              use_bias=False,
                              scope="g" + str(i)))
                    g_ = dense(g, 1, use_bias=False, scope="g_" + str(i))
                    #g = tf.Print(g,[g],message="g")
                    if i == 0:
                        gi = tf.reshape(g_, [N, 1])
                    else:
                        gi = tf.concat([gi, tf.reshape(g_, [N, 1])], axis=1)
            #gi_ = tf.convert_to_tensor(gi,dtype=tf.float32)
            #self.gi = tf.nn.softmax(gi_)
            #self.losses3 = tf.nn.softmax_cross_entropy_with_logits(
            #			logits=gi_, labels=tf.reshape(self.pr,[-1,1]))
            self.losses3 = tf.nn.softmax_cross_entropy_with_logits(
                logits=gi, labels=self.pr)
            #self.losses3 = tf.Print(self.losses3,[self.losses3,tf.reduce_max(self.losses3),
            #	tf.reduce_max(self.pr),tf.reduce_max(gi)],message="losses3:")
            self.pr_loss = tf.reduce_mean(self.losses3)
            #self.pr_loss = tf.Print(self.pr_loss,[self.pr_loss])
            self.r = tf.constant(0.8)
            self.e_loss1 = tf.multiply(self.r, self.loss)
            self.e_loss2 = tf.multiply(tf.subtract(tf.constant(1.0), self.r),
                                       self.pr_loss)
            self.e_loss = tf.add(self.e_loss1, self.e_loss2)
    def ready(self):
        """Build the multi-passage pointer graph and its losses.

        Steps, in graph-construction order:

        1. Run ``self.get_vP`` under ``tf.while_loop`` (stopped by
           ``self.condition``) to accumulate ``self.att_vP`` and the question
           encoding ``q``.  Both helpers are defined elsewhere; the shape
           invariants only show that axis 1 of those tensors may grow.
        2. Feed ``q`` and ``self.att_vP`` to a pointer network to obtain start
           and end logits.
        3. Derive span predictions ``self.yp1`` / ``self.yp2`` and rewrite them
           with a nested ``tf.while_loop`` over batch rows and passages.
        4. Compute the span loss ``self.loss``.
        5. If ``config.with_passage_ranking`` is set, score every passage and
           blend the ranking loss with the span loss into ``self.e_loss``.

        Also sets ``self.cell_fw``, ``self.cell_bw``, ``self.rnn1`` and
        ``self.rnn2``; they are not read in this method (presumably consumed
        by ``self.get_vP`` -- TODO confirm).
        """
        config = self.config
        N, d, dg = config.batch_size, config.hidden, config.char_hidden

        self.cell_fw = tf.contrib.rnn.GRUCell(dg)
        self.cell_bw = tf.contrib.rnn.GRUCell(dg)
        self.rnn1 = None
        self.rnn2 = None
        # Seed values for the loop-carried tensors.
        self.att_vP = tf.zeros([N, 1, 2 * d])
        # NOTE(review): 900 is hard-coded; it must match the last dimension
        # of the question encoding produced by get_vP.
        qtemp = tf.zeros([N, 1, 900])

        #   _c = concatenation
        # NOTE(review): 5 is hard-coded; presumably the maximum number of
        # passages -- confirm against config.max_para.
        cmax_c = tf.zeros([5], tf.int32)
        clen_c = tf.zeros([N, 5], tf.int32)

        loop_vars = [
            self.i, self.att_vP, qtemp, self.answer_info, self.y1, self.y2,
            self.c_pr_mask, cmax_c, clen_c
        ]
        shape_invariants = [
            self.i.get_shape(),
            tf.TensorShape([N, None, 2 * d]),
            tf.TensorShape([N, None, 900]),
            self.answer_info.get_shape(),
            tf.TensorShape([None, None]),
            tf.TensorShape([None, None]),
            tf.TensorShape([None, None]),
            tf.TensorShape([None]),
            tf.TensorShape([N, None]),
        ]
        (_, self.att_vP, q, self.answer_info, self.y1, self.y2,
         self.c_pr_mask, cmax_c, clen_c) = tf.while_loop(
             self.condition,
             self.get_vP,
             loop_vars=loop_vars,
             shape_invariants=shape_invariants)

        tf.summary.histogram('att_vP', self.att_vP)

        with tf.variable_scope("pointer"):
            # r_Q: summary of the question encoding, used as the pointer
            # network's initial state.
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            print("rQ:", init.get_shape().as_list())
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, self.att_vP, d, self.c_pr_mask)
            # Debug taps: print the softmax of the logits whenever they are
            # evaluated.
            logits1 = tf.Print(logits1, [tf.nn.softmax(logits1)],
                               message="logits1",
                               summarize=100)
            logits2 = tf.Print(logits2, [tf.nn.softmax(logits2)],
                               message="logits2",
                               summarize=100)
            tf.summary.histogram('rQ_init', init)
            tf.summary.histogram('pointer_logits_1', logits1)
            tf.summary.histogram('pointer_logits_2', logits2)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            # Keep only (start, end) pairs with 0 <= end - start <= 15.
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)

            def scatter_at_row(j, value):
                # Vector of shape [N] holding `value` at index j, zero elsewhere.
                return tf.scatter_nd(tf.reshape(j, [1, 1]),
                                     tf.reshape(value, [1]),
                                     tf.reshape(N, [1]))

            def remap(y, new_y, j, i, cm, cl):
                # One passage step for one pointer of batch row j.
                # If y[j] > cm[i]: subtract cm[i] from y[j] and add cl[j, i]
                # to new_y[j]; otherwise leave y alone and add y[j] to
                # new_y[j].
                def past_passage():
                    shifted = tf.subtract(y, scatter_at_row(j, cm[i]))
                    return shifted, tf.add(new_y, scatter_at_row(j, cl[j, i]))

                def inside_passage():
                    return y, tf.add(new_y, scatter_at_row(j, y[j]))

                return tf.cond(y[j] > cm[i], past_passage, inside_passage)

            def condition_i(y1, y2, j, i, ny1, ny2, cm, cl):
                # Visit only as many passages as batch row j has.
                return tf.less(i, self.para_count[j])

            def passage_i(y1, y2, j, i, new_y1, new_y2, cm, cl):
                y1, new_y1 = remap(y1, new_y1, j, i, cm, cl)
                y2, new_y2 = remap(y2, new_y2, j, i, cm, cl)
                i = tf.add(i, tf.constant(1))
                return y1, y2, j, i, new_y1, new_y2, cm, cl

            def condition_j(y1, y2, j, ny1, ny2, cm, cl):
                return tf.less(j, N)

            def batch_j(y1, y2, j, new_y1, new_y2, cm, cl):
                # Inner loop over the passages of batch row j.
                loop_var_i = tf.constant(0, tf.int32)
                y1, y2, j, i, new_y1, new_y2, cm, cl = tf.while_loop(
                    condition_i,
                    passage_i,
                    loop_vars=[y1, y2, j, loop_var_i, new_y1, new_y2, cm, cl],
                    shape_invariants=[
                        tf.TensorShape([None]),
                        tf.TensorShape([None]),
                        j.get_shape(),
                        loop_var_i.get_shape(),
                        tf.TensorShape([N]),
                        tf.TensorShape([N]),
                        tf.TensorShape([None]),
                        tf.TensorShape([None, None])
                    ])

                j = tf.add(j, tf.constant(1))
                return y1, y2, j, new_y1, new_y2, cm, cl

            new_yp1 = tf.zeros([N], tf.int32)
            new_yp2 = tf.zeros([N], tf.int32)
            loop_var_j = tf.constant(0, tf.int32)
            self.yp1 = tf.cast(self.yp1, tf.int32)
            self.yp2 = tf.cast(self.yp2, tf.int32)

            # Each pointer gets its own accumulator (new_yp1 / new_yp2).
            (self.yp1, self.yp2, loop_var_j, new_yp1, new_yp2, cm,
             cl) = tf.while_loop(
                 condition_j,
                 batch_j,
                 loop_vars=[
                     self.yp1, self.yp2, loop_var_j, new_yp1, new_yp2,
                     cmax_c, clen_c
                 ],
                 shape_invariants=[
                     tf.TensorShape([None]),
                     tf.TensorShape([None]),
                     loop_var_j.get_shape(),
                     tf.TensorShape([N]),
                     tf.TensorShape([N]),
                     tf.TensorShape([None]),
                     tf.TensorShape([None, None])
                 ])

            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            # Debug taps on the per-example losses.
            losses = tf.Print(losses, [losses], message="losses", summarize=20)
            losses2 = tf.Print(losses2, [losses2],
                               message="losses2",
                               summarize=20)
            self.loss = tf.reduce_mean(losses + losses2)
            print(self.loss)

        if config.with_passage_ranking:
            # NOTE(review): 400 is the hard-coded per-passage slice width of
            # att_vP along axis 1 -- confirm against the data pipeline.
            para_len = 400
            scores = []
            for i in range(config.max_para):
                # The first passage creates the ranking variables; every
                # later passage re-enters the same scope with reuse=True so
                # all passages share one set of weights.
                with tf.variable_scope("passage-ranking-attention",
                                       reuse=True if i > 0 else None):
                    vj_P = self.att_vP[:, i * para_len:(i + 1) * para_len, :]
                    pr_att = pr_attention(
                        batch=N,
                        hidden=init.get_shape().as_list()[-1],
                        keep_prob=config.keep_prob,
                        is_train=self.is_train,
                        name_scope="passage_ranking_att_layer")
                    r_P = pr_att(init, vj_P, d, self.c_mask)
                    tf.summary.histogram('r_P_' + str(i), r_P)
                    # Wg: score the passage from [r_Q; r_P].
                    concatenate = tf.concat([init, r_P], axis=1)
                    g = tf.nn.tanh(
                        dense(concatenate,
                              hidden=d,
                              use_bias=False,
                              scope="g",
                              name_scope="dense_pr_att_layer_1"))
                    g_ = dense(g,
                               1,
                               use_bias=False,
                               scope="g_",
                               name_scope="dense_pr_att_layer_2")
                    scores.append(tf.reshape(g_, [N, 1]))
            # One score column per passage: shape [N, config.max_para].
            gi = tf.concat(scores, axis=1)
            tf.summary.histogram('gi', gi)
            self.losses3 = tf.nn.softmax_cross_entropy_with_logits(
                logits=gi, labels=self.pr)
            self.pr_loss = tf.reduce_mean(self.losses3)
            # Final loss: r * span loss + (1 - r) * passage-ranking loss.
            self.r = tf.constant(0.8)
            self.e_loss1 = tf.multiply(self.r, self.loss)
            self.e_loss2 = tf.multiply(tf.subtract(tf.constant(1.0), self.r),
                                       self.pr_loss)
            self.e_loss = tf.add(self.e_loss1, self.e_loss2)
예제 #18
0
파일: model.py 프로젝트: zjhyyzz/GP-Net
    def ready(self):
        """Build the sentence-selection graph and its policy-gradient loss.

        Steps, in graph-construction order:

        1. Look up word embeddings for the context sentences and the question,
           and add a question-aligned ("soft") embedding to every context
           token.
        2. Encode each sentence with 1-D convolutions (kernel sizes 3, 4, 5)
           max-pooled over the token axis.
        3. Encode the sentence sequence and the question with GRUs, attend
           from sentences to the question, and run a sentence-level pointer
           network to get one logit per sentence.
        4. Build ``self.loss`` from the log of the summed top-``config.k``
           sentence probabilities, weighted by ``self.advantage``.

        Character-level embeddings are not used by this method.

        Sets ``self.align_cq``, ``self.c_emb``, ``self.q_emb``, ``self.q_enc``,
        ``self.att_s``, ``self.lo``, ``self.outer``, ``self.yp``,
        ``self.out1``, ``self.policy``, ``self.policy_log_part``,
        ``self.reward_mean``, ``self.reward_var``, ``self.final_reward`` and
        ``self.loss``.
        """
        config = self.config
        N, d = config.batch_size, config.hidden
        gru = cudnn_gru if config.use_cudnn else native_gru
        SN, SL = self.c_s_maxnum, self.c_s_maxlen
        W = config.glove_dim
        print('embedding part')
        with tf.variable_scope("emb"):
            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.cs_slice)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q_slice)

            with tf.name_scope("softemb"):
                # Project both sides to d dims, then let every context token
                # attend over the question tokens.
                c_emb_linear = tf.nn.relu(
                    dense(c_emb, d, use_bias=True, scope="c_emb_linear"))
                q_emb_linear = tf.nn.relu(
                    dense(q_emb, d, use_bias=True, scope="q_emb_linear"))
                c_emb_linear = tf.reshape(c_emb_linear, [N, SN * SL, d])
                align_cq = tf.matmul(c_emb_linear,
                                     tf.transpose(q_emb_linear, [0, 2, 1]))

                cq_mask = tf.tile(tf.expand_dims(self.q_mask, axis=1),
                                  [1, SN * SL, 1])
                self.align_cq = tf.nn.softmax(softmax_mask(align_cq, cq_mask))
                align_c_emb = tf.matmul(self.align_cq, q_emb_linear)
                align_c_emb = tf.reshape(align_c_emb, [N, SN, SL, d])
            c_emb = tf.concat(
                [c_emb, align_c_emb, self.ce_slice, self.ct_slice], axis=3)
            # NOTE(review): the 3 + 19 extra features are hard-coded;
            # presumably the widths of ce_slice and ct_slice -- confirm.
            c_emb = tf.reshape(c_emb, [N, SN, SL, W + d + 3 + 19],
                               name='c_emb_reshape')

            q_emb = tf.concat([q_emb, self.qt_slice], axis=2)
            self.c_emb = c_emb
            self.q_emb = q_emb

        print('encode-part')

        # One [SN, SL, features] tensor per batch element.
        c_s_emb = tf.unstack(c_emb, axis=0)

        kernel_sizes = [3, 4, 5]
        merge = []
        with tf.variable_scope("cnnencoder"):
            for kernel_size in kernel_sizes:
                step_merge = []
                with tf.variable_scope("cnnencoder-%s" % kernel_size):
                    for sample_emb in c_s_emb:
                        # AUTO_REUSE shares the conv weights of this kernel
                        # size across all batch elements.
                        conv_branch = tf.layers.conv1d(
                            inputs=sample_emb,
                            # use as many filters as the hidden size
                            filters=100,
                            kernel_size=[kernel_size],
                            use_bias=True,
                            activation=tf.nn.relu,
                            trainable=True,
                            padding='SAME',
                            name='conv_' + str(kernel_size),
                            reuse=tf.AUTO_REUSE)
                        # Max-pool over the token axis: one vector per
                        # sentence.
                        pool_branch = tf.reduce_max(conv_branch, axis=1)
                        pool_branch = dropout(pool_branch,
                                              keep_prob=config.keep_prob,
                                              is_train=self.is_train)
                        step_merge.append(pool_branch)

                merge.append(tf.stack(step_merge, axis=0))

            # Concatenate the pooled features of all kernel sizes once, after
            # every branch has been built.
            con = tf.concat(merge, axis=-1)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=con.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            print('passage-encoder')
            c_s = rnn(con, seq_len=self.c_p_len)
        with tf.variable_scope("qencode"):
            with tf.variable_scope("encoding"):
                rnn = gru(num_layers=3,
                          num_units=d,
                          batch_size=N,
                          input_size=q_emb.get_shape().as_list()[-1],
                          keep_prob=config.keep_prob,
                          is_train=self.is_train)

                q = rnn(q_emb, seq_len=self.q_len)
        self.q_enc = q
        print('qc_att')

        with tf.variable_scope("attention"):
            qc_att = dot_attention(c_s,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=config.keep_prob,
                                   is_train=self.is_train)

            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            self.att_s = rnn(qc_att, seq_len=self.c_p_len)

        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train,
                              is_sentence=True)

            logits1 = pointer(init, self.att_s, d, self.c_p_mask)
            self.lo = logits1
        with tf.variable_scope("predict"):
            self.outer = tf.nn.softmax(logits1)
            self.yp = tf.argmax(self.outer, axis=1)
            # Probabilities of the k most likely sentences, and their sum.
            self.out1 = tf.nn.top_k(self.outer, config.k).values
            self.policy = tf.reduce_sum(self.out1, axis=-1, keepdims=True)
            self.policy_log_part = tf.log(self.policy)

            # Standardise the advantage across the batch; the epsilon guards
            # against a zero standard deviation.
            reward = self.advantage
            reward_mean, reward_var = tf.nn.moments(reward, axes=[0])

            reward_std = tf.sqrt(reward_var) + 1e-6
            self.reward_mean = reward_mean
            # NOTE(review): despite its name this attribute stores the
            # standard deviation, not the variance.
            self.reward_var = reward_std
            reward = tf.div(reward - reward_mean, reward_std)

            self.final_reward = reward - self.baseline
            # NOTE(review): the loss is weighted by the raw self.advantage,
            # not by self.final_reward computed above -- confirm this is
            # intended.
            self.loss = tf.reduce_mean(-1 * self.policy_log_part *
                                       self.advantage)
예제 #19
0
    def ready(self):
        """Assemble the span-prediction graph and publish ``yp1`` / ``yp2``.

        Stages are created in order under the scopes ``emb``, ``encoding``,
        ``attention``, ``match``, ``pointer`` and ``predict``.  The batch size
        is fixed at 64; the other sizes come from ``self.c_maxlen``,
        ``self.q_maxlen`` and the module-level settings ``char_limit``,
        ``hidden``, ``char_dim`` and ``char_hidden``.

        Only ``self.yp1`` and ``self.yp2`` are set; no loss is built here.
        """
        batch = 64
        para_len = self.c_maxlen
        ques_len = self.q_maxlen
        word_chars = char_limit
        units = hidden
        char_width = char_dim
        char_units = char_hidden
        if use_cudnn:
            make_gru = cudnn_gru
        else:
            make_gru = native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                # Character ids -> embeddings, flattened to one row per word.
                para_chars = tf.nn.embedding_lookup(self.char_mat, self.ch)
                para_chars = tf.reshape(
                    para_chars, [batch * para_len, word_chars, char_width])
                ques_chars = tf.nn.embedding_lookup(self.char_mat, self.qh)
                ques_chars = tf.reshape(
                    ques_chars, [batch * ques_len, word_chars, char_width])

                fw_cell = tf.contrib.rnn.GRUCell(char_units)
                bw_cell = tf.contrib.rnn.GRUCell(char_units)

                def final_states(char_inputs, char_lengths):
                    # Run the shared cell pair and join both final states.
                    _, (fw_state, bw_state) = tf.nn.bidirectional_dynamic_rnn(
                        fw_cell,
                        bw_cell,
                        char_inputs,
                        char_lengths,
                        dtype=tf.float32)
                    return tf.concat([fw_state, bw_state], axis=1)

                # Context characters first, then question characters; the
                # same two cells serve both calls.
                para_chars = final_states(para_chars, self.ch_len)
                ques_chars = final_states(ques_chars, self.qh_len)
                ques_chars = tf.reshape(ques_chars,
                                        [batch, ques_len, 2 * char_units])
                para_chars = tf.reshape(para_chars,
                                        [batch, para_len, 2 * char_units])

            with tf.name_scope("word"):
                para_words = tf.nn.embedding_lookup(self.word_mat, self.c)
                ques_words = tf.nn.embedding_lookup(self.word_mat, self.q)

            # Word and character features are joined on the last axis.
            para_emb = tf.concat([para_words, para_chars], axis=2)
            ques_emb = tf.concat([ques_words, ques_chars], axis=2)

        with tf.variable_scope("encoding"):
            # One encoder object is applied to both context and question.
            encoder = make_gru(
                num_layers=3,
                num_units=units,
                batch_size=batch,
                input_size=para_emb.get_shape().as_list()[-1])
            para_enc = encoder(para_emb, seq_len=self.c_len)
            ques_enc = encoder(ques_emb, seq_len=self.q_len)

        with tf.variable_scope("attention"):
            gated = dot_attention(
                para_enc, ques_enc, mask=self.q_mask, hidden=units)
            att_rnn = make_gru(
                num_layers=1,
                num_units=units,
                batch_size=batch,
                input_size=gated.get_shape().as_list()[-1])
            attended = att_rnn(gated, seq_len=self.c_len)

        with tf.variable_scope("match"):
            self_gated = dot_attention(
                attended, attended, mask=self.c_mask, hidden=units)
            match_rnn = make_gru(
                num_layers=1,
                num_units=units,
                batch_size=batch,
                input_size=self_gated.get_shape().as_list()[-1])
            matched = match_rnn(self_gated, seq_len=self.c_len)

        with tf.variable_scope("pointer"):
            # Only the last 2 * units features of the question encoding are
            # summarised into the pointer's initial state.
            ques_summary = summ(
                ques_enc[:, :, -2 * units:], units, mask=self.q_mask)
            span_pointer = ptr_net(
                batch=batch, hidden=ques_summary.get_shape().as_list()[-1])
            start_logits, end_logits = span_pointer(
                ques_summary, matched, units, self.c_mask)

        with tf.variable_scope("predict"):
            start_col = tf.expand_dims(tf.nn.softmax(start_logits), axis=2)
            end_row = tf.expand_dims(tf.nn.softmax(end_logits), axis=1)
            joint = tf.matmul(start_col, end_row)
            # Keep only (start, end) pairs with 0 <= end - start <= 12
            # (an earlier variant of this line used a band of 15).
            joint = tf.matrix_band_part(joint, 0, 12)
            self.yp1 = tf.argmax(tf.reduce_max(joint, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(joint, axis=1), axis=1)
	def ready(self):
		"""Build the multi-passage pointer graph and its losses.

		Per-passage tensors returned by ``self.get_vp(i)`` are concatenated
		along axis 1 for up to ``config.max_para`` passages; a passage only
		contributes when its index is below ``max(self.passage_count)``.
		The concatenation is zero-padded, compacted with a boolean mask and
		fed to the pointer network.

		Sets ``self.cell_fw``, ``self.cell_bw``, ``self.yp1``, ``self.yp2``
		and ``self.loss``.  When ``config.with_passage_ranking`` is true it
		also sets ``self.losses3``, ``self.pr_loss``, ``self.ee_loss1``,
		``self.ee_loss2`` and ``self.ee_loss`` (0.8 * span loss + 0.2 *
		passage-ranking loss).
		"""
		config = self.config
		N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
		gru = cudnn_gru if config.use_cudnn else native_gru

		max_para = tf.reduce_max(self.passage_count)
		self.cell_fw = tf.contrib.rnn.GRUCell(dg)
		self.cell_bw = tf.contrib.rnn.GRUCell(dg)

		# Placeholder accumulators: the i == 0 branch below replaces them
		# outright instead of concatenating onto them.
		vp_concat = tf.zeros([N,1,300],tf.float32)
		clen_concat = tf.zeros([N,1],tf.int32)
		c_mask_concat = tf.cast(tf.zeros([N,1]),tf.bool)
		y1_concat = y2_concat = tf.zeros([N,1])
		seq_mask_concat = tf.cast(tf.zeros([N,1]),tf.bool) # maybe seq mask is = c_mask
		q = tf.zeros([N,1,1])
		for i in range(config.max_para):
			i_ = tf.constant(i)
			#print_out(i)
			def vp():
				# Tensors for passage i, appended to the running concatenations
				# (or used as-is for the first passage).
				att, c_len, c_mask, y1, y2, seq_mask = self.get_vp(i)
				
				c_len = tf.reshape(c_len,[N,1])
				att, c_len, c_mask, y1, y2, seq_mask = tf.cond(
					tf.equal(i_,tf.constant(0)),
					lambda: (att, c_len, c_mask, y1, y2, seq_mask),
					lambda: (
						tf.concat([vp_concat, att], axis=1),
						tf.concat([clen_concat, c_len], axis=1),
						tf.concat([c_mask_concat, c_mask], axis=1),
						tf.concat([y1_concat, y1], axis=1),
						tf.concat([y2_concat, y2], axis=1),
						tf.concat([seq_mask_concat, seq_mask], axis=1),
					)
				)
				return att, c_len, c_mask, y1, y2, seq_mask

			def dummy(): return vp_concat, clen_concat, c_mask_concat, y1_concat, y2_concat, seq_mask_concat

			# Passages at or beyond max_para leave the accumulators unchanged.
			vp_concat, clen_concat, c_mask_concat, y1_concat, y2_concat, seq_mask_concat \
				= tf.cond(i_ < max_para, vp, dummy)

		# Sum of the per-passage lengths for each example.
		vp_mask_count = tf.reduce_sum(clen_concat, axis=1)
		
		# max count w.r.t original concatenated context (self.c_len)
		vpmccl = vp_mask_max_count_c_like = tf.reduce_max(vp_mask_count)
		# max count w.r.t concatenated vp (self.att_vP) 
		##### not used:
		vp_mask_max_count = tf.reduce_max(tf.reduce_max(clen_concat))
		
		# Padding each example needs to reach the longest example in the batch.
		vp_final_pad_meta = vp_mask_max_count_c_like - vp_mask_count

		# dont know why this diff happens, but it does
		diff = tf.shape(self.c_mask)[-1] - vp_mask_max_count_c_like

		vp_final_pad_seq = tf.sequence_mask(vp_final_pad_meta+diff)
		seq_mask_concat1 = tf.concat([seq_mask_concat, vp_final_pad_seq], axis=1)

		pad_length = tf.reduce_max(vp_final_pad_meta)+diff
		paddings = tf.convert_to_tensor([[0, 0], [0, pad_length], [0, 0]])
		new_vp = tf.pad(vp_concat, paddings, "CONSTANT")

		# Drop the masked-out positions and restore a rectangular batch.
		new_vp = tf.reshape(tf.boolean_mask(new_vp, seq_mask_concat1), 
			[N, vpmccl+diff, 2*config.hidden]
		)

		"""
		new_vp = tf.Print(new_vp,["vp_mask_max_count_c_like",vp_mask_max_count_c_like,
			"vp_final_pad_meta",vp_final_pad_meta,
			"vp_concat",tf.shape(vp_concat),"new_vp",tf.shape(new_vp),
			"c_mask",tf.shape(self.c_mask),"seq_mask_concat",tf.shape(seq_mask_concat),
			"clen_concat",clen_concat,"c_mask_last",self.c_mask[:,-1],
			"vp_mask_count",vp_mask_count,"c_len",self.c_len],
			summarize=N*10,message="SHORT")
		"""
		
		#self.c_mask = tf.concat([self.c_mask,vp_final_pad_seq],axis=1)
		with tf.variable_scope("pointer"):
			# r_Q:
			init = summ(self.q_enc[:, :, -2 * d:], d, mask=self.q_mask,
						keep_prob=config.ptr_keep_prob, is_train=self.is_train)

			pointer = ptr_net(batch=N, hidden=init.get_shape().as_list(
			)[-1], keep_prob=config.ptr_keep_prob, is_train=self.is_train)
			logits1, logits2 = pointer(init, new_vp, d, self.c_mask)

		with tf.variable_scope("predict"):
			outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
							  tf.expand_dims(tf.nn.softmax(logits2), axis=1))
			# Keep only (start, end) pairs with 0 <= end - start <= 15.
			outer = tf.matrix_band_part(outer, 0, 15)
			self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
			self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
			losses = tf.nn.softmax_cross_entropy_with_logits(
				logits=logits1, labels=self.y1)
			losses2 = tf.nn.softmax_cross_entropy_with_logits(
				logits=logits2, labels=self.y2)
			self.loss = tf.reduce_mean(losses + losses2)
			#losses = tf.nn.softmax_cross_entropy_with_logits_v2(
			#	logits=logits1, labels=tf.stop_gradient(self.y1))
			#losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
			#	logits=logits2, labels=tf.stop_gradient(self.y2))
			#self.loss = tf.reduce_mean(losses + losses2)

		# Per-passage maximum length over the batch.
		c_max = tf.reduce_max(clen_concat, axis=0)
		print(c_max)
		g_concat = tf.zeros([N,1])

		# Running offset of the current passage inside vp_concat (axis 1).
		count = tf.constant(0)

		if config.with_passage_ranking:
			with tf.variable_scope("passage_ranking"):
				
				for i in range(config.max_para):
					i_ = tf.constant(i)
					def passage_ranking():
						# `count` and `g_concat` are read from the enclosing
						# scope, i.e. they are the values returned by the
						# previous tf.cond.  The advanced offset is returned
						# rather than stored in shared state, so no tensor
						# created inside one cond branch is reused in another
						# and nothing leaks into module globals.
						para_len = c_max[i]
						print(para_len)

						vp = vp_concat[:,count:count+para_len,:]
						mask = seq_mask_concat[:,count:count+para_len]
						next_count = count + para_len

						#g = pr_attention(init, vp, mask=mask, hidden=d,
						#	keep_prob=config.keep_prob, is_train=self.is_train, name_scope="rP_attention")
						#g = tf.reshape(g,[N,1])
						g = summ2(vp, init, max_para, d, mask, keep_prob=config.keep_prob,
							is_train=self.is_train, scope="summ")

						# The first passage replaces the zero placeholder.
						if i==0:
							return g,next_count
						return tf.concat([g_concat,g],axis=1),next_count
					def dummy():
						return g_concat,count
					g_concat,count = tf.cond(i_ < max_para, passage_ranking,dummy)

			self.losses3 = tf.nn.softmax_cross_entropy_with_logits(
						logits=g_concat, labels=self.passage_ranking)
			#self.losses3 = tf.Print(self.losses3,[self.losses3,tf.reduce_max(self.losses3),
			#	tf.reduce_max(self.pr),tf.reduce_max(gi)],message="losses3:")
			self.pr_loss = tf.reduce_mean(self.losses3)
			#self.pr_loss = tf.Print(self.pr_loss,[self.pr_loss])
			# Fixed 0.8 / 0.2 weighting of span loss and passage-ranking loss.
			r = tf.constant(0.8)
			one_minus_r = tf.constant(0.2)
			self.ee_loss1 = tf.multiply(r,self.loss)
			self.ee_loss2 = tf.multiply(one_minus_r,self.pr_loss)
			self.ee_loss = tf.add(self.ee_loss1, self.ee_loss2)
예제 #21
0
    def ready(self):
        """Build the two-stage ("badptr" + main) span-prediction graph.

        Stage 1 ("badptr_*" scopes) runs on gradient-stopped embeddings and
        encodings, predicts a span (``self.bad_yp1`` / ``self.bad_yp2``) and
        defines ``self.loss`` against ``self.bad_y1`` / ``self.bad_y2``.

        The predicted span is then masked out of the context embeddings,
        ``self.c_len`` and ``self.c_mask`` are recomputed for the shortened
        context, and stage 2 re-runs encoding, attention, match and pointer
        to produce ``self.yp1`` / ``self.yp2``.

        ``self.assign_trick_ops`` holds assign ops that copy the first
        encoder's ``init_fw`` / ``init_bw`` values into the second encoder
        object; the caller has to run them.
        """
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden
        gru = cudnn_gru if config.use_cudnn else native_gru

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])
                ch_emb = dropout(ch_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                qh_emb = dropout(qh_emb,
                                 keep_prob=config.keep_prob,
                                 is_train=self.is_train)
                # The same cell pair is used for context and question chars.
                cell_fw = tf.contrib.rnn.GRUCell(dg)
                cell_bw = tf.contrib.rnn.GRUCell(dg)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, ch_emb, self.ch_len, dtype=tf.float32)
                ch_emb = tf.concat([state_fw, state_bw], axis=1)
                _, (state_fw, state_bw) = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, qh_emb, self.qh_len, dtype=tf.float32)
                qh_emb = tf.concat([state_fw, state_bw], axis=1)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            self.c_emb = c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

            # Stage 1 sees the embeddings but does not backpropagate into them.
            bad_c_emb = tf.stop_gradient(c_emb)
            bad_q_emb = tf.stop_gradient(q_emb)

        with tf.variable_scope("encoding"):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=bad_c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            self.c_rnn = rnn(bad_c_emb, seq_len=self.c_len)
            self.q_rnn = rnn(bad_q_emb, seq_len=self.q_len)

            badptr_c = tf.stop_gradient(self.c_rnn)
            badptr_q = tf.stop_gradient(self.q_rnn)
            # Kept so its init_fw / init_bw can be copied into the second
            # encoder object created further down.
            old_rnn = rnn

        with tf.variable_scope("badptr_attention"):
            qc_att, self.badptr_qc_att = dot_attention(
                badptr_c,
                badptr_q,
                mask=self.q_mask,
                hidden=d,
                keep_prob=config.keep_prob,
                is_train=self.is_train,
                give=True)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)

            # self.att[0] is the full attention-RNN output; self.att[1] is its
            # slice at the last index of axis 1.
            self.att = [rnn(qc_att, seq_len=self.c_len)]
            self.att += [self.att[-1][:, -1, :]]

        with tf.variable_scope("badptr_dense"):
            # Three Dense(300) layers stacked on the slice above.
            # NOTE(review): tf.nn.dropout is not gated on self.is_train here,
            # unlike the dropout() helper used elsewhere - confirm intended.
            for _ in range(3):
                self.att += [
                    tf.nn.dropout(tf.keras.layers.Dense(300)(self.att[-1]),
                                  keep_prob=config.keep_prob)
                ]

        with tf.variable_scope("badptr"):
            init = self.att[-1]
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, self.att[0], d, self.c_mask)

        with tf.variable_scope("badptr_predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            # Keep only (start, end) pairs with 0 <= end - start <= 15.
            outer = tf.matrix_band_part(outer, 0, 15)
            self.bad_yp1_distrib = tf.reduce_max(outer, axis=2)
            self.bad_yp2_distrib = tf.reduce_max(outer, axis=1)
            self.bad_yp1 = tf.argmax(self.bad_yp1_distrib, axis=1)
            self.bad_yp2 = tf.argmax(self.bad_yp2_distrib, axis=1)
            losses = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits1, labels=tf.stop_gradient(self.bad_y1))
            losses2 = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=logits2, labels=tf.stop_gradient(self.bad_y2))
            self.loss = tf.reduce_mean(losses + losses2)

        # recompute c with bitmask
        # `left` is true before bad_yp1 and `right` is true after bad_yp2, so
        # `combo` is true everywhere outside the predicted span.
        left = tf.sequence_mask(self.bad_yp1, tf.shape(c_emb)[1])
        right = tf.logical_not(
            tf.sequence_mask(self.bad_yp2 + 1,
                             tf.shape(c_emb)[1]))
        self.combo = combo = tf.logical_or(left, right)

        ### FOR TESTING ###
        ## self.combo = combo = tf.cast(tf.ones_like(combo), tf.bool)

        def adjust(c_emb_combo):
            # Per-example step for tf.map_fn: keep the rows selected by the
            # mask, move them to the front and zero-pad back to the original
            # row count.  When position 0 is masked out and position 1 is
            # kept, the row block is returned unchanged instead.
            # NOTE(review): the reason for that special case is not visible
            # here - confirm.
            c_emb, combo = c_emb_combo
            foo = c_emb
            bar = tf.boolean_mask(foo, combo)

            return tf.cond(
                tf.logical_and(tf.equal(combo[0], False),
                               tf.equal(combo[1], True)),
                false_fn=lambda: tf.pad(
                    bar, [[0, tf.shape(foo)[0] - tf.shape(bar)[0]], [0, 0]]),
                true_fn=lambda: foo)

        self.c_emb_new = c_emb_new = tf.map_fn(adjust, (c_emb, combo),
                                               dtype=(tf.float32))
        # Count of positions that are both valid and outside the removed
        # span; the same count defines the new length and the new mask.
        self.c_len = tf.reduce_sum(tf.cast(
            tf.logical_and(self.c_mask, self.combo), tf.int32),
                                   axis=-1)
        self.c_mask = tf.sequence_mask(self.c_len, tf.shape(self.c_mask)[1])

        with tf.variable_scope("encoding", reuse=True):
            rnn = gru(num_layers=3,
                      num_units=d,
                      batch_size=N,
                      input_size=c_emb.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train,
                      super_hacky_reload=True)
            #### SEQ LEN HAS TO BE FIXED!!!! ####
            c = rnn(c_emb_new, seq_len=self.c_len)
            q = rnn(q_emb, seq_len=self.q_len)

        # Exposed for inspection.  q_ck now holds the question encoding; it
        # previously duplicated the context encoding.
        self.c_ck = c
        self.q_ck = q

        ### MAKE SURE THESE ARE RUN!!! ###
        print('RUN ASSIGN TRICK OPS (model.assign_trick_ops)!!')
        # Copy the first encoder's initial states into the second encoder.
        self.assign_trick_ops = []
        for i in range(len(rnn.init_fw)):
            self.assign_trick_ops += [
                tf.assign(rnn.init_fw[i], old_rnn.init_fw[i])
            ]
            self.assign_trick_ops += [
                tf.assign(rnn.init_bw[i], old_rnn.init_bw[i])
            ]

        with tf.variable_scope("attention"):
            qc_att, self.qc_att = dot_attention(c,
                                                q,
                                                mask=self.q_mask,
                                                hidden=d,
                                                keep_prob=config.keep_prob,
                                                is_train=self.is_train,
                                                give=True)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=qc_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            att = rnn(qc_att, seq_len=self.c_len)

        self.att_ck = att

        with tf.variable_scope("match"):
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=config.keep_prob,
                                     is_train=self.is_train)
            rnn = gru(num_layers=1,
                      num_units=d,
                      batch_size=N,
                      input_size=self_att.get_shape().as_list()[-1],
                      keep_prob=config.keep_prob,
                      is_train=self.is_train)
            match = rnn(self_att, seq_len=self.c_len)

        self.match_ck = match

        with tf.variable_scope("pointer"):
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=config.ptr_keep_prob,
                        is_train=self.is_train)
            pointer = ptr_net(batch=N,
                              hidden=init.get_shape().as_list()[-1],
                              keep_prob=config.ptr_keep_prob,
                              is_train=self.is_train)
            logits1, logits2 = pointer(init, match, d, self.c_mask)

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1_distrib = tf.reduce_max(outer, axis=2)
            self.yp2_distrib = tf.reduce_max(outer, axis=1)
            self.yp1 = tf.argmax(self.yp1_distrib, axis=1)
            self.yp2 = tf.argmax(self.yp2_distrib, axis=1)
예제 #22
0
파일: model.py 프로젝트: Meinwerk/R-Net
    def ready(self):
        """Build the span-prediction graph and its training loss.

        Stages are created in order under the scopes ``emb``, ``encoding``,
        ``attention``, ``match``, ``pointer`` and ``predict``.  Sizes come
        from ``self.config`` and ``self.c_maxlen`` / ``self.q_maxlen``;
        dropout rates come from ``self.keep_prob`` / ``self.ptr_keep_prob``.

        Sets ``self.yp1``, ``self.yp2`` and ``self.loss``.
        """
        config = self.config
        N, PL, QL, CL, d, dc, dg = config.batch_size, self.c_maxlen, self.q_maxlen, config.char_limit, config.hidden, config.char_dim, config.char_hidden

        with tf.variable_scope("emb"):
            with tf.variable_scope("char"):
                # Character ids -> embeddings, flattened to one row per word.
                ch_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.ch),
                    [N * PL, CL, dc])
                qh_emb = tf.reshape(
                    tf.nn.embedding_lookup(self.char_mat, self.qh),
                    [N * QL, CL, dc])
                # Only the second value returned by stacked_gru is used here
                # (presumably the final states - TODO confirm).
                _, qh_emb = stacked_gru(qh_emb,
                                        dg,
                                        num_layers=1,
                                        seq_len=self.qh_len,
                                        keep_prob=self.keep_prob,
                                        is_train=self.is_train)
                # Context characters reuse the variables just created for
                # the question characters.
                tf.get_variable_scope().reuse_variables()
                _, ch_emb = stacked_gru(ch_emb,
                                        dg,
                                        num_layers=1,
                                        seq_len=self.ch_len,
                                        keep_prob=self.keep_prob,
                                        is_train=self.is_train)
                qh_emb = tf.reshape(qh_emb, [N, QL, 2 * dg])
                ch_emb = tf.reshape(ch_emb, [N, PL, 2 * dg])

            with tf.name_scope("word"):
                c_emb = tf.nn.embedding_lookup(self.word_mat, self.c)
                q_emb = tf.nn.embedding_lookup(self.word_mat, self.q)

            # Word and character features are joined on the last axis.
            c_emb = tf.concat([c_emb, ch_emb], axis=2)
            q_emb = tf.concat([q_emb, qh_emb], axis=2)

        with tf.variable_scope("encoding"):
            # Context first, then the question with the same variables.
            c, _ = stacked_gru(c_emb,
                               d,
                               batch=N,
                               num_layers=3,
                               seq_len=self.c_len,
                               keep_prob=self.keep_prob,
                               is_train=self.is_train)
            tf.get_variable_scope().reuse_variables()
            q, _ = stacked_gru(q_emb,
                               d,
                               batch=N,
                               num_layers=3,
                               seq_len=self.q_len,
                               keep_prob=self.keep_prob,
                               is_train=self.is_train)

        with tf.variable_scope("attention"):
            # Context attends over the question (masked by q_mask).
            qc_att = dot_attention(c,
                                   q,
                                   mask=self.q_mask,
                                   hidden=d,
                                   keep_prob=self.keep_prob,
                                   is_train=self.is_train)
            att, _ = stacked_gru(qc_att,
                                 d,
                                 num_layers=1,
                                 seq_len=self.c_len,
                                 keep_prob=self.keep_prob,
                                 is_train=self.is_train)

        with tf.variable_scope("match"):
            # Self-attention of the attended context (masked by c_mask).
            self_att = dot_attention(att,
                                     att,
                                     mask=self.c_mask,
                                     hidden=d,
                                     keep_prob=self.keep_prob,
                                     is_train=self.is_train)
            match, _ = stacked_gru(self_att,
                                   d,
                                   num_layers=1,
                                   seq_len=self.c_len,
                                   keep_prob=self.keep_prob,
                                   is_train=self.is_train)

        with tf.variable_scope("pointer"):
            # Only the last 2 * d features of the question encoding are
            # summarised into the initial pointer state.
            init = summ(q[:, :, -2 * d:],
                        d,
                        mask=self.q_mask,
                        keep_prob=self.ptr_keep_prob,
                        is_train=self.is_train)
            d_match = dropout(match,
                              keep_prob=self.ptr_keep_prob,
                              is_train=self.is_train)
            hidden = init.get_shape().as_list()[-1]
            cell_fw = GRUCell(hidden)
            cell_bw = GRUCell(hidden)
            # "fw": logits1 from the initial state, one GRU step, then
            # logits2 from the updated state.
            with tf.variable_scope("fw"):
                inp, logits1_fw = pointer(d_match, init, d, mask=self.c_mask)
                _, state = cell_fw(inp, init)
                tf.get_variable_scope().reuse_variables()
                _, logits2_fw = pointer(d_match, state, d, mask=self.c_mask)
            # "bw": the reverse order - logits2 first, then logits1.
            with tf.variable_scope("bw"):
                inp, logits2_bw = pointer(d_match, init, d, mask=self.c_mask)
                _, state = cell_bw(inp, init)
                tf.get_variable_scope().reuse_variables()
                _, logits1_bw = pointer(d_match, state, d, mask=self.c_mask)
            # Final logits are the mean of the two directions.
            logits1 = (logits1_fw + logits1_bw) / 2.
            logits2 = (logits2_fw + logits2_bw) / 2.

        with tf.variable_scope("predict"):
            outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                              tf.expand_dims(tf.nn.softmax(logits2), axis=1))
            # Keep only (start, end) pairs with 0 <= end - start <= 15.
            outer = tf.matrix_band_part(outer, 0, 15)
            self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
            self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
            # Loss: mean over the batch of start plus end cross-entropy.
            losses = tf.nn.softmax_cross_entropy_with_logits(logits=logits1,
                                                             labels=self.y1)
            losses2 = tf.nn.softmax_cross_entropy_with_logits(logits=logits2,
                                                              labels=self.y2)
            self.loss = tf.reduce_mean(losses + losses2)