예제 #1
0
 def _build_graph(self):
     """
     Assemble the TensorFlow graph stage by stage, then log how long the
     build took and the parameter count reported by ``total_params``.
     """
     began_at = time.time()
     # The stages are invoked strictly in the order listed here.
     for stage_name in ('_setup_placeholders', '_embed', '_encode', '_fuse',
                        '_decode', '_compute_loss', '_create_train_op'):
         getattr(self, stage_name)()
     elapsed = time.time() - began_at
     self.logger.info('Time to build graph: {} s'.format(elapsed))
     trainable_count = total_params(tf.trainable_variables())
     self.logger.info(
         'There are {} parameters in the model'.format(trainable_count))
예제 #2
0
    def __init__(self,
                 config,
                 batch,
                 word_mat=None,
                 char_mat=None,
                 trainable=True,
                 opt=True,
                 demo=False,
                 graph=None):
        """Wire up inputs, embedding tables, masks and (optionally) the train op.

        Args:
            config: settings object. Attributes read here: ``test_para_limit``,
                ``test_ques_limit``, ``char_limit``, ``batch_size``,
                ``para_limit``, ``ques_limit``, ``learning_rate`` and
                ``grad_clip``.
            batch: iterator whose ``get_next()`` yields the seven input
                tensors; only consulted when ``demo`` is false.
            word_mat: initial values of the non-trainable ``word_mat`` variable.
            char_mat: initial values of the ``char_mat`` variable.
            trainable: when true, also create the Adam optimiser and
                ``train_op``.
            opt: when true, crop every input to the longest sequence present
                in the current batch.
            demo: when true, inputs are placeholders and the batch dimension
                is cropped to 1.
            graph: graph to build into; a fresh ``tf.Graph`` when omitted.
        """
        self.config = config
        self.demo = demo
        if graph is None:
            graph = tf.Graph()
        self.graph = graph

        def _true_count(mask):
            # Number of True entries in each row of a boolean mask.
            return tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)

        def _flat_nonzero_count(char_ids):
            # Non-zero entries along the last axis, flattened to one vector.
            present = tf.cast(tf.cast(char_ids, tf.bool), tf.int32)
            return tf.reshape(tf.reduce_sum(present, axis=2), [-1])

        with self.graph.as_default():
            self.global_step = tf.get_variable(
                'global_step', shape=[], dtype=tf.int32,
                initializer=tf.constant_initializer(0), trainable=False)
            self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")

            if not self.demo:
                (self.c, self.q, self.ch, self.qh,
                 self.y1, self.y2, self.qa_id) = batch.get_next()
            else:
                # Demo inputs are fed by hand, so each one is a placeholder.
                # Note that qa_id is not created on this path.
                demo_inputs = (
                    ("c", [None, config.test_para_limit], "context"),
                    ("q", [None, config.test_ques_limit], "question"),
                    ("ch", [None, config.test_para_limit, config.char_limit],
                     "context_char"),
                    ("qh", [None, config.test_ques_limit, config.char_limit],
                     "question_char"),
                    ("y1", [None, config.test_para_limit], "answer_index1"),
                    ("y2", [None, config.test_para_limit], "answer_index2"),
                )
                for attr, shape, op_name in demo_inputs:
                    setattr(self, attr,
                            tf.placeholder(tf.int32, shape, op_name))

            # Embedding tables: word vectors stay frozen, char vectors train.
            word_init = tf.constant(word_mat, dtype=tf.float32)
            self.word_mat = tf.get_variable(
                "word_mat", initializer=word_init, trainable=False)
            char_init = tf.constant(char_mat, dtype=tf.float32)
            self.char_mat = tf.get_variable("char_mat", initializer=char_init)

            # A zero id casts to False, so the masks flag non-zero positions
            # and the lengths count them per row.
            self.c_mask = tf.cast(self.c, tf.bool)
            self.q_mask = tf.cast(self.q, tf.bool)
            self.c_len = _true_count(self.c_mask)
            self.q_len = _true_count(self.q_mask)

            if opt:
                N = 1 if self.demo else config.batch_size
                CL = config.char_limit
                self.c_maxlen = tf.reduce_max(self.c_len)
                self.q_maxlen = tf.reduce_max(self.q_len)
                # (attribute, slice size); every slice starts at the origin.
                crop_plan = (
                    ("c", [N, self.c_maxlen]),
                    ("q", [N, self.q_maxlen]),
                    ("c_mask", [N, self.c_maxlen]),
                    ("q_mask", [N, self.q_maxlen]),
                    ("ch", [N, self.c_maxlen, CL]),
                    ("qh", [N, self.q_maxlen, CL]),
                    ("y1", [N, self.c_maxlen]),
                    ("y2", [N, self.c_maxlen]),
                )
                for attr, extent in crop_plan:
                    origin = [0] * len(extent)
                    setattr(self, attr,
                            tf.slice(getattr(self, attr), origin, extent))
            else:
                self.c_maxlen = config.para_limit
                self.q_maxlen = config.ques_limit

            self.ch_len = _flat_nonzero_count(self.ch)
            self.qh_len = _flat_nonzero_count(self.qh)

            self.forward()
            total_params()

            if trainable:
                # Logarithmic warm-up in global_step, capped at
                # config.learning_rate.
                warmup_scale = 0.001 / tf.log(999.)
                step_f = tf.cast(self.global_step, tf.float32)
                self.lr = tf.minimum(config.learning_rate,
                                     warmup_scale * tf.log(step_f + 1))
                self.opt = tf.train.AdamOptimizer(
                    learning_rate=self.lr, beta1=0.8, beta2=0.999,
                    epsilon=1e-7)
                grads_and_vars = self.opt.compute_gradients(self.loss)
                raw_grads, train_vars = zip(*grads_and_vars)
                # Clip by global norm before applying the update.
                clipped, _ = tf.clip_by_global_norm(raw_grads,
                                                    config.grad_clip)
                self.train_op = self.opt.apply_gradients(
                    zip(clipped, train_vars), global_step=self.global_step)
    def __init__(self,
                 config,
                 word_mat=None,
                 char_mat_trainable=None,
                 char_mat_fix=None,
                 test=False):
        """Read hyper-parameters, declare inputs and embeddings, build, compile.

        Args:
            config: mapping of hyper-parameters, indexed by string key.
            word_mat: initial values of the non-trainable ``word_mat``
                variable.
            char_mat_trainable: first block of rows of the character table.
            char_mat_fix: second block of rows, stacked below
                ``char_mat_trainable`` along axis 0.
            test: when true, ``cont_limit`` / ``ques_limit`` are fixed to
                1000 / 50 instead of being read from ``config``.
        """
        # --- hyper-parameters -------------------------------------------
        self.char_dim = config['char_dim']
        if test:
            self.cont_limit, self.ques_limit = 1000, 50
        else:
            self.cont_limit = config['cont_limit']
            self.ques_limit = config['ques_limit']
        for key in ('char_limit', 'ans_limit', 'filters', 'char_filters',
                    'batch_size', 'l2_norm', 'decay', 'learning_rate',
                    'grad_clip', 'init_lambda', 'gamma_b', 'gamma_c',
                    'use_elmo', 'use_cove', 'use_feat', 'use_rlloss'):
            setattr(self, key, config[key])

        # Scalar knobs that default to 0.0 unless fed at run time.
        for knob in ("dropout", "dropout_rnn", "dropout_emb", "dropout_att"):
            setattr(self, knob,
                    tf.placeholder_with_default(0.0, (), name=knob))
        self.un_size = tf.placeholder_with_default(
            self.batch_size, (), name="un_size")
        self.rlw = tf.placeholder_with_default(0.0, (), name="rlloss_weights")

        # --- embedding tables -------------------------------------------
        word_init = tf.constant(word_mat, dtype=tf.float32)
        self.word_mat = tf.get_variable(
            "word_mat", initializer=word_init, trainable=False)
        with tf.variable_scope("Input_Embedding_Mat"):
            stacked_chars = np.concatenate(
                [char_mat_trainable, char_mat_fix], axis=0)
            self.char_mat = tf.get_variable(
                "char_mat", initializer=stacked_chars, trainable=True)

        # --- input tensors ----------------------------------------------
        def _int_input(shape, op_name):
            return tf.placeholder(tf.int32, shape, op_name)

        def _float_input(shape, op_name):
            return tf.placeholder(tf.float32, shape, op_name)

        self.contw_input = _int_input([None, None], "context_word")
        self.quesw_input = _int_input([None, None], "question_word")
        self.contc_input = _int_input([None, None, self.char_limit],
                                      "context_char")
        self.quesc_input = _int_input([None, None, self.char_limit],
                                      "question_char")
        self.y_start = _int_input([None, None], "answer_start_index")
        self.y_end = _int_input([None, None], "answer_end_index")
        self.contw_elmo_id = _int_input([None, None, 50], 'contw_elmo_id')
        self.quesw_elmo_id = _int_input([None, None, 50], 'quesw_elmo_id')
        if self.use_feat:
            self.cont_feat = _float_input([None, None, 73], "cont_feat")
            self.ques_feat = _float_input([None, None, 73], "ques_feat")
        # Op that overwrites the whole char table with a fed matrix.
        self.old_char_mat = _float_input([None, None], "old_char_mat")
        self.assign_char_mat = tf.assign(self.char_mat, self.old_char_mat)

        # --- masks and lengths (a zero id casts to False) -----------------
        self.c_mask = tf.cast(self.contw_input, tf.bool)
        self.q_mask = tf.cast(self.quesw_input, tf.bool)
        self.cont_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32), axis=1)
        self.ques_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)

        # Longest context / question in the current batch.
        self.c_maxlen = tf.reduce_max(self.cont_len)
        self.q_maxlen = tf.reduce_max(self.ques_len)

        # --- ELMo features ----------------------------------------------
        if self.use_elmo == 2:
            # Compute ELMo inside the graph from character ids.
            bilm = BidirectionalLanguageModel(config['elmo_options_path'],
                                              config['elmo_weights_path'])

            def _elmo_layers(char_ids):
                # Swap axes 1 and 2 of the stacked layer output
                # (original note: [bs, 3, len, 1024] -> [bs, len, 3, 1024]).
                return tf.transpose(all_layers(bilm(char_ids)), [0, 2, 1, 3])

            self.elmo_cont = _elmo_layers(self.contw_elmo_id)
            self.elmo_ques = _elmo_layers(self.quesw_elmo_id)
        elif self.use_elmo == 1:
            # ELMo features are supplied through placeholders.
            self.elmo_cont = _float_input([None, None, 3, 1024], 'elmo_cont')
            self.elmo_ques = _float_input([None, None, 3, 1024], 'elmo_ques')

        # --- CoVe features ----------------------------------------------
        if self.use_cove == 2:
            with tf.variable_scope('Cove_Layer'):
                self.cove_model = load_model(config['cove_path'])
        elif self.use_cove == 1:
            self.cove_cont = _float_input([None, None, 2, 600], 'cove_cont')
            self.cove_ques = _float_input([None, None, 2, 600], 'cove_ques')

        # --- learning-rate plumbing -------------------------------------
        self.global_step = tf.get_variable(
            'global_step', shape=[], dtype=tf.int32,
            initializer=tf.constant_initializer(0), trainable=False)
        # Replaces the plain config value stored above with a feedable scalar.
        self.learning_rate = tf.placeholder_with_default(
            config['learning_rate'], (), name="learning_rate")
        self.lr = self.learning_rate

        # --- build and compile ------------------------------------------
        self.build_model()
        total_params()
        self.complie()
예제 #4
0
파일: model.py 프로젝트: txye/QANet
    def __init__(self, config, batch, word_mat=None, char_mat=None,
                 trainable=True, opt=True, demo=False, graph=None):
        """Create the model graph: inputs, embeddings, masks, forward pass, train op.

        Args:
            config: settings object (reads ``test_para_limit``,
                ``test_ques_limit``, ``char_limit``, ``batch_size``,
                ``para_limit``, ``ques_limit``, ``learning_rate``,
                ``grad_clip``).
            batch: iterator; ``get_next()`` supplies the seven input tensors
                unless ``demo`` is set.
            word_mat: initial word embedding values (variable is frozen).
            char_mat: initial character embedding values.
            trainable: build the optimiser and ``train_op`` when true.
            opt: crop inputs to the longest sequence in the batch when true.
            demo: use placeholders as inputs and a batch extent of 1.
            graph: graph to build into; defaults to a new ``tf.Graph``.
        """
        self.config, self.demo = config, demo
        self.graph = tf.Graph() if graph is None else graph

        with self.graph.as_default():
            self.global_step = tf.get_variable(
                'global_step',
                shape=[],
                dtype=tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False,
            )
            self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")

            if self.demo:
                # Hand-fed inputs; qa_id is not created on this path.
                ctx_shape = [None, config.test_para_limit]
                qry_shape = [None, config.test_ques_limit]
                self.c = tf.placeholder(tf.int32, ctx_shape, "context")
                self.q = tf.placeholder(tf.int32, qry_shape, "question")
                self.ch = tf.placeholder(
                    tf.int32, ctx_shape + [config.char_limit], "context_char")
                self.qh = tf.placeholder(
                    tf.int32, qry_shape + [config.char_limit], "question_char")
                self.y1 = tf.placeholder(tf.int32, ctx_shape, "answer_index1")
                self.y2 = tf.placeholder(tf.int32, ctx_shape, "answer_index2")
            else:
                pipeline_tensors = batch.get_next()
                (self.c, self.q, self.ch, self.qh,
                 self.y1, self.y2, self.qa_id) = pipeline_tensors

            # Word vectors are frozen; character vectors are trained.
            self.word_mat = tf.get_variable(
                "word_mat",
                initializer=tf.constant(word_mat, dtype=tf.float32),
                trainable=False,
            )
            self.char_mat = tf.get_variable(
                "char_mat",
                initializer=tf.constant(char_mat, dtype=tf.float32),
            )

            # Non-zero ids are flagged True; row sums give per-example lengths.
            self.c_mask = tf.cast(self.c, tf.bool)
            self.q_mask = tf.cast(self.q, tf.bool)
            c_flags = tf.cast(self.c_mask, tf.int32)
            self.c_len = tf.reduce_sum(c_flags, axis=1)
            q_flags = tf.cast(self.q_mask, tf.int32)
            self.q_len = tf.reduce_sum(q_flags, axis=1)

            if opt:
                N = 1 if self.demo else config.batch_size
                CL = config.char_limit
                self.c_maxlen = tf.reduce_max(self.c_len)
                self.q_maxlen = tf.reduce_max(self.q_len)

                def _crop(tensor, *extent):
                    # Keep the leading `extent` block starting at the origin.
                    return tf.slice(tensor, [0] * len(extent), list(extent))

                self.c = _crop(self.c, N, self.c_maxlen)
                self.q = _crop(self.q, N, self.q_maxlen)
                self.c_mask = _crop(self.c_mask, N, self.c_maxlen)
                self.q_mask = _crop(self.q_mask, N, self.q_maxlen)
                self.ch = _crop(self.ch, N, self.c_maxlen, CL)
                self.qh = _crop(self.qh, N, self.q_maxlen, CL)
                self.y1 = _crop(self.y1, N, self.c_maxlen)
                self.y2 = _crop(self.y2, N, self.c_maxlen)
            else:
                self.c_maxlen = config.para_limit
                self.q_maxlen = config.ques_limit

            # Count of non-zero character ids per token, flattened.
            ch_present = tf.cast(tf.cast(self.ch, tf.bool), tf.int32)
            self.ch_len = tf.reshape(tf.reduce_sum(ch_present, axis=2), [-1])
            qh_present = tf.cast(tf.cast(self.qh, tf.bool), tf.int32)
            self.qh_len = tf.reshape(tf.reduce_sum(qh_present, axis=2), [-1])

            self.forward()
            total_params()

            if not trainable:
                return

            # Logarithmic warm-up in global_step, capped at
            # config.learning_rate.
            log_warmup = (0.001 / tf.log(999.)
                          * tf.log(tf.cast(self.global_step, tf.float32) + 1))
            self.lr = tf.minimum(config.learning_rate, log_warmup)
            self.opt = tf.train.AdamOptimizer(
                learning_rate=self.lr, beta1=0.8, beta2=0.999, epsilon=1e-7)
            pairs = self.opt.compute_gradients(self.loss)
            gradients, variables = zip(*pairs)
            # Global-norm clipping before the update is applied.
            capped_grads, _ = tf.clip_by_global_norm(
                gradients, config.grad_clip)
            self.train_op = self.opt.apply_gradients(
                zip(capped_grads, variables), global_step=self.global_step)
예제 #5
0
    def __init__(self, config, batch,
                 word_mat=None, char_mat=None, trainable=True, opt=True, demo = False, graph = None):
        """Build the candidate-aware model graph.

        Args:
            config: settings object. Attributes read here: ``test_para_limit``,
                ``test_ques_limit``, ``cand_limit``, ``char_limit``,
                ``batch_size``, ``para_limit``, ``ques_limit``,
                ``max_margin``, ``cand_condense_vector``, ``learning_rate``
                and ``grad_clip``.
            batch: dataset iterator; ``get_next()`` supplies the fourteen
                input tensors unless ``demo`` is set.
            word_mat: initial values of the non-trainable ``word_mat``
                variable.
            char_mat: initial values of the ``char_mat`` variable.
            trainable: when true, also create the Adam optimiser and
                ``train_op``.
            opt: when true, crop every input to the longest sequence present
                in the current batch.
            demo: when true, inputs are placeholders and the batch dimension
                is cropped to 1.
            graph: graph to build into; a fresh ``tf.Graph`` when omitted.
        """
        self.config = config
        self.demo = demo
        self.debug_ops = []

        self.graph = graph if graph is not None else tf.Graph()
        with self.graph.as_default():
            self.global_step = tf.get_variable('global_step', shape=[], dtype=tf.int32,
                                               initializer=tf.constant_initializer(0), trainable=False)
            self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")
            if self.demo:
                self.c_elmo = tf.placeholder(tf.int32, [None, config.test_para_limit + 2, 50], "context_elmo_idxs")
                self.q_elmo = tf.placeholder(tf.int32, [None, config.test_ques_limit + 2, 50], "question_elmo_idxs")
                # The two extra slots belong on the token axis, matching
                # c_elmo / q_elmo and the [N, x_maxlen + 2, 50] crop below
                # (previously declared as [None, cand_limit, 50 + 2]).
                self.x_elmo = tf.placeholder(tf.int32, [None, config.cand_limit + 2, 50], "candidates_elmo_idxs")
                self.c = tf.placeholder(tf.int32, [None, config.test_para_limit], "context")
                self.q = tf.placeholder(tf.int32, [None, config.test_ques_limit], "question")
                self.x = tf.placeholder(tf.int32, [None, config.cand_limit], "candidates")
                self.ch = tf.placeholder(tf.int32, [None, config.test_para_limit, config.char_limit], "context_char")
                self.qh = tf.placeholder(tf.int32, [None, config.test_ques_limit, config.char_limit],"question_char")
                self.xh = tf.placeholder(tf.int32, [None, config.cand_limit, config.char_limit], "candidate_char")
                self.y1 = tf.placeholder(tf.int32, [None, config.test_para_limit], "answer_index1")
                self.y2 = tf.placeholder(tf.int32, [None, config.test_para_limit], "answer_index2")
                # yx and xp are used unconditionally further down; without
                # them the demo path raised AttributeError during construction.
                # NOTE(review): dtype and static shapes are inferred from how
                # these tensors are cropped below ([N, x_maxlen] and
                # [N, x_maxlen, c_maxlen]) - confirm against the input pipeline.
                self.yx = tf.placeholder(tf.float32, [None, config.cand_limit], "candidate_label")
                self.xp = tf.placeholder(tf.float32, [None, config.cand_limit, config.test_para_limit],
                                         "candidate_position")
            else:
                # batch is the dataset iterator. get_next() is called once and
                # the tensors it returns feed the rest of the graph.
                # Shapes recorded in the original author's notes (N = batch
                # size; not verified here):
                #   c (N, 500), q (N, 50), x (N, 50)                  int32
                #   ch (N, 500, 16), qh (N, 50, 16), xh (N, 50, 16)   int32
                #   yx (N, 100), y1 (N, 500), y2 (N, 500)             float32
                #   qa_id (N,)                                        int64
                self.c_elmo, self.q_elmo, self.x_elmo, \
                self.c, self.q, self.x, \
                self.ch, self.qh, self.xh, \
                self.yx, self.xp, self.y1, self.y2, self.qa_id = batch.get_next()

            if self.config.max_margin:
                # Complement of the candidate labels.
                self.yx_inv = 1 - self.yx

            # Word vectors are frozen; character vectors are trained.
            self.word_mat = tf.get_variable("word_mat", initializer=tf.constant(word_mat, dtype=tf.float32),
                                            trainable=False)
            self.char_mat = tf.get_variable("char_mat", initializer=tf.constant(char_mat, dtype=tf.float32))

            # Inputs are zero-padded to a fixed length; a non-zero id casts to
            # True, so each mask flags the real (non-padding) positions.
            self.c_mask = tf.cast(self.c, tf.bool)
            self.q_mask = tf.cast(self.q, tf.bool)
            self.x_mask = tf.cast(self.x, tf.bool)

            # Per-example counts of non-padding positions, shape (N,).
            self.c_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32), axis=1)
            self.q_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)
            self.x_len = tf.reduce_sum(tf.cast(self.x_mask, tf.int32), axis=1)

            # Memory optimisation: crop every input to the longest sequence in
            # the batch. tf.slice(input, begin, size) takes size[i] elements
            # along dimension i starting at offset begin[i].
            if opt:
                N = config.batch_size if not self.demo else 1
                CL = config.char_limit
                self.c_maxlen = tf.reduce_max(self.c_len)
                self.q_maxlen = tf.reduce_max(self.q_len)
                self.x_maxlen = tf.reduce_max(self.x_len)
                # The ELMo id tensors carry two extra positions on the token axis.
                self.c_elmo = tf.slice(self.c_elmo, [0, 0, 0], [N, self.c_maxlen + 2, 50])
                self.q_elmo = tf.slice(self.q_elmo, [0, 0, 0], [N, self.q_maxlen + 2, 50])
                self.x_elmo = tf.slice(self.x_elmo, [0, 0, 0], [N, self.x_maxlen + 2, 50])
                self.c = tf.slice(self.c, [0, 0], [N, self.c_maxlen])
                self.q = tf.slice(self.q, [0, 0], [N, self.q_maxlen])
                self.x = tf.slice(self.x, [0, 0], [N, self.x_maxlen])
                self.c_mask = tf.slice(self.c_mask, [0, 0], [N, self.c_maxlen])
                self.q_mask = tf.slice(self.q_mask, [0, 0], [N, self.q_maxlen])
                self.x_mask = tf.slice(self.x_mask, [0, 0], [N, self.x_maxlen])
                self.ch = tf.slice(self.ch, [0, 0, 0], [N, self.c_maxlen, CL])
                self.qh = tf.slice(self.qh, [0, 0, 0], [N, self.q_maxlen, CL])
                self.xh = tf.slice(self.xh, [0, 0, 0], [N, self.x_maxlen, CL])
                self.y1 = tf.slice(self.y1, [0, 0], [N, self.c_maxlen])
                self.y2 = tf.slice(self.y2, [0, 0], [N, self.c_maxlen])
                self.yx = tf.slice(self.yx, [0, 0], [N, self.x_maxlen])

                if self.config.cand_condense_vector:
                    self.xp = tf.slice(self.xp, [0, 0, 0], [N, self.x_maxlen, self.c_maxlen])

                if self.config.max_margin:
                    self.yx_inv = tf.slice(self.yx_inv, [0, 0], [N, self.x_maxlen])
            else:
                self.c_maxlen, self.q_maxlen, self.x_maxlen = config.para_limit, config.ques_limit, config.cand_limit

            # Tensors exposed for debugging.
            self.debug_ops.extend([self.xp, self.yx, self.y1])

            # Count of non-zero character ids per token, flattened to one vector.
            self.ch_len = tf.reshape(tf.reduce_sum(tf.cast(tf.cast(self.ch, tf.bool), tf.int32), axis=2), [-1])
            self.qh_len = tf.reshape(tf.reduce_sum(tf.cast(tf.cast(self.qh, tf.bool), tf.int32), axis=2), [-1])

            self.forward()
            total_params()

            if trainable:
                # Logarithmic warm-up in global_step, capped at config.learning_rate.
                self.lr = tf.minimum(config.learning_rate, 0.001 / tf.log(999.)
                                     * tf.log(tf.cast(self.global_step, tf.float32) + 1))
                self.opt = tf.train.AdamOptimizer(learning_rate = self.lr,beta1 = 0.8,beta2 = 0.999,epsilon = 1e-7)
                grads = self.opt.compute_gradients(self.loss)
                gradients, variables = zip(*grads)
                # Clip by global norm before applying the update.
                capped_grads, _ = tf.clip_by_global_norm(gradients, config.grad_clip)
                self.train_op = self.opt.apply_gradients(zip(capped_grads, variables), global_step=self.global_step)
    def __init__(self,
                 config,
                 batch,
                 word_mat=None,
                 char_mat=None,
                 trainable=True,
                 opt=True,
                 demo=False,
                 graph=None):
        """Build the three-alternative model: inputs, masks, forward pass, train op.

        Args:
            config: settings object. Attributes read here: ``batch_size``,
                ``char_limit``, ``para_limit``, ``ques_limit``,
                ``test_para_limit``, ``test_ques_limit``,
                ``alternatives_limit``, ``decay``, ``init_lr`` and
                ``grad_clip``.
            batch: iterator; ``get_next()`` yields eight tensors when
                ``trainable`` (labels ``y1`` included) and seven otherwise.
            word_mat: initial values of the ``word_mat`` variable.
            char_mat: initial values of the ``char_mat`` variable.
            trainable: when true, load labels and create the loss, the
                optional EMA ops, the optimiser, ``train_op`` and ``saver``.
            opt: when true, crop every input to the longest sequence present
                in the current batch; otherwise use the configured limits.
            demo: when true, the batch dimension is cropped to 1.
            graph: stored on ``self.graph`` (a new ``tf.Graph`` when omitted).
                NOTE(review): the graph is never entered with
                ``as_default()`` here, so ops land in whatever graph is
                current - confirm this is intended.
        """
        self.config = config
        self.demo = demo
        self.graph = graph if graph is not None else tf.Graph()
        self.trainable = trainable
        # Plain truthiness, consistent with the `if trainable:` checks below.
        if trainable:
            # Original note: y1 is (64, 3); alterh is
            # [batch, 3, alternative_len, char_len].
            self.c, self.q, self.ch, self.qh, self.alter, self.alterh, self.y1, self.qa_id = batch.get_next(
            )
        else:
            # Same tensors without the labels y1.
            self.c, self.q, self.ch, self.qh, self.alter, self.alterh, self.qa_id = batch.get_next(
            )

        self.global_step = tf.get_variable(
            'global_step',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)
        self.dropout = tf.placeholder_with_default(0.5, (), name="dropout")

        # Both embedding tables are trainable in this model.
        self.word_mat = tf.get_variable("word_mat",
                                        initializer=tf.constant(
                                            word_mat, dtype=tf.float32),
                                        trainable=True)
        self.char_mat = tf.get_variable("char_mat",
                                        initializer=tf.constant(
                                            char_mat, dtype=tf.float32),
                                        trainable=True)

        # self.c is zero-padded to a uniform length, so a non-zero id marks a
        # real token.
        self.c_mask = tf.cast(self.c, tf.bool)
        self.q_mask = tf.cast(self.q, tf.bool)

        # One mask per alternative (index 0, 1, 2 along axis 1).
        self.alter1_mask = tf.cast(self.alter[:, 0, :], tf.bool)
        self.alter2_mask = tf.cast(self.alter[:, 1, :], tf.bool)
        self.alter3_mask = tf.cast(self.alter[:, 2, :], tf.bool)

        # Actual (unpadded) length of every sequence.
        self.c_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32),
                                   axis=1)
        self.q_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)
        self.alter1_len = tf.reduce_sum(tf.cast(self.alter1_mask, tf.int32),
                                        axis=1)
        self.alter2_len = tf.reduce_sum(tf.cast(self.alter2_mask, tf.int32),
                                        axis=1)
        self.alter3_len = tf.reduce_sum(tf.cast(self.alter3_mask, tf.int32),
                                        axis=1)

        if opt:
            # Trim the padded inputs back to the longest length in the batch.
            N, CL = config.batch_size if not self.demo else 1, config.char_limit
            self.c_maxlen = tf.reduce_max(self.c_len)
            self.q_maxlen = tf.reduce_max(self.q_len)
            self.aletr1_maxlen = tf.reduce_max(self.alter1_len)
            self.aletr2_maxlen = tf.reduce_max(self.alter2_len)
            self.aletr3_maxlen = tf.reduce_max(self.alter3_len)

            self.c = tf.slice(self.c, [0, 0], [N, self.c_maxlen])
            self.q = tf.slice(self.q, [0, 0], [N, self.q_maxlen])
            self.alter1 = tf.slice(self.alter[:, 0, :], [0, 0],
                                   [N, self.aletr1_maxlen])
            self.alter2 = tf.slice(self.alter[:, 1, :], [0, 0],
                                   [N, self.aletr2_maxlen])
            self.alter3 = tf.slice(self.alter[:, 2, :], [0, 0],
                                   [N, self.aletr3_maxlen])

            self.c_mask = tf.slice(self.c_mask, [0, 0], [N, self.c_maxlen])
            self.q_mask = tf.slice(self.q_mask, [0, 0], [N, self.q_maxlen])
            self.alter1_mask = tf.slice(self.alter1_mask, [0, 0],
                                        [N, self.aletr1_maxlen])
            self.alter2_mask = tf.slice(self.alter2_mask, [0, 0],
                                        [N, self.aletr2_maxlen])
            self.alter3_mask = tf.slice(self.alter3_mask, [0, 0],
                                        [N, self.aletr3_maxlen])

            self.ch = tf.slice(self.ch, [0, 0, 0], [N, self.c_maxlen, CL])
            self.qh = tf.slice(self.qh, [0, 0, 0], [N, self.q_maxlen, CL])
            self.alter1h = tf.slice(self.alterh[:, 0, :, :], [0, 0, 0],
                                    [N, self.aletr1_maxlen, CL])
            self.alter2h = tf.slice(self.alterh[:, 1, :, :], [0, 0, 0],
                                    [N, self.aletr2_maxlen, CL])
            self.alter3h = tf.slice(self.alterh[:, 2, :, :], [0, 0, 0],
                                    [N, self.aletr3_maxlen, CL])
        else:
            if trainable:
                self.c_maxlen, self.q_maxlen, self.alter_maxlen = config.para_limit, config.ques_limit, config.alternatives_limit
            else:
                self.c_maxlen, self.q_maxlen, self.alter_maxlen = config.test_para_limit, config.test_ques_limit, config.alternatives_limit
            # Without cropping, the per-alternative tensors are simply the
            # un-trimmed slices. They were previously left undefined on this
            # path, so the character-length computation below raised
            # AttributeError whenever opt was false.
            self.aletr1_maxlen = self.aletr2_maxlen = self.aletr3_maxlen = self.alter_maxlen
            self.alter1, self.alter2, self.alter3 = (
                self.alter[:, i, :] for i in range(3))
            self.alter1h, self.alter2h, self.alter3h = (
                self.alterh[:, i, :, :] for i in range(3))

        # Count of non-zero character ids per token, flattened to one vector.
        self.ch_len = tf.reshape(
            tf.reduce_sum(tf.cast(tf.cast(self.ch, tf.bool), tf.int32),
                          axis=2), [-1])
        self.qh_len = tf.reshape(
            tf.reduce_sum(tf.cast(tf.cast(self.qh, tf.bool), tf.int32),
                          axis=2), [-1])
        self.alterh1_len = tf.reshape(
            tf.reduce_sum(tf.cast(tf.cast(self.alter1h, tf.bool), tf.int32),
                          axis=2), [-1])
        self.alterh2_len = tf.reshape(
            tf.reduce_sum(tf.cast(tf.cast(self.alter2h, tf.bool), tf.int32),
                          axis=2), [-1])
        self.alterh3_len = tf.reshape(
            tf.reduce_sum(tf.cast(tf.cast(self.alter3h, tf.bool), tf.int32),
                          axis=2), [-1])

        self.forward()
        total_params()

        if trainable:
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits1, labels=self.y1)
            self.loss = tf.reduce_mean(losses)
            if config.decay is not None:
                # Maintain exponential moving averages of the trainable
                # variables; the identity under control_dependencies makes
                # evaluating the loss also run the EMA update.
                self.var_ema = tf.train.ExponentialMovingAverage(config.decay)
                ema_op = self.var_ema.apply(tf.trainable_variables())
                with tf.control_dependencies([ema_op]):
                    self.loss = tf.identity(self.loss)

                    # Ops that copy each averaged value back into its variable.
                    self.assign_vars = []
                    for var in tf.global_variables():
                        v = self.var_ema.average(var)
                        # average() yields None for untracked variables; test
                        # for that explicitly rather than via truthiness.
                        if v is not None:
                            self.assign_vars.append(tf.assign(var, v))
            # Logarithmic warm-up in global_step, capped at config.init_lr.
            self.lr = tf.minimum(
                config.init_lr, 0.001 / tf.log(999.) *
                tf.log(tf.cast(self.global_step, tf.float32) + 1))
            self.opt = tf.train.AdamOptimizer(learning_rate=self.lr,
                                              beta1=0.8,
                                              beta2=0.999,
                                              epsilon=1e-7)
            grads = self.opt.compute_gradients(self.loss)
            gradients, variables = zip(*grads)
            # Clip by global norm before applying the update.
            capped_grads, _ = tf.clip_by_global_norm(gradients,
                                                     config.grad_clip)
            self.train_op = self.opt.apply_gradients(
                zip(capped_grads, variables), global_step=self.global_step)
            self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)
예제 #7
0
    def __init__(self,
                 config,
                 batch,
                 word_mat=None,
                 char_mat=None,
                 filter_sizes=None,
                 embedding_size=None,
                 num_filters=None,
                 trainable=True,
                 opt=True,
                 demo=False,
                 graph=None):
        """Pull one batch of input tensors, prepare them and build the model.

        Args:
            config: settings object; this constructor reads ``batch_size``,
                ``char_limit``, ``para_limit``, ``ques_limit``,
                ``test_para_limit`` and ``test_ques_limit`` from it.
            batch: object whose ``get_next()`` yields seven tensors when
                ``trainable == True`` and five tensors otherwise.
            word_mat: word-embedding table, handed to ``tf.constant``.
            char_mat: character-embedding table, handed to ``tf.constant``.
            filter_sizes: accepted but not read by this constructor.
            embedding_size: accepted but not read by this constructor.
            num_filters: accepted but not read by this constructor.
            trainable: selects the batch layout and the fixed length limits,
                and is forwarded to ``self.forward``.
            opt: when truthy, inputs are cropped to the longest sequence of
                the current batch; otherwise the configured limits are used.
            demo: when truthy (and ``opt`` is set) the crop uses a batch
                dimension of 1 instead of ``config.batch_size``.
            graph: graph remembered on ``self.graph``; a fresh ``tf.Graph``
                is created when omitted. It is only stored here, not entered.
        """

        def _real_token_count(mask):
            """Number of True entries along axis 1 of a boolean mask."""
            return tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)

        def _flat_char_count(char_ids):
            """Non-zero character ids per token, flattened to one dimension."""
            present = tf.cast(tf.cast(char_ids, tf.bool), tf.int32)
            return tf.reshape(tf.reduce_sum(present, axis=2), [-1])

        self.config = config
        self.demo = demo
        self.graph = tf.Graph() if graph is None else graph
        self.trainable = trainable
        self.l2_loss = tf.constant(0.0)
        self.l2_reg_lambda = 0.7

        # The training pipeline additionally carries the labels and the
        # question id. The comparison is kept as an equality test against
        # True, so only values equal to True select the training layout.
        if trainable == True:
            (self.c, self.q, self.ch, self.qh, self.input_y, self.qa_id,
             self.alternatives_tokens) = batch.get_next()
        else:
            (self.c, self.q, self.ch, self.qh,
             self.alternatives_tokens) = batch.get_next()

        self.global_step = tf.get_variable(
            name='global_step',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)
        self.dropout = tf.placeholder_with_default(
            input=0.5, shape=(), name="dropout")

        # Both embedding tables are initialised from the given matrices and
        # remain trainable.
        word_init = tf.constant(word_mat, dtype=tf.float32)
        self.word_mat = tf.get_variable(
            name="word_mat", initializer=word_init, trainable=True)
        char_init = tf.constant(char_mat, dtype=tf.float32)
        self.char_mat = tf.get_variable(
            name="char_mat", initializer=char_init, trainable=True)

        # Sequences arrive padded with 0 to a common length, so a non-zero
        # id marks a real token.
        self.c_mask = tf.cast(self.c, tf.bool)
        self.q_mask = tf.cast(self.q, tf.bool)

        # Actual (unpadded) length of every sequence.
        self.c_len = _real_token_count(self.c_mask)
        self.q_len = _real_token_count(self.q_mask)

        if opt:
            # Trim the padded inputs down to the longest sequence found in
            # the current batch.
            rows = 1 if self.demo else config.batch_size
            chars = config.char_limit
            self.c_maxlen = tf.reduce_max(self.c_len)
            self.q_maxlen = tf.reduce_max(self.q_len)

            word_origin, char_origin = [0, 0], [0, 0, 0]

            self.c = tf.slice(self.c, word_origin, [rows, self.c_maxlen])
            self.q = tf.slice(self.q, word_origin, [rows, self.q_maxlen])

            self.c_mask = tf.slice(self.c_mask, word_origin,
                                   [rows, self.c_maxlen])
            self.q_mask = tf.slice(self.q_mask, word_origin,
                                   [rows, self.q_maxlen])

            self.ch = tf.slice(self.ch, char_origin,
                               [rows, self.c_maxlen, chars])
            self.qh = tf.slice(self.qh, char_origin,
                               [rows, self.q_maxlen, chars])
        elif trainable:
            self.c_maxlen = config.para_limit
            self.q_maxlen = config.ques_limit
        else:
            self.c_maxlen = config.test_para_limit
            self.q_maxlen = config.test_ques_limit

        self.ch_len = _flat_char_count(self.ch)
        self.qh_len = _flat_char_count(self.qh)

        self.forward(trainable)
        total_params()
예제 #8
0
    def __init__(self,
                 config,
                 word_mat=None,
                 char_mat=None,
                 test=False,
                 use_elmo=False,
                 use_cove=False):
        """Read hyper-parameters, declare the input placeholders and build.

        Args:
            config: mapping indexed by string keys; the keys read here are
                ``char_dim``, ``cont_limit``, ``ques_limit``, ``char_limit``,
                ``ans_limit``, ``filters``, ``num_heads``, ``batch_size``,
                ``l2_norm``, ``decay``, ``learning_rate`` and ``grad_clip``.
            word_mat: word-embedding table, handed to ``tf.constant``; the
                resulting variable is not trainable.
            char_mat: character-embedding table, handed to ``tf.constant``;
                the resulting variable is trainable.
            test: when truthy, the context/question limits are fixed to
                1000 and 50 and the corresponding config keys are not read.
            use_elmo: when truthy, ELMo embeddings are fetched from TF-Hub
                for both the context and the question tokens.
            use_cove: stored on the instance; not otherwise used here.
        """
        # hyper-parameters
        self.char_dim = config['char_dim']
        if test:
            self.cont_limit, self.ques_limit = 1000, 50
        else:
            self.cont_limit = config['cont_limit']
            self.ques_limit = config['ques_limit']
        for key in ('char_limit', 'ans_limit', 'filters', 'num_heads',
                    'batch_size', 'l2_norm', 'decay', 'learning_rate',
                    'grad_clip'):
            setattr(self, key, config[key])
        self.use_elmo = use_elmo
        self.use_cove = use_cove
        self.dropout = tf.placeholder_with_default(
            input=0.0, shape=(), name="dropout")

        # embedding layer
        word_init = tf.constant(word_mat, dtype=tf.float32)
        self.word_mat = tf.get_variable(
            name="word_mat", initializer=word_init, trainable=False)
        char_init = tf.constant(char_mat, dtype=tf.float32)
        self.char_mat = tf.get_variable(
            name="char_mat", initializer=char_init, trainable=True)

        # input tensors, declared at the full (padded) limits
        self.contw_input_ = tf.placeholder(
            dtype=tf.int32, shape=[None, self.cont_limit],
            name="context_word")
        self.quesw_input_ = tf.placeholder(
            dtype=tf.int32, shape=[None, self.ques_limit],
            name="question_word")
        self.contc_input_ = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self.cont_limit, self.char_limit],
            name="context_char")
        self.quesc_input_ = tf.placeholder(
            dtype=tf.int32,
            shape=[None, self.ques_limit, self.char_limit],
            name="question_char")
        self.y_start_ = tf.placeholder(
            dtype=tf.int32, shape=[None, self.cont_limit + 1],
            name="answer_start_index")
        self.y_end_ = tf.placeholder(
            dtype=tf.int32, shape=[None, self.cont_limit + 1],
            name="answer_end_index")
        self.contw_strings = tf.placeholder(
            dtype=tf.string, shape=[None, self.cont_limit],
            name='contw_strings')
        self.quesw_strings = tf.placeholder(
            dtype=tf.string, shape=[None, self.ques_limit],
            name='quesw_strings')

        # Non-zero word ids mark real tokens; their count per row is the
        # sequence length.
        self.c_mask = tf.cast(self.contw_input_, tf.bool)
        self.q_mask = tf.cast(self.quesw_input_, tf.bool)
        self.cont_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32), axis=1)
        self.ques_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)

        if self.use_elmo:
            elmo_module = hub.Module("https://tfhub.dev/google/elmo/2",
                                     trainable=True)

            def _elmo_embed(tokens, lengths):
                """Run the hub module on pre-tokenised text."""
                outputs = elmo_module(
                    inputs={"tokens": tokens, "sequence_len": lengths},
                    signature="tokens",
                    as_dict=True)
                return outputs["elmo"]

            self.cont_elmo = _elmo_embed(self.contw_strings, self.cont_len)
            self.ques_elmo = _elmo_embed(self.quesw_strings, self.ques_len)

        # Crop every input to the longest sequence of the current batch;
        # a size of -1 keeps the whole batch dimension.
        self.c_maxlen = tf.reduce_max(self.cont_len)
        self.q_maxlen = tf.reduce_max(self.ques_len)

        origin_2d, origin_3d = [0, 0], [0, 0, 0]

        self.contw_input = tf.slice(
            self.contw_input_, origin_2d, [-1, self.c_maxlen])
        self.quesw_input = tf.slice(
            self.quesw_input_, origin_2d, [-1, self.q_maxlen])
        self.c_mask = tf.slice(self.c_mask, origin_2d, [-1, self.c_maxlen])
        self.q_mask = tf.slice(self.q_mask, origin_2d, [-1, self.q_maxlen])
        self.contc_input = tf.slice(
            self.contc_input_, origin_3d,
            [-1, self.c_maxlen, self.char_limit])
        self.quesc_input = tf.slice(
            self.quesc_input_, origin_3d,
            [-1, self.q_maxlen, self.char_limit])
        # The answer placeholders are one column wider than the context.
        self.y_start = tf.slice(
            self.y_start_, origin_2d, [-1, self.c_maxlen + 1])
        self.y_end = tf.slice(
            self.y_end_, origin_2d, [-1, self.c_maxlen + 1])
        if self.use_elmo:
            self.cont_elmo = tf.slice(
                self.cont_elmo, origin_3d, [-1, self.c_maxlen, 1024])
            self.ques_elmo = tf.slice(
                self.ques_elmo, origin_3d, [-1, self.q_maxlen, 1024])

        # build the model, report its size, then run self.complie()
        self.build_model()
        total_params()
        self.complie()
예제 #9
0
    def __init__(self,
                 config,
                 iterator,
                 emb_mat,
                 trainable=True,
                 opt=True,
                 demo=False):
        """Set up inputs, build the model and, if requested, the train op.

        Args:
            config: settings object; this constructor reads
                ``test_x_limit``, ``test_q_limit``, ``x_limit``, ``q_limit``,
                ``cudnn``, ``l2_norm``, ``grad_clip_flag``, ``grad_clip`` and
                ``decay`` from it.
            iterator: object whose ``get_next()`` yields the ``(c, q, y)``
                tensors; only used when ``demo`` is falsy.
            emb_mat: embedding matrix, stored on ``self.emb_mat``.
            trainable: when truthy, the regularised loss, the optimizer and
                (optionally) the moving-average update are created.
            opt: when truthy, ``c``/``q`` and their masks are cropped to the
                longest sequence of the current batch.
            demo: when truthy, inputs are placeholders instead of iterator
                outputs.

        Note:
            ``self.loss`` is not assigned here; it is expected to exist once
            ``self._build_model()`` has run.
        """
        self.config = config
        self.emb_mat = emb_mat
        self.demo = demo

        self.global_step = tf.get_variable(
            'global_step',
            shape=[],
            dtype=tf.int32,
            initializer=tf.constant_initializer(0),
            trainable=False)
        self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")

        if self.demo:
            self.c = tf.placeholder(tf.int32, [None, config.test_x_limit],
                                    "context")
            self.q = tf.placeholder(tf.int32, [None, config.test_q_limit],
                                    "query")
            self.y = tf.placeholder(tf.float32, [None], "y")
            self.batch_size = tf.placeholder(tf.int32, None, "batch_size")
        else:
            self.c, self.q, self.y = iterator.get_next()

        with tf.variable_scope("opt"):
            # A non-zero id marks a real token; summing the mask gives the
            # per-row sequence length.
            self.c_mask = tf.cast(self.c, tf.bool)
            self.q_mask = tf.cast(self.q, tf.bool)
            self.c_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32), axis=1)
            self.q_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)

            if opt:
                # Crop to the longest sequence in the batch; a size of -1
                # keeps the whole batch dimension.
                self.c_maxlen = tf.reduce_max(self.c_len)
                self.q_maxlen = tf.reduce_max(self.q_len)
                self.c = tf.slice(self.c, [0, 0], [-1, self.c_maxlen])
                self.q = tf.slice(self.q, [0, 0], [-1, self.q_maxlen])
                self.c_mask = tf.slice(self.c_mask, [0, 0],
                                       [-1, self.c_maxlen])
                self.q_mask = tf.slice(self.q_mask, [0, 0],
                                       [-1, self.q_maxlen])
            else:
                self.c_maxlen, self.q_maxlen = config.x_limit, config.q_limit

        self._build_model()
        if not config.cudnn:
            total_params()

        if trainable:
            if config.l2_norm:
                regularizer = tf.contrib.layers.l2_regularizer(config.l2_norm)
                variables = tf.trainable_variables()
                variables = [v for v in variables
                             if "bias" not in v.name]  # don't regularize bias
                self.l2_loss = tf.contrib.layers.apply_regularization(
                    regularizer, variables)
                self.loss += self.l2_loss

            # optimizer
            self.lr = tf.placeholder_with_default(0.001, (), name="lr")
            self.opt = tf.train.AdadeltaOptimizer(learning_rate=self.lr)
            if config.grad_clip_flag:
                grads = self.opt.compute_gradients(self.loss)
                gradients, variables = zip(*grads)
                capped_grads, _ = tf.clip_by_global_norm(
                    gradients, config.grad_clip)
                self.train_op = self.opt.apply_gradients(
                    zip(capped_grads, variables), global_step=self.global_step)
            else:
                self.train_op = self.opt.minimize(self.loss,
                                                  global_step=self.global_step)

            # ema
            if config.decay:
                self.ema = tf.train.ExponentialMovingAverage(config.decay)
                # The averaging op must be *created* inside the dependency
                # scope: control dependencies only attach to ops built
                # within the block. Building it outside would leave the EMA
                # update unordered with respect to the optimizer step, so it
                # could average the weights from before the update.
                with tf.control_dependencies([self.train_op]):
                    ema_op = self.ema.apply(tf.trainable_variables())
                    self.train_op = tf.group(ema_op)
예제 #10
0
    def __init__(self,
                 config,
                 batch,
                 word_mat=None,
                 char_mat=None,
                 trainable=True,
                 opt=False,
                 demo=False,
                 graph=None):
        """Create the inputs, the network and (optionally) the Adam train op.

        Everything is built inside ``self.graph``.

        Args:
            config: settings object; this constructor reads
                ``test_para_limit``, ``test_ques_limit``, ``char_limit``,
                ``num_cans``, ``batch_size``, ``para_limit``, ``ques_limit``,
                ``learning_rate`` and ``grad_clip`` from it.
            batch: object whose ``get_next()`` yields the seven input
                tensors; only used when ``demo`` is falsy.
            word_mat: word-embedding table, handed to ``tf.constant``; the
                resulting variable is not trainable.
            char_mat: character-embedding table, handed to ``tf.constant``.
            trainable: when truthy, the learning-rate schedule and the
                gradient-clipped Adam update are created.
            opt: when truthy, inputs are cropped to the longest sequence of
                the current batch; otherwise the configured limits are used.
            demo: when truthy, inputs are placeholders and the crop uses a
                batch dimension of 1.
            graph: graph to build in; a fresh ``tf.Graph`` when omitted.
        """
        self.config = config
        self.demo = demo
        self.graph = tf.Graph() if graph is None else graph

        def _real_token_count(mask):
            """Number of True entries along axis 1 of a boolean mask."""
            return tf.reduce_sum(tf.cast(mask, tf.int32), axis=1)

        def _flat_char_count(char_ids):
            """Non-zero character ids per token, flattened to one dimension."""
            present = tf.cast(tf.cast(char_ids, tf.bool), tf.int32)
            return tf.reshape(tf.reduce_sum(present, axis=2), [-1])

        with self.graph.as_default():

            self.global_step = tf.get_variable(
                'global_step',
                shape=[],
                dtype=tf.int32,
                initializer=tf.constant_initializer(0),
                trainable=False)
            self.dropout = tf.placeholder_with_default(0.0, [], name='dropout')

            # Model input: placeholders for the demo, iterator outputs
            # otherwise.
            if self.demo:
                para_limit = config.test_para_limit
                self.c = tf.placeholder(tf.int32, [None, para_limit],
                                        'context')
                self.q = tf.placeholder(tf.int32,
                                        [None, config.test_ques_limit],
                                        'question')
                self.ch = tf.placeholder(
                    tf.int32,
                    [None, config.test_para_limit, config.char_limit],
                    'context_char')
                self.qh = tf.placeholder(
                    tf.int32,
                    [None, config.test_ques_limit, config.char_limit],
                    'question_char')
                self.ans = tf.placeholder(tf.int32,
                                          [None, config.test_para_limit],
                                          'answer')
                self.cans = tf.placeholder(
                    tf.int32,
                    [None, config.num_cans, config.test_para_limit],
                    'candidates')
                self.y_true = tf.placeholder(tf.int32,
                                             [None, config.num_cans],
                                             'y_true')
            else:
                (self.c, self.q, self.ch, self.qh, self.ans, self.cans,
                 self.y_true) = batch.get_next()

            # Word embeddings are frozen; character embeddings use the
            # default trainable setting of tf.get_variable.
            word_init = tf.constant(word_mat, dtype=tf.float32)
            self.word_mat = tf.get_variable(
                'word_mat', initializer=word_init, trainable=False)
            char_init = tf.constant(char_mat, dtype=tf.float32)
            self.char_mat = tf.get_variable('char_mat', initializer=char_init)

            # Non-zero ids mark real tokens; the mask sums give the length
            # of each row.
            self.c_mask = tf.cast(self.c, tf.bool)
            self.q_mask = tf.cast(self.q, tf.bool)
            self.c_len = _real_token_count(self.c_mask)
            self.q_len = _real_token_count(self.q_mask)

            if opt:
                rows = 1 if self.demo else config.batch_size
                chars = config.char_limit
                self.c_maxlen = tf.reduce_max(self.c_len)
                self.q_maxlen = tf.reduce_max(self.q_len)

                origin_2d, origin_3d = [0, 0], [0, 0, 0]

                self.c = tf.slice(self.c, origin_2d, [rows, self.c_maxlen])
                self.q = tf.slice(self.q, origin_2d, [rows, self.q_maxlen])
                self.c_mask = tf.slice(self.c_mask, origin_2d,
                                       [rows, self.c_maxlen])
                self.q_mask = tf.slice(self.q_mask, origin_2d,
                                       [rows, self.q_maxlen])
                self.ch = tf.slice(self.ch, origin_3d,
                                   [rows, self.c_maxlen, chars])
                self.qh = tf.slice(self.qh, origin_3d,
                                   [rows, self.q_maxlen, chars])
                self.ans = tf.slice(self.ans, origin_2d,
                                    [rows, self.c_maxlen])
                self.cans = tf.slice(
                    self.cans, origin_3d,
                    [rows, config.num_cans, self.c_maxlen])
                self.y_true = tf.slice(self.y_true, origin_2d,
                                       [rows, config.num_cans])
            else:
                self.c_maxlen = config.para_limit
                self.q_maxlen = config.ques_limit

            self.ch_len = _flat_char_count(self.ch)
            self.qn_len = _flat_char_count(self.qh)

            self.forward()
            total_params()

            if trainable:
                # Learning rate grows with log(step + 1) and is capped at
                # config.learning_rate.
                warmup_scale = 0.001 / tf.log(999.0)
                step_as_float = tf.cast(self.global_step, tf.float32)
                warmup_lr = warmup_scale * tf.log(step_as_float + 1)
                self.lr = tf.minimum(config.learning_rate, warmup_lr)

                self.opt = tf.train.AdamOptimizer(
                    learning_rate=self.lr,
                    beta1=0.8,
                    beta2=0.999,
                    epsilon=1e-7)
                grads_and_vars = self.opt.compute_gradients(self.loss)
                raw_grads, train_vars = zip(*grads_and_vars)
                # Clip by global norm before applying the update.
                clipped_grads, _ = tf.clip_by_global_norm(
                    raw_grads, config.grad_clip)
                self.train_op = self.opt.apply_gradients(
                    zip(clipped_grads, train_vars),
                    global_step=self.global_step)