Example #1
def bi_lstm(X_inputs, X_tag_inputs):
    nil_vars = set()
    # word and tag embedding lookups (keep_prob=1.0 disables dropout)
    word_embed_layer = Embedding(
        params=word_weights, ids=X_inputs,
        keep_prob=1.0, name='word_embed_layer')
    tag_embed_layer = Embedding(
        params=tag_weights, ids=X_tag_inputs,
        keep_prob=1.0, name='tag_embed_layer')
    nil_vars.add(word_embed_layer.params.name)
    nil_vars.add(tag_embed_layer.params.name)

    # concatenate word and tag embeddings along the feature axis
    sentence_input = tf.concat(
        values=[word_embed_layer.output, tag_embed_layer.output], axis=2)
    inputs = sentence_input

    # single-direction LSTM over the sentence
    lstmCell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
    lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=1.0)
    result, _ = tf.nn.dynamic_rnn(lstmCell, inputs, dtype=tf.float32)

    # take the output of the last time step: [batch_size, hidden_size]
    result = tf.transpose(result, [1, 0, 2])
    output = tf.gather(result, int(result.get_shape()[0]) - 1)
    return output
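The `Embedding` wrapper used above is project-internal and not shown in these examples. A minimal sketch of what it plausibly does, assuming only the constructor arguments and attributes seen here (`params`, `ids`, `keep_prob`, and the `.params` / `.output` attributes), is given below; treat it as an assumption, not the actual implementation.

import numpy as np
import tensorflow as tf

class Embedding(object):
    """Hypothetical reconstruction of the Embedding wrapper used above (assumption)."""
    def __init__(self, params, ids, keep_prob=1.0, name='embed_layer'):
        with tf.variable_scope(name):
            # embedding table initialized from the pre-trained weight matrix
            self.params = tf.get_variable(
                'embedding', initializer=np.asarray(params, dtype=np.float32))
            looked_up = tf.nn.embedding_lookup(self.params, ids)
            # keep_prob may be a Python float or a placeholder; 1.0 disables dropout
            self.output = tf.nn.dropout(looked_up, keep_prob)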
Example #2
    def __init__(self, max_len, word_weights, tag_weights, result_path=None, label_voc=None):
        """
        Initilize model
        Args:
            max_len: int, 句子最大长度
            word_weights: np.array, shape=[|V_words|, w2v_dim],词向量
            tag_weights: np.array, shape=[|V_tags|, t2v_dim],标记向量
            result_path: str, 模型评价结果存放路径
            label_voc: dict
        """
        self._result_path = result_path
        self._label_voc = label_voc
        self._label_voc_rev = dict()
        for key in self._label_voc:
            value = self._label_voc[key]
            self._label_voc_rev[value] = key

        # input placeholders
        self.input_sentence_ph = tf.placeholder(
            tf.int32, shape=(None, max_len), name='input_sentence_ph')
        self.input_tag_ph = tf.placeholder(tf.int32, shape=(None, max_len), name='input_tag_ph')
        self.label_ph = tf.placeholder(tf.int32, shape=(None,), name='label_ph')
        self.keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob')
        self.word_keep_prob_ph = tf.placeholder(tf.float32, name='word_keep_prob')
        self.tag_keep_prob_ph = tf.placeholder(tf.float32, name='tag_keep_prob')

        # embedding layers
        self.nil_vars = set()
        word_embed_layer = Embedding(
            params=word_weights, ids=self.input_sentence_ph,
            keep_prob=self.word_keep_prob_ph, name='word_embed_layer')
        tag_embed_layer = Embedding(
            params=tag_weights, ids=self.input_tag_ph,
            keep_prob=self.tag_keep_prob_ph, name='tag_embed_layer')
        self.nil_vars.add(word_embed_layer.params.name)
        self.nil_vars.add(tag_embed_layer.params.name)

        # sentence representation
        sentence_input = tf.concat(
            values=[word_embed_layer.output, tag_embed_layer.output], axis=2)

        # sentence conv
        conv_layer = Convolutional1D(
            input_data=sentence_input, filter_length=3,
            nb_filter=1000, activation='relu', name='conv_layer')

        # dense layer
        dense_input_drop = tf.nn.dropout(conv_layer.output, self.keep_prob_ph)
        self.dense_layer = SoftmaxDense(
            input_data=dense_input_drop, input_dim=conv_layer.output_dim,
            output_dim=config.NB_LABELS, name='output_layer')

        self.loss = self.dense_layer.loss(self.label_ph) + \
            0.001*tf.nn.l2_loss(self.dense_layer.weights)
        optimizer = tf.train.AdamOptimizer()  # Adam
        grads_and_vars = optimizer.compute_gradients(self.loss)
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self.nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # train op
        self.train_op = optimizer.apply_gradients(
            nil_grads_and_vars, name='train_op', global_step=global_step)

        # pre op
        self.pre_op = self.dense_layer.get_pre_y()

        # session (GPU options)
        gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # init model
        init = tf.global_variables_initializer()
        self.sess.run(init)
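A minimal sketch of a training step with this model; `model`, `x_batch`, `t_batch`, `y_batch` and the keep-prob values are illustrative assumptions, not part of the source:

# x_batch, t_batch: int32 arrays of shape [batch_size, max_len]; y_batch: [batch_size]
feed_dict = {
    model.input_sentence_ph: x_batch,
    model.input_tag_ph: t_batch,
    model.label_ph: y_batch,
    model.keep_prob_ph: 0.5,        # illustrative dropout keep probabilities
    model.word_keep_prob_ph: 0.9,
    model.tag_keep_prob_ph: 0.9,
}
_, loss_value = model.sess.run([model.train_op, model.loss], feed_dict=feed_dict)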
Example #3
def bi_lstm(X_inputs, X_tag_inputs):
    """Build the bi-LSTM network and return its output (the last time-step representation)."""
    # X_inputs.shape = [batch_size, timestep_size]
    #   -> inputs.shape = [batch_size, timestep_size, embedding_size]
    # inputs = tf.nn.embedding_lookup(embedding, X_inputs)
    nil_vars = set()
    word_embed_layer = Embedding(params=word_weights,
                                 ids=X_inputs,
                                 keep_prob=1.0,
                                 name='word_embed_layer')
    tag_embed_layer = Embedding(params=tag_weights,
                                ids=X_tag_inputs,
                                keep_prob=1.0,
                                name='tag_embed_layer')
    nil_vars.add(word_embed_layer.params.name)
    nil_vars.add(tag_embed_layer.params.name)

    sentence_input = tf.concat(
        values=[word_embed_layer.output, tag_embed_layer.output], axis=2)
    inputs = sentence_input

    # single-direction LSTM over the sentence
    lstmCell = tf.contrib.rnn.BasicLSTMCell(hidden_size)
    lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=1.0)
    result, _ = tf.nn.dynamic_rnn(lstmCell, inputs, dtype=tf.float32)
    # take the output of the last time step: [batch_size, hidden_size]
    result = tf.transpose(result, [1, 0, 2])
    output = tf.gather(result, int(result.get_shape()[0]) - 1)
    """# ** 1.构建前向后向多层 LSTM
    cell_fw = rnn.MultiRNNCell([lstm_cell() for _ in range(layer_num)], state_is_tuple=True)
    cell_bw = rnn.MultiRNNCell([lstm_cell() for _ in range(layer_num)], state_is_tuple=True)
  
    # ** 2.初始状态
    initial_state_fw = cell_fw.zero_state(batch_size, tf.float32)
    initial_state_bw = cell_bw.zero_state(batch_size, tf.float32)  
    
    # 下面两部分是等价的
    # **************************************************************
    # ** 把 inputs 处理成 rnn.static_bidirectional_rnn 的要求形式
    # ** 文档说明
    # inputs: A length T list of inputs, each a tensor of shape
    # [batch_size, input_size], or a nested tuple of such elements.
    # *************************************************************
    # Unstack to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    # inputs.shape = [batchsize, timestep_size, embedding_size]  ->  timestep_size tensor, each_tensor.shape = [batchsize, embedding_size]
    inputs = tf.unstack(inputs, timestep_size, 1)
    # ** 3.bi-lstm 计算(tf封装)  一般采用下面 static_bidirectional_rnn 函数调用。
     try:
         outputs, _, _ = rnn.static_bidirectional_rnn(cell_fw, cell_bw, inputs, 
                         initial_state_fw = initial_state_fw, initial_state_bw = initial_state_bw, dtype=tf.float32)
     except Exception: # Old TensorFlow version only returns outputs not states
         outputs = rnn.static_bidirectional_rnn(cell_fw, cell_bw, inputs, 
                         initial_state_fw = initial_state_fw, initial_state_bw = initial_state_bw, dtype=tf.float32)
     output = tf.reshape(tf.concat(outputs, 1), [-1, hidden_size * 2])
     output = tf.gather(result, int(result.get_shape()[0]) - 1)

     #***********************************************************"""

    # ***********************************************************
    # ** 3. bi-LSTM computation (unrolled by hand)
    # inputs = tf.unstack(inputs, timestep_size, 1)
    """with tf.variable_scope('bidirectional_rnn'):
        # *** 下面,两个网络是分别计算 output 和 state 
        # Forward direction
        outputs_fw = list()
        state_fw = initial_state_fw
        with tf.variable_scope('fw'):
            for timestep in range(timestep_size):
                if timestep > 0:
                    tf.get_variable_scope().reuse_variables()
                (output_fw, state_fw) = cell_fw(inputs[:, timestep, :], state_fw)
                outputs_fw.append(output_fw)
        
        # backward direction
        outputs_bw = list()
        state_bw = initial_state_bw
        with tf.variable_scope('bw') as bw_scope:
            inputs = tf.reverse(inputs, [1])
            for timestep in range(timestep_size):
                if timestep > 0:
                    tf.get_variable_scope().reuse_variables()
                (output_bw, state_bw) = cell_bw(inputs[:, timestep, :], state_bw)
                outputs_bw.append(output_bw)
        # *** 然后把 output_bw 在 timestep 维度进行翻转
        # outputs_bw.shape = [timestep_size, batch_size, hidden_size]
        outputs_bw = tf.reverse(outputs_bw, [0])
        # 把两个oupputs 拼成 [timestep_size, batch_size, hidden_size*2]
        output = tf.concat([outputs_fw, outputs_bw], 2)
        output = tf.transpose(output, perm=[1,0,2])
        output = tf.reshape(output, [-1, hidden_size*2])"""
    # ***********************************************************
    return output  # [batch_size, hidden_size] ([-1, hidden_size*2] for the bi-LSTM variants above)
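The returned representation is usually projected to label scores before use; a hedged sketch of that projection follows. The variable names, `n_classes`, and the softmax weights are assumptions for illustration, not part of the source.

# hypothetical projection from the LSTM output to class predictions
lstm_output = bi_lstm(X_inputs, X_tag_inputs)            # [batch_size, hidden_size]
softmax_w = tf.get_variable(
    'softmax_w', shape=[hidden_size, n_classes],
    initializer=tf.truncated_normal_initializer(stddev=0.1))
softmax_b = tf.get_variable('softmax_b', shape=[n_classes],
                            initializer=tf.zeros_initializer())
logits = tf.matmul(lstm_output, softmax_w) + softmax_b
y_pred = tf.argmax(logits, axis=1)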
Example #4
    def __init__(self, max_len, word_weights, char_weights, tag_weights, model_path=None, label_voc=None):
        """
        Initilize model
        Args:
            max_len: int, 句子最大长度
            word_weights: np.array, shape=[|V_words|, w2v_dim],词向量
            tag_weights: np.array, shape=[|V_tags|, t2v_dim],标记向量
            result_path: str, 模型评价结果存放路径
            label_voc: dict
        """
        tf.reset_default_graph() 
        self._model_path = model_path
        self._label_voc = label_voc
        self._label_voc_rev = dict()
        
        for key in self._label_voc:
            value = self._label_voc[key]
            self._label_voc_rev[value] = key

        # input placeholders
        self.input_sentence_ph = tf.placeholder(
            tf.int32, shape=(None, max_len), name='input_sentence_ph')
        self.input_tag_ph = tf.placeholder(tf.int32, shape=(None, max_len), name='input_tag_ph')
        self.label_ph = tf.placeholder(tf.int32, shape=(None,), name='label_ph')
        self.keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob')
        self.word_keep_prob_ph = tf.placeholder(tf.float32, name='word_keep_prob')
        self.tag_keep_prob_ph = tf.placeholder(tf.float32, name='tag_keep_prob')
        
        # shape = (batch size, max length of sentence, max length of word)
        self.input_char_ph = tf.placeholder(tf.int32, shape=[None, None, None],
                        name="char_ids")
        # shape = (batch_size, max_length of sentence)
        self.word_lengths = tf.placeholder(tf.int32, shape=[None, None],
                        name="word_lengths")

        # embedding layers
        self.nil_vars = set()
        word_embed_layer = Embedding(
            params=word_weights, ids=self.input_sentence_ph,
            keep_prob=self.word_keep_prob_ph, name='word_embed_layer')
        
        tag_embed_layer = Embedding(
            params=tag_weights, ids=self.input_tag_ph,
            keep_prob=self.tag_keep_prob_ph, name='tag_embed_layer')
        
        self.nil_vars.add(word_embed_layer.params.name)
        self.nil_vars.add(tag_embed_layer.params.name)

        if config.use_chars:
            # get char embeddings matrix
            char_embed_layer = Embedding(
                params=char_weights, ids=self.input_char_ph, name='char_embed_layer')
            self.nil_vars.add(char_embed_layer.params.name)

            # put the time dimension on axis=1
            char_embeddings = char_embed_layer.output
            s = tf.shape(char_embeddings)
            char_embeddings = tf.reshape(
                char_embeddings, shape=[s[0] * s[1], s[-2], config.C2V_DIM])
            word_lengths = tf.reshape(self.word_lengths, shape=[s[0] * s[1]])

            # bi-LSTM over characters
            cell_fw = tf.contrib.rnn.LSTMCell(128, forget_bias=1.0,  # config.hidden_size_char
                                              state_is_tuple=True)
            cell_bw = tf.contrib.rnn.LSTMCell(128, forget_bias=1.0,  # config.hidden_size_char
                                              state_is_tuple=True)
            _output = tf.nn.bidirectional_dynamic_rnn(
                cell_fw, cell_bw, char_embeddings, time_major=False,
                sequence_length=word_lengths, dtype=tf.float32)

            # read and concat output
            _, ((_, output_fw), (_, output_bw)) = _output
            output = tf.concat([output_fw, output_bw], axis=-1)

            # shape = (batch size, max sentence length, char hidden size)
            output = tf.reshape(
                output, shape=[s[0], s[1], 2 * 128])  # 2 * config.hidden_size_char

            other_embedding = tf.concat([output, tag_embed_layer.output], axis=-1)
        else:
            other_embedding = tag_embed_layer.output
        
        # sentence representation
        sentence_input = tf.concat(
            values=[word_embed_layer.output, other_embedding], axis=2)

        # second conv branch over the transposed representation (features as the "time" axis)
        sentence_input1 = tf.transpose(sentence_input, [0, 2, 1])
        conv_layer1 = Convolutional1D(
            input_data=sentence_input1, filter_length=3,
            nb_filter=1000, activation='relu', name='conv_layer1')
        
        # sentence conv
        conv_layer = Convolutional1D(
            input_data=sentence_input, filter_length=3,
            nb_filter=1000, activation='relu', name='conv_layer')

        # dense layer
        conv_output = tf.concat([conv_layer.output, conv_layer1.output], axis=-1)
        dense_input_drop = tf.nn.dropout(conv_output, self.keep_prob_ph)
        self.dense_layer = SoftmaxDense(
            input_data=dense_input_drop, input_dim=conv_layer.output_dim + conv_layer1.output_dim,
            output_dim=config.NB_LABELS, name='output_layer')

        self.loss = self.dense_layer.loss(self.label_ph) + \
            0.001*tf.nn.l2_loss(self.dense_layer.weights)
        optimizer = tf.train.AdamOptimizer()  # Adam
        grads_and_vars = optimizer.compute_gradients(self.loss)
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self.nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))
        global_step = tf.Variable(0, name='global_step', trainable=False)

        # train op
        self.train_op = optimizer.apply_gradients(
            nil_grads_and_vars, name='train_op', global_step=global_step)

        # pre op
        self.pre_op = self.dense_layer.get_pre_y()
        #self.pre_ouput_op = self.dense_layer.output()
        self.proba_op = self.dense_layer.get_pre_proba()

        # session (GPU options)
        gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # init model
        init = tf.global_variables_initializer()
        self.sess.run(init)
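A minimal prediction sketch for this model, with dropout disabled by feeding keep probabilities of 1.0; `model` and the batch arrays (`x_batch`, `t_batch`, `c_batch`, `wl_batch`) are illustrative assumptions:

feed_dict = {
    model.input_sentence_ph: x_batch,     # [batch_size, max_len] word ids
    model.input_tag_ph: t_batch,          # [batch_size, max_len] tag ids
    model.keep_prob_ph: 1.0,
    model.word_keep_prob_ph: 1.0,
    model.tag_keep_prob_ph: 1.0,
}
if config.use_chars:
    feed_dict[model.input_char_ph] = c_batch   # [batch_size, max_len, max_word_len]
    feed_dict[model.word_lengths] = wl_batch   # [batch_size, max_len]
labels, probas = model.sess.run([model.pre_op, model.proba_op], feed_dict=feed_dict)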