Example 1
    def build_model(self):

        # init ph, weights and dropout rate
        self.input_feature_ph_dict = dict()
        # build the feature weight dictionaries
        self.weight_dropout_ph_dict = dict()
        self.feature_weight_dict = dict()
        self.nil_vars = set()
        self.dropout_rate_ph = tf.placeholder(tf.float32, name='dropout_rate_ph')
        # label ph
        self.input_label_ph = tf.placeholder(
            dtype=tf.int32, shape=[None, self.sequence_length], name='input_label_ph')
        # read in the features and build the feature structure
        for feature_name in self.feature_names:

            # input ph
            self.input_feature_ph_dict[feature_name] = tf.placeholder(
                dtype=tf.int32, shape=[None, self.sequence_length],
                name='input_feature_ph_%s' % feature_name)

            # dropout rate ph
            self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
                tf.float32, name='dropout_ph_%s' % feature_name)

            # init feature weights (initialize the ones not explicitly specified)
            if feature_name not in self.feature_init_weight_dict:
                feature_weight = uniform_tensor(
                    shape=self.feature_weight_shape_dict[feature_name],
                    name='f_w_%s' % feature_name)
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=feature_weight, name='feature_weight_%s' % feature_name)
            else:
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=self.feature_init_weight_dict[feature_name],
                    name='feature_weight_%s' % feature_name)
            self.nil_vars.add(self.feature_weight_dict[feature_name].name)

            # init dropout rate (initialize the ones not explicitly specified)
            if feature_name not in self.feature_weight_dropout_dict:
                self.feature_weight_dropout_dict[feature_name] = 0.

        # init embeddings
        # encode the features and concatenate them
        self.embedding_features = []
        for feature_name in self.feature_names:
            embedding_feature = tf.nn.dropout(tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
                keep_prob=1.-self.weight_dropout_ph_dict[feature_name],
                name='embedding_feature_dropout_%s' % feature_name)
            self.embedding_features.append(embedding_feature)

        # concat all features
        # concatenate the per-word features into a sentence representation
        input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
            else tf.concat(values=self.embedding_features, axis=2, name='input_features')

        # cnn

        cnn_output = self.IDCNN_layer(input_features)

        # # bi-lstm
        #
        # if self.rnn_unit == 'lstm':
        #     fw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
        #     bw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
        # elif self.rnn_unit == 'gru':
        #     fw_cell = rnn.GRUCell(self.nb_hidden)
        #     bw_cell = rnn.GRUCell(self.nb_hidden)
        # else:
        #     raise ValueError('rnn_unit must in (lstm, gru)!')
        # compute the actual length of self.input_features[feature_names[0]] (0 is the padding value)
        self.sequence_actual_length = get_sequence_actual_length(  # actual length of each sentence
            self.input_feature_ph_dict[self.feature_names[0]])
        # # print(input_features)
        # rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        #     fw_cell, bw_cell, input_features, scope='bi-lstm',
        #     dtype=tf.float32, sequence_length=self.sequence_actual_length)
        # # shape = [batch_size, max_len, nb_hidden*2]
        # # after dropout, the input/output dimensions stay unchanged
        # lstm_output = tf.nn.dropout(
        #     tf.concat(rnn_outputs, axis=2, name='lstm_output'),
        #     keep_prob=1.-self.dropout_rate_ph, name='lstm_output_dropout')
        #
        # # softmax
        # # reshape the outputs
        # self.outputs = tf.reshape(lstm_output, [-1, self.nb_hidden*2], name='outputs')

        self.softmax_w = tf.get_variable('softmax_w', [self.cnn_output_width, self.nb_classes])
        self.softmax_b = tf.get_variable('softmax_b', [self.nb_classes])
        self.logits = tf.reshape(
            tf.matmul(cnn_output, self.softmax_w) + self.softmax_b,
            shape=[-1, self.sequence_length, self.nb_classes], name='logits')

        # compute the loss
        self.loss = self.compute_loss()
        self.l2_loss = self.l2_rate * (tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))

        self.total_loss = self.loss + self.l2_loss

        # train op
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.total_loss)
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self.nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))

        global_step = tf.Variable(0, name='global_step', trainable=False)
        if self.clip:
            # clip by global norm
            gradients, variables = zip(*nil_grads_and_vars)
            gradients, _ = tf.clip_by_global_norm(gradients, self.clip)
            self.train_op = optimizer.apply_gradients(
                zip(gradients, variables), name='train_op', global_step=global_step)
        else:
            self.train_op = optimizer.apply_gradients(
                nil_grads_and_vars, name='train_op', global_step=global_step)

        # TODO: sess and visible_device_list still need to be revised
        gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # init all variable
        init = tf.global_variables_initializer()
        self.sess.run(init)
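
The helpers get_sequence_actual_length and zero_nil_slot are used throughout these examples but are not shown in this listing. Below is a minimal sketch of plausible implementations, assuming index 0 is the padding id and that the padding ("nil") embedding row should never receive gradient updates; the actual project code may differ.

import tensorflow as tf

def zero_nil_slot(t, name=None):
    """Zero out row 0 of a gradient tensor so the padding ('nil') embedding
    slot is never updated by the optimizer."""
    with tf.name_scope(name, 'zero_nil_slot'):
        t = tf.convert_to_tensor(t)  # embedding gradients may arrive as IndexedSlices
        num_cols = tf.shape(t)[1]
        zero_row = tf.zeros(tf.stack([1, num_cols]), dtype=t.dtype)
        return tf.concat([zero_row, t[1:, :]], axis=0)

def get_sequence_actual_length(ids, dim=1):
    """Count the non-zero (non-padding) ids along `dim` to recover each
    padded sequence's true length."""
    used = tf.sign(tf.abs(ids))
    return tf.cast(tf.reduce_sum(used, axis=dim), tf.int32)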
Example 2
    def build_model(self):
        for feature_name in self._feature_names:

            # input ph
            self.input_feature_ph_dict[feature_name] = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self._sequence_length],
                name='input_feature_ph_%s' % feature_name)

            # dropout rate ph
            self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
                tf.float32, name='dropout_ph_%s' % feature_name)

            # init feature weights (initialize the ones not explicitly specified)
            if feature_name not in self._feature_init_weight_dict:
                feature_weight = uniform_tensor(
                    shape=self._feature_weight_shape_dict[feature_name],
                    name='f_w_%s' % feature_name)
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=feature_weight,
                    name='feature_weight_%s' % feature_name)
            else:
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=self._feature_init_weight_dict[feature_name],
                    name='feature_weight_%s' % feature_name)
                self.nil_vars.add(self.feature_weight_dict[feature_name].name)

            # init dropout rate (initialize the ones not explicitly specified)
            if feature_name not in self._feature_weight_dropout_dict:
                self._feature_weight_dropout_dict[feature_name] = 0.
        # char feature
        if self._use_char_feature:
            # char feature weights
            feature_weight = uniform_tensor(
                shape=self._feature_weight_shape_dict['char'],
                name='f_w_%s' % 'char')
            self.feature_weight_dict['char'] = tf.Variable(
                initial_value=feature_weight,
                name='feature_weight_%s' % 'char')
            self.nil_vars.add(self.feature_weight_dict['char'].name)
            self.input_feature_ph_dict['char'] = tf.placeholder(
                dtype=tf.int32,
                shape=[None, self._sequence_length, self._word_length],
                name='input_feature_ph_%s' % 'char')

        # init embeddings
        self.embedding_features = []
        for feature_name in self._feature_names:
            print(self.input_feature_ph_dict[feature_name].shape)
            embedding_feature = tf.nn.dropout(
                tf.nn.embedding_lookup(
                    self.feature_weight_dict[feature_name],
                    ids=self.input_feature_ph_dict[feature_name],
                    name='embedding_feature_%s' % feature_name),
                keep_prob=1. - self.weight_dropout_ph_dict[feature_name],
                name='embedding_feature_dropout_%s' % feature_name)
            self.embedding_features.append(embedding_feature)
            print(embedding_feature.shape)
        # char embedding
        if self._use_char_feature:
            char_embedding_feature = tf.nn.embedding_lookup(
                self.feature_weight_dict['char'],
                ids=self.input_feature_ph_dict['char'],
                name='embedding_feature_%s' % 'char')
            # conv
            couv_feature_char = MultiConvolutional3D(
                char_embedding_feature,
                filter_length_list=self._conv_filter_len_list,
                nb_filter_list=self._conv_filter_size_list).output
            couv_feature_char = tf.nn.dropout(couv_feature_char,
                                              keep_prob=1 -
                                              self.cnn_dropout_rate_ph)

        # concat all features
        input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
            else tf.concat(values=self.embedding_features, axis=len(self._feature_names), name='input_features')
        print('input features shape', input_features.shape)

        if self._use_char_feature:
            input_features = tf.concat([input_features, couv_feature_char],
                                       axis=-1)

        # multi bi-lstm layer
        _fw_cells = []
        _bw_cells = []
        for _ in range(self._num_layers):
            fw, bw = self._get_rnn_unit(self._rnn_unit)
            _fw_cells.append(
                tf.nn.rnn_cell.DropoutWrapper(fw,
                                              output_keep_prob=1 -
                                              self.rnn_dropout_rate_ph))
            _bw_cells.append(
                tf.nn.rnn_cell.DropoutWrapper(bw,
                                              output_keep_prob=1 -
                                              self.rnn_dropout_rate_ph))
        fw_cell = tf.nn.rnn_cell.MultiRNNCell(_fw_cells)
        bw_cell = tf.nn.rnn_cell.MultiRNNCell(_bw_cells)

        # compute the actual length of self.input_features[feature_names[0]] (0 is the padding value)
        self.sequence_actual_length = get_sequence_actual_length(  # actual length of each sentence
            self.input_feature_ph_dict[self._feature_names[0]],
            dim=1)

        print(self.sequence_actual_length.shape)
        input_size = input_features.shape[-1]
        print('input_features shape ', input_features.shape)
        rnn_inputs = tf.reshape(input_features,
                                [-1, self._sequence_length, input_size])
        print('rnn inputs shape ', rnn_inputs.shape)
        rnn_lengths = tf.reshape(self.sequence_actual_length, [-1])

        # todo: add encoder output
        rnn_outputs, rnn_state = tf.nn.bidirectional_dynamic_rnn(
            fw_cell,
            bw_cell,
            rnn_inputs,
            scope='bi-lstm',
            dtype=tf.float32,
            sequence_length=rnn_lengths)

        # shape = [batch_size, max_len, nb_hidden*2]
        rnn_outputs = tf.concat(rnn_outputs, axis=2, name='lstm_output')
        rnn_outputs = tf.nn.dropout(rnn_outputs,
                                    keep_prob=1. - self.dropout_rate_ph,
                                    name='lstm_output_dropout')
        rnn_hidden = self.merge_bi_rnn_state(rnn_state).h
        #
        batch_size = tf.shape(input_features)[0]
        #
        print('rnn outputs shape', rnn_outputs.shape)
        print('rnn hidden shape', rnn_hidden.shape)
        #
        # rnn_outputs = tf.reshape(rnn_outputs,
        #                          [batch_size, turn_size, self._sequence_length, self._nb_hidden * 2])
        #
        # rnn_hidden = tf.reshape(rnn_hidden, [batch_size, turn_size, self._nb_hidden * 2])
        # # rnn_hidden = tf.nn.dropout(rnn_hidden, keep_prob=1. - self.dropout_rate_ph)
        # print('rnn outputs shape', rnn_outputs.shape)
        # print('rnn hidden shape', rnn_hidden.shape)
        #
        # # context rnn
        # ctx_cell = rnn.BasicLSTMCell(self._nb_hidden * 2, forget_bias=1., state_is_tuple=True)
        # ctx_lengths = get_sequence_actual_length(self.input_feature_ph_dict[self._feature_names[0]], dim=[1, 2])
        # print("ctx inputs shape", rnn_hidden.shape)
        # print('ctx lengths shape', ctx_lengths.shape)
        #
        # ctx_outputs, _ = tf.nn.dynamic_rnn(cell=ctx_cell,
        #                                    inputs=rnn_hidden,
        #                                    sequence_length=ctx_lengths,
        #                                    dtype=tf.float32)
        # # predict intents
        intent_logits = tf.layers.dense(rnn_hidden, 24, activation=None)  # NOTE: number of intent classes (24) is hard-coded
        label_intents = tf.reshape(self.input_label_intent, [-1])
        print('intent_logits shape', intent_logits.shape)
        print('input_label_intent shape', self.input_label_intent.shape)
        intent_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=label_intents, logits=intent_logits)
        intent_mask = tf.reshape(tf.sign(self.sequence_actual_length), [-1])
        intent_mask = tf.cast(intent_mask, dtype=tf.float32)
        print('intent_mask shape', intent_mask.shape)
        self.intent_loss = tf.reduce_sum(
            intent_loss * intent_mask) / tf.reduce_sum(intent_mask)

        pred_intents = tf.argmax(intent_logits, axis=1)
        self.pred_intents = tf.reshape(pred_intents, [-1])
        print('pred_intents shape', self.pred_intents.shape)
        correct_preds = tf.equal(tf.cast(pred_intents, dtype=tf.int32),
                                 tf.cast(label_intents, dtype=tf.int32))

        self.intent_accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32) * intent_mask) \
                               / tf.reduce_sum(intent_mask)

        self.intent_count = tf.cast(tf.reduce_sum(intent_mask), tf.int32)

        self.intent_logits = intent_logits

        # predict slots
        # batch * turn * hidden
        # print('ctx outputs shape', ctx_outputs.shape)
        # ctx_outputs = tf.reshape(ctx_outputs, [batch_size, turn_size, self._nb_hidden * 2])
        # rnn_intent_outputs = [ctx_outputs for _ in range(self._sequence_length)]
        # rnn_intent_outputs = tf.stack(rnn_intent_outputs, axis=2)
        # print('rnn intent outputs', rnn_intent_outputs.shape)
        #
        # ctx_h = tf.reshape(ctx_outputs[:, :, :self._nb_hidden], [-1, self._nb_hidden])
        # ctx_c = tf.reshape(ctx_outputs[:, :, self._nb_hidden:], [-1, self._nb_hidden])
        #
        # init_fw_hidden = []
        # init_bw_hidden = []
        # for _ in range(self._num_layers):
        #     lstm_hidden = rnn.LSTMStateTuple(h=ctx_h, c=ctx_c)
        #     init_fw_hidden += [lstm_hidden]
        #     init_bw_hidden += [lstm_hidden]
        # init_fw_hidden = tuple(init_fw_hidden)
        # init_bw_hidden = tuple(init_bw_hidden)

        # slot_outputs = tf.concat([rnn_outputs, rnn_intent_outputs], axis=3)
        # slot_outputs = tf.reshape(slot_outputs, [-1, self._nb_hidden * 4])

        # run the rnn again with init state

        # slot_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
        #     fw_cell, bw_cell, rnn_inputs,
        #     scope='bi-lstm',
        #     initial_state_fw=init_fw_hidden,
        #     initial_state_bw=init_bw_hidden,
        #     dtype=tf.float32,
        #     sequence_length=rnn_lengths)

        # slot_outputs = tf.concat(slot_outputs, axis=2, name='slot_output')

        slot_logits = tf.layers.dense(rnn_outputs,
                                      self._nb_classes,
                                      activation=None)
        self.slot_logits = tf.reshape(
            slot_logits, [batch_size, self._sequence_length, self._nb_classes])
        print('slot logits shape', self.slot_logits.shape)
        slot_labels = tf.reshape(self.input_label_ph,
                                 [-1, self._sequence_length])
        slot_logits = tf.reshape(self.slot_logits,
                                 [-1, self._sequence_length, self._nb_classes])
        slot_lengths = tf.reshape(self.sequence_actual_length, [-1])
        log_likelihood, self.transition_params = tf.contrib.crf.crf_log_likelihood(
            slot_logits, slot_labels, slot_lengths)
        print('transition params shape', self.transition_params.shape)
        print('log likelihood loss', log_likelihood.shape)
        self.slot_loss = tf.reduce_sum(
            -log_likelihood * intent_mask) / tf.reduce_sum(intent_mask)
        self.total_loss = self.intent_loss + self.slot_loss
        self.train_loss = self.slot_loss + self.intent_loss * self.intent_weight_ph

        # train op
        optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.train_loss)
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self.nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))

        global_step = tf.Variable(0, name='global_step', trainable=False)
        if self._clip:
            # clip by global norm
            gradients, variables = zip(*nil_grads_and_vars)
            gradients, _ = tf.clip_by_global_norm(gradients, self._clip)
            self.train_op = optimizer.apply_gradients(zip(
                gradients, variables),
                                                      name='train_op',
                                                      global_step=global_step)
        else:
            self.train_op = optimizer.apply_gradients(nil_grads_and_vars,
                                                      name='train_op',
                                                      global_step=global_step)

        # TODO: sess and visible_device_list still need to be revised
        gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # init all variable
        init = tf.global_variables_initializer()
        self.sess.run(init)
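
Example 2 learns CRF transition parameters through tf.contrib.crf.crf_log_likelihood but never shows decoding. Below is a hedged inference-time sketch; the function decode_slots and its feed values are hypothetical, and it assumes the placeholder attributes defined in the example above (with all dropout disabled at prediction time).

import numpy as np
import tensorflow as tf

def decode_slots(model, feature_batch):
    """Viterbi-decode slot tags for one batch using the learned CRF transitions."""
    feed = {model.input_feature_ph_dict[name]: feature_batch[name]
            for name in model._feature_names}
    for name in model._feature_names:
        feed[model.weight_dropout_ph_dict[name]] = 0.   # no embedding dropout at inference
    feed[model.dropout_rate_ph] = 0.
    feed[model.rnn_dropout_rate_ph] = 0.
    logits, lengths, trans = model.sess.run(
        [model.slot_logits, model.sequence_actual_length, model.transition_params],
        feed_dict=feed)
    paths = []
    for score, length in zip(logits, np.reshape(lengths, [-1])):
        # decode only the non-padded positions of each sentence
        viterbi_path, _ = tf.contrib.crf.viterbi_decode(score[:length], trans)
        paths.append(viterbi_path)
    return paths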
Example 3
    def build_model(self):
        for feature_name in self._feature_names:

            # input ph
            self.input_feature_ph_dict[feature_name] = tf.placeholder(
                dtype=tf.int32, shape=[None, self._sequence_length],
                name='input_feature_ph_%s' % feature_name)

            # dropout rate ph
            self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
                tf.float32, name='dropout_ph_%s' % feature_name)

            # init feature weights (initialize the ones not explicitly specified)
            if feature_name not in self._feature_init_weight_dict:
                feature_weight = uniform_tensor(
                    shape=self._feature_weight_shape_dict[feature_name],
                    name='f_w_%s' % feature_name)
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=feature_weight, name='feature_weight_%s' % feature_name)
            else:
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=self._feature_init_weight_dict[feature_name],
                    name='feature_weight_%s' % feature_name)
                self.nil_vars.add(self.feature_weight_dict[feature_name].name)

            # init dropout rate (initialize the ones not explicitly specified)
            if feature_name not in self._feature_weight_dropout_dict:
                self._feature_weight_dropout_dict[feature_name] = 0.
        # char feature
        if self._use_char_feature:
            # char feature weights
            feature_weight = uniform_tensor(
                shape=self._feature_weight_shape_dict['char'], name='f_w_%s' % 'char')
            self.feature_weight_dict['char'] = tf.Variable(
                initial_value=feature_weight, name='feature_weight_%s' % 'char')
            self.nil_vars.add(self.feature_weight_dict['char'].name)
            self.input_feature_ph_dict['char'] = tf.placeholder(
                dtype=tf.int32, shape=[None, self._sequence_length, self._word_length],
                name='input_feature_ph_%s' % 'char')

        # init embeddings
        self.embedding_features = []
        for feature_name in self._feature_names:
            embedding_feature = tf.nn.dropout(tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
                keep_prob=1.-self.weight_dropout_ph_dict[feature_name],
                name='embedding_feature_dropout_%s' % feature_name)
            self.embedding_features.append(embedding_feature)
        # char embedding
        if self._use_char_feature:
            char_embedding_feature = tf.nn.embedding_lookup(
                self.feature_weight_dict['char'],
                ids=self.input_feature_ph_dict['char'],
                name='embedding_feature_%s' % 'char')
            # conv
            couv_feature_char = MultiConvolutional3D(
                char_embedding_feature, filter_length_list=self._conv_filter_len_list,
                nb_filter_list=self._conv_filter_size_list).output
            couv_feature_char = tf.nn.dropout(
                couv_feature_char, keep_prob=1-self.cnn_dropout_rate_ph)

        # concat all features
        input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
            else tf.concat(values=self.embedding_features, axis=2, name='input_features')
        if self._use_char_feature:
            input_features = tf.concat([input_features, couv_feature_char], axis=-1)

        # multi bi-lstm layer
        _fw_cells = []
        _bw_cells = []
        for _ in range(self._num_layers):
            fw, bw = self._get_rnn_unit(self._rnn_unit)
            _fw_cells.append(tf.nn.rnn_cell.DropoutWrapper(fw, output_keep_prob=1-self.rnn_dropout_rate_ph))
            _bw_cells.append(tf.nn.rnn_cell.DropoutWrapper(bw, output_keep_prob=1-self.rnn_dropout_rate_ph))
        fw_cell = tf.nn.rnn_cell.MultiRNNCell(_fw_cells)
        bw_cell = tf.nn.rnn_cell.MultiRNNCell(_bw_cells)

        # compute the actual length of self.input_features[feature_names[0]] (0 is the padding value)
        self.sequence_actual_length = get_sequence_actual_length(  # actual length of each sentence
            self.input_feature_ph_dict[self._feature_names[0]])
        rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, input_features, scope='bi-lstm',
            dtype=tf.float32, sequence_length=self.sequence_actual_length)
        # shape = [batch_size, max_len, nb_hidden*2]
        lstm_output = tf.nn.dropout(
            tf.concat(rnn_outputs, axis=2, name='lstm_output'),
            keep_prob=1.-self.dropout_rate_ph, name='lstm_output_dropout')

        # softmax
        hidden_size = int(lstm_output.shape[-1])
        self.outputs = tf.reshape(lstm_output, [-1, hidden_size], name='outputs')
        self.softmax_w = tf.get_variable('softmax_w', [hidden_size, self._nb_classes])
        self.softmax_b = tf.get_variable('softmax_b', [self._nb_classes])
        self.logits = tf.reshape(
            tf.matmul(self.outputs, self.softmax_w) + self.softmax_b,
            shape=[-1, self._sequence_length, self._nb_classes], name='logits')

        # compute the loss
        self.loss = self.compute_loss()
        self.l2_loss = self._l2_rate * (tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))

        self.total_loss = self.loss + self.l2_loss

        # train op
        optimizer = tf.train.AdamOptimizer(learning_rate=self._learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.total_loss)
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self.nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))

        global_step = tf.Variable(0, name='global_step', trainable=False)
        if self._clip:
            # clip by global norm
            gradients, variables = zip(*nil_grads_and_vars)
            gradients, _ = tf.clip_by_global_norm(gradients, self._clip)
            self.train_op = optimizer.apply_gradients(
                zip(gradients, variables), name='train_op', global_step=global_step)
        else:
            self.train_op = optimizer.apply_gradients(
                nil_grads_and_vars, name='train_op', global_step=global_step)

        # TODO: sess and visible_device_list still need to be revised
        gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # init all variable
        init = tf.global_variables_initializer()
        self.sess.run(init)
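
Examples 1 and 3 call self.compute_loss() without defining it. One common choice for this kind of sequence tagger is a length-masked softmax cross-entropy; the sketch below is an assumption (attribute names follow Example 3), and the real project may use a CRF log-likelihood instead.

import tensorflow as tf

def compute_loss(self):
    # self.logits: [batch, sequence_length, nb_classes]
    # self.input_label_ph: [batch, sequence_length]
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=self.input_label_ph, logits=self.logits)
    # mask out the padded positions using the actual sequence lengths
    mask = tf.sequence_mask(self.sequence_actual_length,
                            maxlen=self._sequence_length, dtype=tf.float32)
    return tf.reduce_sum(cross_entropy * mask) / tf.reduce_sum(mask)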
Example 4
    def build_model(self):

        # init ph, weights and dropout rate
        self.input_feature_ph_dict = dict()

        self.input_char_ph_dict = dict()
        self.char_weight_dict = dict()
        # build the feature weight dictionaries
        self.weight_dropout_ph_dict = dict()
        self.weight_dropout_ph_dict['char'] = tf.placeholder(tf.float32, name='dropout_char')
        self.feature_weight_dict = dict()
        self.nil_vars = set()
        self.dropout_rate_ph = tf.placeholder(tf.float32, name='dropout_rate_ph')
        # label ph
        self.input_label_ph = tf.placeholder(
            dtype=tf.int32, shape=[None, self.sequence_length], name='input_label_ph')
        # read in the features and build the feature structure placeholders
        for feature_name in self.feature_names:
            # input ph: one placeholder per feature
            self.input_feature_ph_dict[feature_name] = tf.placeholder(
                dtype=tf.int32, shape=[None, self.sequence_length],
                name='input_feature_ph_%s' % feature_name)
            # dropout rate ph
            self.weight_dropout_ph_dict[feature_name] = tf.placeholder(
                tf.float32, name='dropout_ph_%s' % feature_name)
            # init feature weights (random initialization for unspecified features)
            if feature_name not in self.feature_init_weight_dict:
                feature_weight = uniform_tensor(
                    shape=self.feature_weight_shape_dict[feature_name],
                    name='f_w_%s' % feature_name)
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=feature_weight, name='feature_weight_%s' % feature_name)
            else:
                self.feature_weight_dict[feature_name] = tf.Variable(
                    initial_value=self.feature_init_weight_dict[feature_name],
                    name='feature_weight_%s' % feature_name)
            self.nil_vars.add(self.feature_weight_dict[feature_name].name)
            # init dropout rate (initialize the ones not explicitly specified)
            if feature_name not in self.feature_weight_dropout_dict:
                self.feature_weight_dropout_dict[feature_name] = 0.

        # initialize the character embedding space
        self.input_char_ph_dict['char'] = tf.placeholder(
            dtype=tf.int32, shape=[None, self.sequence_length, self.word_length],
            name='char_input')
        self.input_char_flat = tf.reshape(
            self.input_char_ph_dict['char'], [-1, self.word_length * self.sequence_length],
            name='input_x_char_flat')
        self.char_weight_dict['char'] = tf.Variable(
            initial_value=self.char_init_weight_dict['char'], name='char_init')

        self.char_embedding_init = tf.nn.dropout(tf.nn.embedding_lookup(
            self.char_weight_dict['char'],
            ids=self.input_char_flat,
            name='char_embedding'),
            keep_prob=1. - self.weight_dropout_ph_dict['char'],
            name='char_dropout')
        with tf.name_scope('char_conv'):
            self.filter_shape = [self.filter_size, self.char_embedding_size, self.num_filter]
            self.W_conv = tf.Variable(tf.truncated_normal(self.filter_shape, stddev=0.1), name='W_conv')
            self.b_conv = tf.Variable(tf.constant(0.1, shape=[self.num_filter]), name='b_conv')
            self.conv = tf.nn.conv1d(self.char_embedding_init, self.W_conv, stride=1, padding='SAME', name='conv')
            self.h_expand = tf.expand_dims(self.conv, -1)
            self.pooled = tf.nn.max_pool(self.h_expand, ksize=[1, self.sequence_length * self.word_length, 1, 1],
                                         strides=[1, self.word_length, 1, 1], padding='SAME', name='pooled')
            self.char_pool_flat = tf.reshape(self.pooled, [-1, self.sequence_length, self.num_filter],
                                             name='char_pool_flat')
        # init embeddings
        # encode the features and concatenate them
        self.embedding_features = []
        for feature_name in self.feature_names:
            embedding_feature = tf.nn.dropout(tf.nn.embedding_lookup(
                self.feature_weight_dict[feature_name],
                ids=self.input_feature_ph_dict[feature_name],
                name='embedding_feature_%s' % feature_name),
                keep_prob=1. - self.weight_dropout_ph_dict[feature_name],
                name='embedding_feature_dropout_%s' % feature_name)
            self.embedding_features.append(embedding_feature)

        # concat all features
        # concatenate the per-word features into a sentence representation
        input_features = self.embedding_features[0] if len(self.embedding_features) == 1 \
            else tf.concat(values=self.embedding_features, axis=2, name='input_features')
        input_features = tf.concat([input_features, self.char_pool_flat], axis=2)
        # bi-lstm

        if self.rnn_unit == 'lstm':
            fw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
            bw_cell = rnn.BasicLSTMCell(self.nb_hidden, forget_bias=1., state_is_tuple=True)
        elif self.rnn_unit == 'gru':
            fw_cell = rnn.GRUCell(self.nb_hidden)
            bw_cell = rnn.GRUCell(self.nb_hidden)
        else:
            raise ValueError('rnn_unit must in (lstm, gru)!')
        # compute the actual length of self.input_features[feature_names[0]] (0 is the padding value)
        self.sequence_actual_length = get_sequence_actual_length(  # actual length of each sentence
            self.input_feature_ph_dict[self.feature_names[0]])
        # print(input_features)
        rnn_outputs, _ = tf.nn.bidirectional_dynamic_rnn(
            fw_cell, bw_cell, input_features, scope='bi-lstm',
            dtype=tf.float32, sequence_length=self.sequence_actual_length)
        # shape = [batch_size, max_len, nb_hidden*2]
        # after dropout, the input/output dimensions stay unchanged
        lstm_output = tf.nn.dropout(
            tf.concat(rnn_outputs, axis=2, name='lstm_output'),
            keep_prob=1. - self.dropout_rate_ph, name='lstm_output_dropout')

        # softmax
        # reshape the outputs
        self.outputs = tf.reshape(lstm_output, [-1, self.nb_hidden * 2], name='outputs')
        self.softmax_w = tf.get_variable('softmax_w', [self.nb_hidden * 2, self.nb_classes])
        self.softmax_b = tf.get_variable('softmax_b', [self.nb_classes])
        self.logits = tf.reshape(
            tf.matmul(self.outputs, self.softmax_w) + self.softmax_b,
            shape=[-1, self.sequence_length, self.nb_classes], name='logits')

        # compute the loss
        self.loss = self.compute_loss()
        self.l2_loss = self.l2_rate * (tf.nn.l2_loss(self.softmax_w) + tf.nn.l2_loss(self.softmax_b))

        self.total_loss = self.loss + self.l2_loss

        # train op
        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate)
        grads_and_vars = optimizer.compute_gradients(self.total_loss)
        nil_grads_and_vars = []
        for g, v in grads_and_vars:
            if v.name in self.nil_vars:
                nil_grads_and_vars.append((zero_nil_slot(g), v))
            else:
                nil_grads_and_vars.append((g, v))

        global_step = tf.Variable(0, name='global_step', trainable=False)
        if self.clip:
            # clip by global norm
            gradients, variables = zip(*nil_grads_and_vars)
            gradients, _ = tf.clip_by_global_norm(gradients, self.clip)
            self.train_op = optimizer.apply_gradients(
                zip(gradients, variables), name='train_op', global_step=global_step)
        else:
            self.train_op = optimizer.apply_gradients(
                nil_grads_and_vars, name='train_op', global_step=global_step)

        # TODO: sess and visible_device_list still need to be revised
        gpu_options = tf.GPUOptions(visible_device_list='0', allow_growth=True)
        self.sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

        # init all variable
        init = tf.global_variables_initializer()
        self.sess.run(init)
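
All four examples initialize unspecified feature embeddings with uniform_tensor, which is not defined in this listing. A plausible sketch is given below, assuming plain uniform random initialization; the bounds shown are an assumption, not the project's actual values.

import tensorflow as tf

def uniform_tensor(shape, name=None, minval=-0.01, maxval=0.01):
    # uniform random initial embedding matrix; during training the padding row's
    # gradient is zeroed by zero_nil_slot so that slot is never updated
    return tf.random_uniform(shape=shape, minval=minval, maxval=maxval, name=name)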