def build_model(self, inputs, outputs):
     """
     Build the char-word Bi-RNN (optionally with CRF) network graph.
     Args:
         inputs: tensor, inputs of the model
         outputs: list of tensors, embedding outputs [char_embedding, word_embedding]
     Returns:
         None
     """
     embed_char = outputs[0]
     embed_word = outputs[1]
     if self.wclstm_embed_type == "ATTNENTION":
         x_word = L.TimeDistributed(
             SelfAttention(K.int_shape(embed_word)[-1]))(embed_word)
          # flatten word-level features: (batch, time, n_word, dim) -> (batch, time, n_word * dim)
          x_word_shape = K.int_shape(x_word)
          x_word = L.Reshape(target_shape=(x_word_shape[1],
                                           x_word_shape[2] *
                                           x_word_shape[3]))(x_word)
         x_word = L.Dense(self.embed_size,
                          activation=self.activate_mid)(x_word)
     # elif self.wclstm_embed_type == "SHORT":
     else:
         x_word = L.Lambda(lambda x: x[:, :, 0, :])(embed_word)
     outputs_concat = L.Concatenate(axis=-1)([embed_char, x_word])
     # LSTM or GRU
     if self.rnn_type == "LSTM":
         rnn_cell = L.LSTM
     elif self.rnn_type == "CuDNNLSTM":
         rnn_cell = L.CuDNNLSTM
     elif self.rnn_type == "CuDNNGRU":
         rnn_cell = L.CuDNNGRU
     else:
         rnn_cell = L.GRU
     # Bi-RNN stack (Bi-LSTM-CRF when use_crf is set); each layer feeds the next
     x = outputs_concat
     for nrl in range(self.num_rnn_layers):
         cell_args = dict(units=self.rnn_unit * (nrl + 1),
                          return_sequences=True)
         if self.rnn_type in ("LSTM", "GRU"):
             # CuDNN cells do not accept an activation kwarg
             cell_args["activation"] = self.activate_mid
         x = L.Bidirectional(rnn_cell(**cell_args))(x)
         x = L.Dropout(self.dropout)(x)
     outputs = x
     if self.use_crf:
         x = L.Dense(units=self.label,
                     activation=self.activate_end)(outputs)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                           name="crf_bert4keras")
         self.outputs = self.CRF(x)
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [
             self.CRF.dense_accuracy
             if self.use_onehot else self.CRF.sparse_accuracy
         ]
     else:
         self.outputs = L.TimeDistributed(
             L.Dense(units=self.label,
                     activation=self.activate_end))(outputs)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
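The CRF branch above only stores self.loss and self.metrics; compiling presumably happens elsewhere in the library. A minimal compile sketch under that assumption (the graph instance, its inputs/outputs, and the learning rate are hypothetical placeholders; O is the keras.optimizers alias used later in this listing):

# hypothetical usage sketch, not part of the original class
graph.build_model(inputs, outputs)
if graph.use_crf:
    # use the CRF loss/accuracy selected in build_model (dense vs sparse targets)
    graph.model.compile(optimizer=O.Adam(lr=1e-3),
                        loss=graph.loss,
                        metrics=graph.metrics)
else:
    graph.model.compile(optimizer=O.Adam(lr=1e-3),
                        loss="categorical_crossentropy",
                        metrics=["accuracy"])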
Example #2
 def build_model(self, inputs, outputs):
     if self.embed_type in ["xlnet"]:
         # x = L.Lambda(lambda x: x[:, -2:-1, :])(outputs)  # xlnet: take CLS (last token)
         x = L.Lambda(lambda x: x[:, -1], name="Token-CLS")(outputs)
     else:
         # x = L.Lambda(lambda x: x[:, 0:1, :])(outputs)  # bert-like: take CLS (first token)
         x = L.Lambda(lambda x: x[:, 0], name="Token-CLS")(outputs)
     # x = L.Flatten()(x)
     # final softmax layer
     self.outputs = L.Dense(
         self.label,
         activation=self.activate_end,
         kernel_initializer=keras.initializers.TruncatedNormal(
             stddev=0.02))(x)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
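To see what the Token-CLS Lambda selects in each branch, here is a standalone numpy sketch (dummy shapes, purely illustrative):

import numpy as np

seq_output = np.random.rand(2, 4, 8)   # (batch, seq_len, hidden)
cls_bert = seq_output[:, 0]            # bert-like: first position -> (2, 8)
cls_xlnet = seq_output[:, -1]          # xlnet: last position -> (2, 8)
print(cls_bert.shape, cls_xlnet.shape)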
Example #3
 def build_model(self, inputs, outputs):
     # rnn type
     if self.rnn_type == "LSTM":
         layer_cell = L.LSTM
     else:
         layer_cell = L.GRU
     # backward RNN
     x_backwords = layer_cell(
         units=self.rnn_unit,
         return_sequences=True,
         kernel_regularizer=keras.regularizers.l2(0.32 * 0.1),
         recurrent_regularizer=keras.regularizers.l2(0.32),
         go_backwards=True)(outputs)
     x_backwords_reverse = L.Lambda(lambda x: K.reverse(x, axes=1))(
         x_backwords)
     # forward RNN
     x_fordwords = layer_cell(
         units=self.rnn_unit,
         return_sequences=True,
         kernel_regularizer=keras.regularizers.l2(0.32 * 0.1),
         recurrent_regularizer=keras.regularizers.l2(0.32),
         go_backwards=False)(outputs)
     # concatenate forward, embedding, and reversed-backward features
     x_feb = L.Concatenate(axis=2)(
         [x_fordwords, outputs, x_backwords_reverse])
     # dropout
     x_feb = L.Dropout(self.dropout)(x_feb)
     # embedding size after concatenation
     dim_2 = K.int_shape(x_feb)[2]
     x_feb_reshape = L.Reshape((self.length_max, dim_2, 1))(x_feb)
     # n-gram convolution and max-pooling (TextCNN-style)
     conv_pools = []
     for filter_size in self.filters_size:
         conv = L.Conv2D(
             filters=self.filters_num,
             kernel_size=(filter_size, dim_2),
             padding='valid',
             kernel_initializer='normal',
             activation='relu',
         )(x_feb_reshape)
         pooled = L.MaxPooling2D(
             pool_size=(self.length_max - filter_size + 1, 1),
             strides=(1, 1),
             padding='valid',
         )(conv)
         conv_pools.append(pooled)
     # concatenate the TextCNN pooled features
     x = L.Concatenate()(conv_pools)
     x = L.Dropout(self.dropout)(x)
     # dense-mid, intermediate fully-connected hidden layer
     x = L.Flatten()(x)
     x = L.Dense(units=min(max(self.label, 64), self.embed_size),
                 activation=self.activate_mid)(x)
     x = L.Dropout(self.dropout)(x)
     # dense-end, final dense layer mapping to the labels
     self.outputs = L.Dense(units=self.label,
                            activation=self.activate_end)(x)
     self.model = M.Model(inputs=inputs, outputs=self.outputs)
     self.model.summary(132)
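The pool_size=(self.length_max - filter_size + 1, 1) above collapses each n-gram feature map to a single value per filter, so every branch contributes a (1, 1, filters_num) tensor before concatenation. A small shape trace, with length_max, filters_size, and filters_num as placeholder values only:

length_max, filters_num = 50, 300            # placeholder hyper-parameters
for filter_size in [2, 3, 4]:                # placeholder filters_size
    conv_h = length_max - filter_size + 1    # height of the 'valid' Conv2D output
    # MaxPooling2D with pool_size=(conv_h, 1) keeps one value per filter
    print(filter_size, (conv_h, 1, filters_num), "->", (1, 1, filters_num))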
Example #4
dev_data = txt_read(path_dev)

len_train_rate = int(len(train_data) * rate)
len_dev_rate = int(len(dev_data) * rate)

train_data = train_data[:len_train_rate]
dev_data = dev_data[:len_dev_rate]

logger.info("train/dev corpora loaded")
# initialize the data preprocessing class
preprocess_xy = ListPrerocessXY(embed,
                                train_data,
                                path_dir=path_model_dir,
                                length_max=length_max)

x = L.Lambda(lambda x: x[:, 0], name="Token-CLS")(embed.model.output)

# final softmax layer
outputs = L.Dense(
    len(preprocess_xy.l2i),
    activation="softmax",
    kernel_initializer=keras.initializers.TruncatedNormal(stddev=0.02))(x)
model = M.Model(embed.model.input, outputs)
model.summary(132)

model.compile(optimizer=O.Adam(lr=1e-5),
              loss="categorical_crossentropy",
              metrics=["accuracy"])

len_train_data = len(train_data)
lg_train = ListGenerator(train_data,