def build_model(self, inputs, outputs):
     """
     Build the model graph.
     Args:
         inputs: tensor, input(s) of the model
         outputs: list of tensors, char- and word-level embedding outputs used as features by this head
     Returns:
         None
     """
     embed_char = outputs[0]
     embed_word = outputs[1]
     if self.wclstm_embed_type == "ATTENTION":
         x_word = L.TimeDistributed(
             SelfAttention(K.int_shape(embed_word)[-1]))(embed_word)
         x_word_shape = K.int_shape(x_word)
         # merge the word and feature axes: (batch, seq, n, d) -> (batch, seq, n * d)
         x_word = L.Reshape(target_shape=(x_word_shape[1],
                                          x_word_shape[2] *
                                          x_word_shape[3]))(x_word)
         x_word = L.Dense(self.embed_size,
                          activation=self.activate_mid)(x_word)
     else:  # "SHORT": take the first word-level vector for each char
         x_word = L.Lambda(lambda x: x[:, :, 0, :])(embed_word)
     outputs_concat = L.Concatenate(axis=-1)([embed_char, x_word])
     # LSTM or GRU
     if self.rnn_type == "LSTM":
         rnn_cell = L.LSTM
     elif self.rnn_type == "CuDNNLSTM":
         rnn_cell = L.CuDNNLSTM
     elif self.rnn_type == "CuDNNGRU":
         rnn_cell = L.CuDNNGRU
     else:
         rnn_cell = L.GRU
     # stacked Bi-RNN (with the CRF head below this is the classic Bi-LSTM-CRF)
     x = outputs_concat
     for nrl in range(self.num_rnn_layers):
         x = L.Bidirectional(
             rnn_cell(
                 units=self.rnn_unit * (nrl + 1),
                 return_sequences=True,
                 activation=self.activate_mid,
             ))(x)
         x = L.Dropout(self.dropout)(x)
     outputs = x
     if self.use_crf:
         x = L.Dense(units=self.label,
                     activation=self.activate_end)(outputs)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                           name="crf_bert4keras")
         self.outputs = self.CRF(x)
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [
             self.CRF.dense_accuracy
             if self.use_onehot else self.CRF.sparse_accuracy
         ]
     else:
         self.outputs = L.TimeDistributed(
             L.Dense(units=self.label,
                     activation=self.activate_end))(outputs)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
Example #2
def build_model(self, inputs, outputs):
     # RNN cell type
     if self.rnn_type == "LSTM":
         layer_cell = L.LSTM
     else:
         layer_cell = L.GRU
     # backward RNN over the sequence
     x_backwards = layer_cell(
         units=self.rnn_unit,
         return_sequences=True,
         kernel_regularizer=keras.regularizers.l2(0.32 * 0.1),
         recurrent_regularizer=keras.regularizers.l2(0.32),
         go_backwards=True)(outputs)
     x_backwards_reverse = L.Lambda(lambda x: K.reverse(x, axes=1))(
         x_backwards)
     # forward RNN over the sequence
     x_forwards = layer_cell(
         units=self.rnn_unit,
         return_sequences=True,
         kernel_regularizer=keras.regularizers.l2(0.32 * 0.1),
         recurrent_regularizer=keras.regularizers.l2(0.32),
         go_backwards=False)(outputs)
     # concatenate forward, original, and reversed-backward features (RCNN style)
     x_feb = L.Concatenate(axis=2)(
         [x_forwards, outputs, x_backwards_reverse])
     # dropout
     x_feb = L.Dropout(self.dropout)(x_feb)
     # embedding size after concatenation
     dim_2 = K.int_shape(x_feb)[2]
     x_feb_reshape = L.Reshape((self.length_max, dim_2, 1))(x_feb)
     # n-gram convolution and max pooling (TextCNN style), see the sketch after this example
     conv_pools = []
     for filter_size in self.filters_size:
         conv = L.Conv2D(
             filters=self.filters_num,
             kernel_size=(filter_size, dim_2),
             padding='valid',
             kernel_initializer='normal',
             activation='relu',
         )(x_feb_reshape)
         pooled = L.MaxPooling2D(
             pool_size=(self.length_max - filter_size + 1, 1),
             strides=(1, 1),
             padding='valid',
         )(conv)
         conv_pools.append(pooled)
     # concatenate the TextCNN branches
     x = L.Concatenate()(conv_pools)
     x = L.Dropout(self.dropout)(x)
     # intermediate dense layer
     x = L.Flatten()(x)
     x = L.Dense(units=min(max(self.label, 64), self.embed_size),
                 activation=self.activate_mid)(x)
     x = L.Dropout(self.dropout)(x)
     # final dense layer projecting to labels
     self.outputs = L.Dense(units=self.label,
                            activation=self.activate_end)(x)
     self.model = M.Model(inputs=inputs, outputs=self.outputs)
     self.model.summary(132)
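Note on the pooling above: a 'valid' Conv2D over length_max timesteps with kernel height f leaves length_max - f + 1 rows, so pool_size=(length_max - f + 1, 1) collapses each branch to a single vector. A tiny sketch (the helper name and numbers are illustrative):

def conv_out_len(length_max, kernel_size):
    # output length of a 'valid' convolution along the time axis
    return length_max - kernel_size + 1

print(conv_out_len(128, 3))  # -> 126, so pool_size=(126, 1) reduces the time axis to 1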
Example #3
def squash_bojone(x, axis=-1):
    """
       activation of squash
    :param x: vector
    :param axis: int
    :return: vector
    """
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True)
    scale = K.sqrt(s_squared_norm + K.epsilon())
    return x / scale
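For intuition, a NumPy mirror of the computation above (squash_np is an illustrative name, not part of the original code) shows that this variant simply rescales a vector to unit L2 norm:

import numpy as np

def squash_np(x, axis=-1, eps=1e-7):
    # mirrors squash_bojone: divide by the L2 norm along `axis`
    norm = np.sqrt(np.sum(np.square(x), axis=axis, keepdims=True) + eps)
    return x / norm

v = np.array([3.0, 4.0])
print(squash_np(v))                   # -> [0.6 0.8]
print(np.linalg.norm(squash_np(v)))   # -> ~1.0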
Example #4
def build_model(self, inputs, outputs):
     x = L.Dense(units=self.label, activation=self.activate_mid)(outputs)
     self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                       name="crf_bert4keras")
     self.outputs = self.CRF(x)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
     self.trans = K.eval(self.CRF.trans).tolist()
     self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
     self.metrics = [
         self.CRF.dense_accuracy
         if self.use_onehot else self.CRF.sparse_accuracy
     ]
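Because the CRF layer supplies its own loss and accuracy, they must be wired into compile() instead of a stock loss. A minimal sketch, assuming `builder` is an instance on which the build_model above has already run (the name and optimizer choice are illustrative):

builder.model.compile(optimizer="adam",
                      loss=builder.loss,        # CRF dense/sparse loss
                      metrics=builder.metrics)  # CRF dense/sparse accuracy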
Example #5
def build_model(self, inputs, outputs):
     """
     Build the model graph.
     Args:
         inputs: tensor, input(s) of the model
         outputs: tensor, output of the embedding layer, used as features by this head
     Returns:
         None
     """
     # LSTM or GRU
     if self.rnn_type == "LSTM":
         rnn_cell = L.LSTM
     elif self.rnn_type == "CuDNNLSTM":
         rnn_cell = L.CuDNNLSTM
     elif self.rnn_type == "CuDNNGRU":
         rnn_cell = L.CuDNNGRU
     else:
         rnn_cell = L.GRU
     # Bi-LSTM-LAN
     for nrl in range(self.num_rnn_layers):
         x = L.Bidirectional(rnn_cell(units=self.rnn_unit*(nrl+1),
                                      return_sequences=True,
                                      activation=self.activate_mid,
                                      ))(outputs)
         x_att = SelfAttention(K.int_shape(x)[-1])(x)
         outputs = L.Concatenate()([x, x_att])
         outputs = L.Dropout(self.dropout)(outputs)
     if self.use_crf:
         x = L.Dense(units=self.label, activation=self.activate_end)(outputs)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier, name="crf_bert4keras")
         self.outputs = self.CRF(x)
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [self.CRF.dense_accuracy if self.use_onehot else self.CRF.sparse_accuracy]
     else:
         self.outputs = L.TimeDistributed(L.Dense(units=self.label, activation=self.activate_end))(outputs)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
Example #6
def call(self, x):
     WQ = K.dot(x, self.kernel[0])  # queries
     WK = K.dot(x, self.kernel[1])  # keys
     WV = K.dot(x, self.kernel[2])  # values
     # scaled dot-product attention: softmax(QK^T / sqrt(d_k)) V
     QK = K.batch_dot(WQ, K.permute_dimensions(WK, [0, 2, 1]))
     QK = QK / (K.int_shape(WK)[-1] ** 0.5)  # scale by sqrt(d_k)
     QK = K.softmax(QK)
     V = K.batch_dot(QK, WV)
     return V
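For reference, a self-contained NumPy sketch of the same scaled dot-product attention, softmax(QK^T / sqrt(d_k)) V (names and shapes are illustrative):

import numpy as np

def sdp_attention(q, k, v):
    # q, k, v: (batch, seq_len, dim)
    d_k = k.shape[-1]
    scores = q @ k.transpose(0, 2, 1) / np.sqrt(d_k)
    weights = np.exp(scores - scores.max(axis=-1, keepdims=True))
    weights /= weights.sum(axis=-1, keepdims=True)   # softmax over keys
    return weights @ v

x = np.random.rand(2, 5, 8)
print(sdp_attention(x, x, x).shape)   # -> (2, 5, 8)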
Example #7
def build_model(self, inputs, outputs):
     x = L.SpatialDropout1D(self.dropout_spatial)(outputs)
     x = SelfAttention(K.int_shape(outputs)[-1])(x)
     x_max = L.GlobalMaxPooling1D()(x)
     x_avg = L.GlobalAveragePooling1D()(x)
     x = L.Concatenate()([x_max, x_avg])
     x = L.Dropout(self.dropout)(x)
     x = L.Flatten()(x)
     # dense-mid
     x = L.Dense(units=min(max(self.label, 64), self.embed_size),
                 activation=self.activate_mid)(x)
     x = L.Dropout(self.dropout)(x)
     # final dense layer projecting to labels
     self.outputs = L.Dense(units=self.label,
                            activation=self.activate_end)(x)
     self.model = M.Model(inputs=inputs, outputs=self.outputs)
     self.model.summary(132)
Example #8
def build_model(self, inputs, outputs):
     """
     Build the model graph.
     Args:
         inputs: tensor, input(s) of the model
         outputs: tensor, output of the embedding layer, used as features by this head
     Returns:
         None
     """
     # CuDNNGRU or GRU
     if self.rnn_type.upper() == "CUDNNGRU":
         rnn_cell = L.CuDNNGRU
     else:
         rnn_cell = L.GRU
     # stacked Bi-GRU
     x = outputs
     for nrl in range(self.num_rnn_layers):
         x = L.Bidirectional(
             rnn_cell(
                 units=self.rnn_unit,
                 return_sequences=True,
                 activation=self.activate_mid,
             ))(x)
         x = L.Dropout(self.dropout)(x)
     if self.use_crf:
         x = L.Dense(units=self.label, activation=self.activate_end)(x)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                           name="crf_bert4keras")
         self.outputs = self.CRF(x)
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [
             self.CRF.dense_accuracy
             if self.use_onehot else self.CRF.sparse_accuracy
         ]
     else:
         self.outputs = L.TimeDistributed(
             L.Dense(units=self.label, activation=self.activate_end))(x)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
Example #9
def build_model(self, inputs, outputs):
     """
     Build the model graph.
     Args:
         inputs: tensor, input(s) of the model
         outputs: tensor, output of the embedding layer, used as features by this head
     Returns:
         None
     """
     # LSTM or GRU
     if self.rnn_type == "LSTM":
         rnn_cell = L.LSTM
     elif self.rnn_type == "CuDNNLSTM":
         rnn_cell = L.CuDNNLSTM
     elif self.rnn_type == "CuDNNGRU":
         rnn_cell = L.CuDNNGRU
     else:
         rnn_cell = L.GRU
     # CNN-LSTM: extract n-gram features per kernel size; max pooling is generally preferred over average pooling
     conv_pools = []
     for i in range(len(self.filters_size)):
         conv = L.Conv1D(
             name="conv-{0}-{1}".format(i, self.filters_size[i]),
             kernel_size=self.filters_size[i],
             activation=self.activate_mid,
             filters=self.filters_num,
             padding='same',
         )(outputs)
         conv_rnn = L.Bidirectional(
             rnn_cell(
                 name="bi-lstm-{0}-{1}".format(i, self.filters_size[i]),
                 activation=self.activate_mid,
                 return_sequences=True,
                 units=self.rnn_unit,
             ))(conv)
         x_dropout = L.Dropout(rate=self.dropout,
                               name="dropout-{0}-{1}".format(
                                   i, self.filters_size[i]))(conv_rnn)
         conv_pools.append(x_dropout)
     # concatenate the branches
     x = L.Concatenate(axis=-1)(conv_pools)
     x = L.Dropout(self.dropout)(x)
     # CRF or Dense
     if self.use_crf:
         x = L.Dense(units=self.label, activation=self.activate_end)(x)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                           name="crf_bert4keras")
         self.outputs = self.CRF(x)
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [
             self.CRF.dense_accuracy
             if self.use_onehot else self.CRF.sparse_accuracy
         ]
     else:
         self.outputs = L.TimeDistributed(
             L.Dense(units=self.label,
                     activation=self.activate_end,
                     name="dense-output"))(x)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
Example #10
def call(self, u_vecs):
        if self.share_weights:
            u_hat_vecs = K.conv1d(u_vecs, self.W)
        else:
            u_hat_vecs = K.local_conv1d(u_vecs, self.W, [1], [1])

        batch_size = K.shape(u_vecs)[0]
        input_num_capsule = K.shape(u_vecs)[1]
        u_hat_vecs = K.reshape(u_hat_vecs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        u_hat_vecs = K.permute_dimensions(u_hat_vecs, (0, 2, 1, 3))
        # final u_hat_vecs.shape = [None, num_capsule, input_num_capsule, dim_capsule]

        # routing logits b, shape = [None, num_capsule, input_num_capsule]
        b = K.zeros_like(u_hat_vecs[:, :, :, 0])
        outputs = None
        for i in range(self.routings):
            b = K.permute_dimensions(
                b, (0, 2, 1))  # shape = [None, input_num_capsule, num_capsule]
            c = K.softmax(b)
            c = K.permute_dimensions(c, (0, 2, 1))
            b = K.permute_dimensions(b, (0, 2, 1))
            outputs = self.activation(K.batch_dot(c, u_hat_vecs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(outputs, u_hat_vecs, [2, 3])

        return outputs
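A NumPy sketch of the same routing loop (the route/softmax_np helpers are illustrative; the squash step reuses the L2-normalization variant from Example #3):

import numpy as np

def softmax_np(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

def route(u_hat, routings=3, eps=1e-7):
    # u_hat: (batch, num_capsule, input_num_capsule, dim_capsule)
    b = np.zeros(u_hat.shape[:3])                     # routing logits
    for i in range(routings):
        c = softmax_np(b, axis=1)                     # softmax over the num_capsule axis
        s = np.einsum('bni,bnid->bnd', c, u_hat)      # weighted sum of predictions
        v = s / (np.sqrt((s ** 2).sum(-1, keepdims=True)) + eps)  # squash
        if i < routings - 1:
            b = np.einsum('bnd,bnid->bni', v, u_hat)  # agreement update
    return v

print(route(np.random.rand(2, 10, 6, 16)).shape)      # -> (2, 10, 16)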
Example #11
def call(self, x, mask=None):
        # computes a probability distribution over the timesteps
        # uses "max trick" for numerical stability
        # reshape is done to avoid issue with Tensorflow
        # and 1-dimensional weights
        logits = K.dot(x, self.W)
        x_shape = K.shape(x)
        logits = K.reshape(logits, (x_shape[0], x_shape[1]))
        ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True))

        # masked timesteps have zero weight
        if mask is not None:
            mask = K.cast(mask, K.floatx())
            ai = ai * mask
        att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon())
        weighted_input = x * K.expand_dims(att_weights)
        result = K.sum(weighted_input, axis=1)
        if self.return_attention:
            return [result, att_weights]
        return result
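The same max-trick softmax with masking, written out in NumPy for clarity (masked_softmax is an illustrative helper, not part of the layer):

import numpy as np

def masked_softmax(logits, mask=None, eps=1e-7):
    # subtract the row max before exp() for numerical stability
    ai = np.exp(logits - logits.max(axis=-1, keepdims=True))
    if mask is not None:
        ai = ai * mask                # masked timesteps get zero weight
    return ai / (ai.sum(axis=-1, keepdims=True) + eps)

w = masked_softmax(np.array([[1.0, 2.0, 3.0]]), mask=np.array([[1.0, 1.0, 0.0]]))
print(w)   # -> [[0.2689 0.7311 0.]] (renormalized over the unmasked steps)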
Example #12
def build_model(self, inputs, outputs):
     """
     Build the model graph.
     Args:
         inputs: tensor, input(s) of the model
         outputs: tensor, output of the embedding layer, used as features by this head
     Returns:
         None
     """
     # DGCNN dilated convolutions (IDCNN): extract n-gram features
     conv_pools = []
     for i in range(len(self.filters_size)):
         conv = L.Conv1D(
             name="conv-{0}-{1}".format(i, self.filters_size[i]),
             dilation_rate=self.atrous_rates[0],
             kernel_size=self.filters_size[i],
             activation=self.activate_mid,
             filters=self.filters_num,
             padding="SAME",
         )(outputs)
         # subsequent blocks use the remaining dilation rates
         for j in range(len(self.atrous_rates) - 1):
             conv = L.Conv1D(
                 name="conv-{0}-{1}-{2}".format(i, self.filters_size[i], j),
                 dilation_rate=self.atrous_rates[j + 1],
                 kernel_size=self.filters_size[i],
                 activation=self.activate_mid,
                 filters=self.filters_num,
                 padding="same",
             )(conv)
             conv = L.Dropout(
                 name="dropout-{0}-{1}-{2}".format(i, self.filters_size[i],
                                                   j),
                 rate=self.dropout,
             )(conv)
         conv_pools.append(conv)
     # concatenate the dilated branches
     x = L.Concatenate(axis=-1)(conv_pools)
     x = L.Dropout(self.dropout)(x)
     # CRF or Dense
     if self.use_crf:
         x = L.Dense(units=self.label, activation=self.activate_end)(x)
         self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                           name="crf_bert4keras")
         self.outputs = self.CRF(x)
         self.trans = K.eval(self.CRF.trans).tolist()
         self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
         self.metrics = [
             self.CRF.dense_accuracy
             if self.use_onehot else self.CRF.sparse_accuracy
         ]
     else:
         x = L.Bidirectional(
             L.GRU(
                 activation=self.activate_mid,
                 return_sequences=True,
                 units=self.rnn_unit,
                 name="bi-gru",
             ))(x)
         self.outputs = L.TimeDistributed(
             L.Dense(
                 activation=self.activate_end,
                 name="dense-output",
                 units=self.label,
             ))(x)
     self.model = M.Model(inputs, self.outputs)
     self.model.summary(132)
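A rough aid for choosing atrous_rates: with 'same' padding, the receptive field of such a dilated stack is 1 + sum((k - 1) * d_j) over the dilation rates d_j. A tiny helper (values are illustrative):

def receptive_field(kernel_size, dilations):
    # receptive field of stacked dilated 1-D convolutions
    return 1 + sum((kernel_size - 1) * d for d in dilations)

print(receptive_field(3, (1, 2, 4)))   # -> 15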