def pre_net(vocab_size: int, embedding_dim: int, dropout: float,
            d_type: tf.dtypes.DType = tf.float32, name: str = "preNet") -> tf.keras.Model:
    """Build the PreNet: token embedding + scaled positional encoding + dropout.

    :param vocab_size: vocabulary (token) size; also used as the maximum
        position for the positional-encoding table
    :param embedding_dim: embedding dimension
    :param dropout: dropout rate
    :param d_type: computation dtype
    :param name: model name (used as prefix for all layer names)
    :return: a tf.keras.Model mapping token ids -> embedded sequence
    """
    # FIX: pass dtype=d_type here for consistency with encoder() below —
    # previously the input tensor silently used the default floatx dtype.
    query_inputs = tf.keras.Input(shape=(None, ), dtype=d_type,
                                  name="{}_query_inputs".format(name))
    embeddings = tf.keras.layers.Embedding(
        input_dim=vocab_size, output_dim=embedding_dim, dtype=d_type,
        name="{}_embeddings".format(name))(query_inputs)
    # Scale embeddings by sqrt(d_model) (standard Transformer practice).
    embeddings *= tf.math.sqrt(x=tf.cast(x=embedding_dim, dtype=d_type),
                               name="{}_sqrt".format(name))
    pos_encoding = positional_encoding(position=vocab_size, d_model=embedding_dim, d_type=d_type)
    # Slice the encoding table to the actual (dynamic) sequence length.
    embeddings = embeddings + pos_encoding[:, :tf.shape(embeddings)[1], :]
    outputs = tf.keras.layers.Dropout(
        rate=dropout, dtype=d_type, name="{}_dropout".format(name))(embeddings)
    return tf.keras.Model(inputs=query_inputs, outputs=outputs, name=name)
def encoder(vocab_size: int, num_layers: int, units: int, embedding_dim: int,
            num_heads: int, dropout: float, d_type: tf.dtypes.DType = tf.float32,
            name: str = "encoder") -> tf.keras.Model:
    """Build the text-sequence encoder stack.

    :param vocab_size: vocabulary (token) size; also used as the maximum
        position for the positional-encoding table
    :param num_layers: number of stacked encoder layers
    :param units: feed-forward unit size inside each encoder layer
    :param embedding_dim: embedding dimension (d_model)
    :param num_heads: number of attention heads
    :param dropout: dropout rate
    :param d_type: computation dtype
    :param name: model name (used as prefix for all layer names)
    :return: a tf.keras.Model mapping token ids -> [encoded sequence, padding mask]
    """
    token_ids = tf.keras.Input(shape=(None, ), name="{}_inputs".format(name), dtype=d_type)
    # Padding mask so attention layers ignore pad positions; also returned to the caller.
    mask = tf.keras.layers.Lambda(
        create_padding_mask, output_shape=(1, 1, None),
        name="{}_padding_mask".format(name))(token_ids)
    # Positional-encoding table; sliced to the dynamic sequence length below.
    pos_table = positional_encoding(position=vocab_size, d_model=embedding_dim, d_type=d_type)
    hidden = tf.keras.layers.Embedding(
        input_dim=vocab_size, output_dim=embedding_dim, dtype=d_type,
        name="{}_embeddings".format(name))(token_ids)
    # Scale by sqrt(d_model), then inject position information.
    hidden = hidden * tf.math.sqrt(x=tf.cast(x=embedding_dim, dtype=d_type),
                                   name="{}_sqrt".format(name))
    hidden = hidden + pos_table[:, :tf.shape(hidden)[1], :]
    hidden = tf.keras.layers.Dropout(
        rate=dropout, dtype=d_type, name="{}_dropout".format(name))(hidden)
    # Run the input through each encoder layer in sequence.
    for layer_idx in range(num_layers):
        hidden = encoder_layer(units=units, d_model=embedding_dim, num_heads=num_heads,
                               dropout=dropout, d_type=d_type,
                               name="{}_layer_{}".format(name, layer_idx))([hidden, mask])
    return tf.keras.Model(inputs=token_ids, outputs=[hidden, mask], name=name)