Example 1
import tensorflow as tf

# Note: MultiHeadAttention here is a project-local layer (its call signature
# differs from tf.keras.layers.MultiHeadAttention); a sketch is given below.
def block_net(units: int,
              d_model: int,
              num_heads: int,
              dropout: float,
              d_type: tf.dtypes.DType = tf.float32,
              name: str = "block_net") -> tf.keras.Model:
    """ BlockNet

    :param units: 词汇量大小
    :param d_model: 深度,词嵌入维度
    :param num_heads: 注意力头数
    :param dropout: dropout的权重
    :param d_type: 运算精度
    :param name: 名称
    :return: BlockNet
    """
    query = tf.keras.Input(shape=(None, d_model),
                           dtype=d_type,
                           name="{}_query".format(name))
    key = tf.keras.Input(shape=(None, d_model),
                         dtype=d_type,
                         name="{}_key".format(name))
    padding_mask = tf.keras.Input(shape=(1, 1, None),
                                  dtype=d_type,
                                  name="{}_padding_mask".format(name))

    # Cross-attention from `query` over `key`; the layer is assumed to
    # return a (context, attention_weights) pair.
    attention, _ = MultiHeadAttention(d_model, num_heads)(q=query,
                                                          k=key,
                                                          v=key,
                                                          mask=padding_mask)
    # attention, _ = scaled_dot_product_attention(q=query, k=key, v=key)
    attention = tf.keras.layers.Dropout(
        rate=dropout, dtype=d_type,
        name="{}_attention_dropout".format(name))(attention)
    # Residual connection + LayerNorm (post-norm) around the attention sub-layer.
    attention = tf.keras.layers.LayerNormalization(
        epsilon=1e-6,
        dtype=d_type,
        name="{}_attention_layer_norm".format(name))(query + attention)

    # Position-wise feed-forward network: Dense(units, relu) -> Dense(d_model).
    outputs = tf.keras.layers.Dense(
        units=units,
        activation="relu",
        dtype=d_type,
        name="{}_dense_act".format(name))(attention)
    outputs = tf.keras.layers.Dense(units=d_model,
                                    dtype=d_type,
                                    name="{}_dense".format(name))(outputs)
    outputs = tf.keras.layers.Dropout(
        rate=dropout, dtype=d_type,
        name="{}_outputs_dropout".format(name))(outputs)
    # Residual connection + LayerNorm around the feed-forward sub-layer.
    outputs = tf.keras.layers.LayerNormalization(
        epsilon=1e-6, dtype=d_type,
        name="{}_outputs_layer_norm".format(name))(attention + outputs)

    return tf.keras.Model(inputs=[query, key, padding_mask],
                          outputs=outputs,
                          name=name)
Example 2
def encoder_layer(units: int,
                  d_model: int,
                  num_heads: int,
                  dropout: float,
                  d_type: tf.dtypes.DType = tf.float32,
                  name: str = "encoder_layer") -> tf.keras.Model:
    """
    :param units: 词汇量大小
    :param d_model: 深度,词嵌入维度
    :param num_heads: 注意力头数
    :param dropout: dropout的权重
    :param d_type: 运算精度
    :param name: 名称
    :return:
    """
    inputs = tf.keras.Input(shape=(None, d_model),
                            dtype=d_type,
                            name="{}_inputs".format(name))
    padding_mask = tf.keras.Input(shape=(1, 1, None),
                                  dtype=d_type,
                                  name="{}_padding_mask".format(name))

    # Self-attention: query, key and value are all the encoder inputs.
    attention, _ = MultiHeadAttention(d_model, num_heads)(q=inputs,
                                                          k=inputs,
                                                          v=inputs,
                                                          mask=padding_mask)
    attention = tf.keras.layers.Dropout(
        rate=dropout, dtype=d_type,
        name="{}_attention_dropout".format(name))(attention)
    attention = tf.keras.layers.LayerNormalization(
        epsilon=1e-6,
        dtype=d_type,
        name="{}_attention_layer_norm".format(name))(inputs + attention)

    outputs = tf.keras.layers.Dense(
        units=units,
        activation="relu",
        dtype=d_type,
        name="{}_dense_act".format(name))(attention)
    outputs = tf.keras.layers.Dense(units=d_model,
                                    dtype=d_type,
                                    name="{}_dense".format(name))(outputs)
    outputs = tf.keras.layers.Dropout(
        rate=dropout, dtype=d_type,
        name="{}_outputs_dropout".format(name))(outputs)
    outputs = tf.keras.layers.LayerNormalization(
        epsilon=1e-6, dtype=d_type,
        name="{}_outputs_layer_norm".format(name))(attention + outputs)

    return tf.keras.Model(inputs=[inputs, padding_mask],
                          outputs=outputs,
                          name=name)
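A quick smoke test of encoder_layer. The hyperparameter values below
(units=512, d_model=128, num_heads=4, dropout=0.1) are illustrative, not
taken from the source:

import tensorflow as tf

enc = encoder_layer(units=512, d_model=128, num_heads=4, dropout=0.1)
x = tf.random.uniform((2, 10, 128))  # (batch, seq_len, d_model)
mask = tf.zeros((2, 1, 1, 10))       # padding mask: 0 = attend, 1 = mask out
y = enc([x, mask])
print(y.shape)  # (2, 10, 128): the output keeps the input shape, so layers stack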