Example #1
    def __init__(
        self,
        units: int,
        num_heads: int,
        attention_dropout_rate: float = 0.0,
        density: float = 0.2,
        unidirectional: bool = False,
        use_key_relative_position: bool = False,
        use_value_relative_position: bool = False,
        max_relative_position: Optional[int] = None,
        heads_share_relative_embedding: bool = False,
    ) -> None:
        super().__init__()

        if units % num_heads != 0:
            raise ValueError(
                f"number of units {units} should be proportional to "
                f"number of attention heads {num_heads}.")

        self.num_heads = num_heads
        self.units = units
        self.attention_dropout_rate = attention_dropout_rate
        self.unidirectional = unidirectional
        self.use_key_relative_position = use_key_relative_position
        self.use_value_relative_position = use_value_relative_position
        self.relative_length = max_relative_position
        if self.relative_length is not None:
            self.relative_length += 1  # include current time
        self.heads_share_relative_embedding = heads_share_relative_embedding

        self._depth = units // self.num_heads

        # process queries
        self._query_dense_layer = RandomlyConnectedDense(units=units,
                                                         use_bias=False,
                                                         density=density)
        # process keys
        self._key_dense_layer = RandomlyConnectedDense(units=units,
                                                       use_bias=False,
                                                       density=density)
        # process values
        self._value_dense_layer = RandomlyConnectedDense(units=units,
                                                         use_bias=False,
                                                         density=density)
        # process attention output
        self._output_dense_layer = RandomlyConnectedDense(units=units,
                                                          density=density)

        self._create_relative_embeddings()
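
For orientation, a hypothetical usage sketch of this constructor (the class is MultiHeadAttention, as Example #3 shows; RandomlyConnectedDense comes from the surrounding project). The key constraint is that units must be divisible by num_heads, since each head operates on a slice of size units // num_heads:

# Hypothetical usage sketch of the constructor shown above.
mha = MultiHeadAttention(
    units=256,
    num_heads=4,                  # per-head depth: 256 // 4 = 64
    attention_dropout_rate=0.1,
    density=0.2,                  # connectivity of the internal dense layers
    max_relative_position=5,      # stored internally as 6 (current step included)
)

# units that are not a multiple of num_heads fail fast at construction:
# MultiHeadAttention(units=100, num_heads=3)  # raises ValueError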
Example #2
import numpy as np
import tensorflow as tf
# Assumed import path; RandomlyConnectedDense lives in the project under test.
from rasa.utils.tensorflow.layers import RandomlyConnectedDense


def test_randomly_connected_dense_all_inputs_connected():
    # density=0.0 requests the sparsest possible kernel; every input should
    # still be connected to at least one output.
    layer = RandomlyConnectedDense(density=0.0, units=2, use_bias=False)
    # Create a unit vector [1, 0, 0, 0, ...]
    x = np.zeros(10)
    x[0] = 1.0
    # Cycle through all 10 standard basis vectors; the roll happens before
    # the layer call, so e_1 is tested first and e_0 last.
    for _ in range(10):
        x = np.roll(x, 1)
        y = layer(np.expand_dims(x, 0))
        assert tf.reduce_sum(y).numpy() != 0.0
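
This test relies on a connectivity guarantee: even at density=0.0, every input must stay connected to the output (and, per Example #5, every output must stay non-zero). A minimal sketch of that idea, as a hypothetical simplified stand-in rather than the project's actual implementation:

import numpy as np
import tensorflow as tf


class MaskedDense(tf.keras.layers.Dense):
    """Hypothetical stand-in for RandomlyConnectedDense: a Dense layer
    whose kernel is pruned by a fixed random binary mask."""

    def __init__(self, density: float = 0.2, **kwargs) -> None:
        super().__init__(**kwargs)
        self.density = density

    def build(self, input_shape) -> None:
        super().build(input_shape)
        rng = np.random.default_rng()
        shape = tuple(self.kernel.shape)
        mask = rng.random(shape) < self.density
        # Guarantee minimal connectivity even at density 0.0: every input
        # row and every output column keeps at least one weight.
        for i in range(shape[0]):
            if not mask[i].any():
                mask[i, rng.integers(shape[1])] = True
        for j in range(shape[1]):
            if not mask[:, j].any():
                mask[rng.integers(shape[0]), j] = True
        # Prune once at build time; the real layer keeps pruned weights
        # at zero throughout training as well.
        self.kernel.assign(self.kernel * tf.cast(mask, self.kernel.dtype))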
Example #3
    def __init__(
        self,
        units: int,
        num_heads: int,
        filter_units: int,
        dropout_rate: float = 0.1,
        attention_dropout_rate: float = 0.0,
        density: float = 0.2,
        unidirectional: bool = False,
        use_key_relative_position: bool = False,
        use_value_relative_position: bool = False,
        max_relative_position: Optional[int] = None,
        heads_share_relative_embedding: bool = False,
    ) -> None:
        super().__init__()

        self._layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
        self._mha = MultiHeadAttention(
            units,
            num_heads,
            attention_dropout_rate,
            density,
            unidirectional,
            use_key_relative_position,
            use_value_relative_position,
            max_relative_position,
            heads_share_relative_embedding,
        )
        self._dropout = tf.keras.layers.Dropout(dropout_rate)

        self._ffn_layers = [
            tf.keras.layers.LayerNormalization(epsilon=1e-6),
            RandomlyConnectedDense(
                units=filter_units,
                activation=tfa.activations.gelu,
                density=density),  # (batch_size, length, filter_units)
            tf.keras.layers.Dropout(dropout_rate),
            RandomlyConnectedDense(
                units=units, density=density),  # (batch_size, length, units)
            tf.keras.layers.Dropout(dropout_rate),
        ]
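
The constructor above wires up a pre-norm transformer block: LayerNorm before self-attention with a residual connection, followed by the feed-forward stack in self._ffn_layers. A hypothetical sketch of the corresponding forward pass (the actual call method is not part of this example; pad_mask and the _mha return value are assumptions):

    def call(self, x: tf.Tensor, pad_mask=None, training=None) -> tf.Tensor:
        # Self-attention sub-layer: pre-norm, attend, dropout, residual.
        x_norm = self._layer_norm(x)
        attn_out = self._mha(x_norm, x_norm, pad_mask=pad_mask, training=training)
        x = x + self._dropout(attn_out, training=training)

        # Feed-forward sub-layer: LayerNorm -> RandomlyConnectedDense (gelu)
        # -> Dropout -> RandomlyConnectedDense -> Dropout, then residual.
        ffn_out = x
        for layer in self._ffn_layers:
            ffn_out = layer(ffn_out, training=training)
        return x + ffn_out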
Example #4
    def __init__(
        self,
        num_layers: int,
        units: int,
        num_heads: int,
        filter_units: int,
        reg_lambda: float,
        dropout_rate: float = 0.1,
        attention_dropout_rate: float = 0.0,
        density: float = 0.2,
        unidirectional: bool = False,
        use_key_relative_position: bool = False,
        use_value_relative_position: bool = False,
        max_relative_position: Optional[int] = None,
        heads_share_relative_embedding: bool = False,
        name: Optional[Text] = None,
    ) -> None:
        super().__init__(name=name)

        self.units = units
        self.unidirectional = unidirectional

        l2_regularizer = tf.keras.regularizers.l2(reg_lambda)
        self._embedding = RandomlyConnectedDense(
            units=units, kernel_regularizer=l2_regularizer, density=density)
        # positional encoding helpers
        self._angles = self._get_angles()
        self._even_indices = np.arange(0, self.units, 2,
                                       dtype=np.int32)[:, np.newaxis]
        self._odd_indices = np.arange(1, self.units, 2,
                                      dtype=np.int32)[:, np.newaxis]

        self._dropout = tf.keras.layers.Dropout(dropout_rate)

        self._enc_layers = [
            TransformerEncoderLayer(
                units,
                num_heads,
                filter_units,
                dropout_rate,
                attention_dropout_rate,
                density,
                unidirectional,
                use_key_relative_position,
                use_value_relative_position,
                max_relative_position,
                heads_share_relative_embedding,
            ) for _ in range(num_layers)
        ]
        self._layer_norm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
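
The _angles, _even_indices and _odd_indices helpers above point to the standard sinusoidal positional encoding from "Attention Is All You Need": even dimensions get a sine, odd dimensions a cosine. A self-contained sketch of that computation (a hypothetical standalone function, not the class's own _get_angles):

import numpy as np


def sinusoidal_positional_encoding(max_position: int, units: int) -> np.ndarray:
    """PE[pos, 2i] = sin(pos / 10000**(2i/units)),
    PE[pos, 2i+1] = cos(pos / 10000**(2i/units))."""
    positions = np.arange(max_position)[:, np.newaxis]  # (max_position, 1)
    dims = np.arange(units)[np.newaxis, :]              # (1, units)
    angles = positions / np.power(10000.0, (2 * (dims // 2)) / units)
    encoding = np.zeros((max_position, units))
    encoding[:, 0::2] = np.sin(angles[:, 0::2])  # even dimensions
    encoding[:, 1::2] = np.cos(angles[:, 1::2])  # odd dimensions
    return encoding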
Example #5
def test_randomly_connected_dense_output_always_dense(
        inputs: np.ndarray, units: int, expected_num_non_zero_outputs: int):
    layer = RandomlyConnectedDense(density=0.0, units=units, use_bias=False)
    y = layer(inputs)
    num_non_zero_outputs = tf.math.count_nonzero(y).numpy()
    assert num_non_zero_outputs == expected_num_non_zero_outputs
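
The inputs, units and expected_num_non_zero_outputs arguments imply a @pytest.mark.parametrize decorator that this snippet does not include. A hypothetical reconstruction with made-up parameter sets (a dense output means every entry of the (batch, units) result is non-zero):

import numpy as np
import pytest


@pytest.mark.parametrize(
    "inputs, units, expected_num_non_zero_outputs",
    [
        (np.ones((1, 4)), 2, 2),    # 1 row in the batch -> 1 * 2 outputs
        (np.ones((3, 4)), 5, 15),   # 3 rows -> 3 * 5 outputs
    ],
)
def test_randomly_connected_dense_output_always_dense(
        inputs, units, expected_num_non_zero_outputs):
    ...  # body as in Example #5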
Example #6
def test_randomly_connected_dense_shape(inputs: np.ndarray, units: int,
                                        expected_output_shape: Tuple[int, ...]):
    layer = RandomlyConnectedDense(units=units)
    y = layer(inputs)
    assert y.shape == expected_output_shape
Example #7
def test_randomly_connected_dense_shape(inputs, units, expected_output_shape):
    layer = RandomlyConnectedDense(units=units)
    y = layer(inputs)
    assert y.shape == expected_output_shape
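
As in Example #5, the arguments of the shape test in Examples #6 and #7 come from a @pytest.mark.parametrize decorator that the snippets omit. A hypothetical reconstruction; Dense-style layers transform only the last axis, so every leading dimension is preserved:

import numpy as np
import pytest


@pytest.mark.parametrize(
    "inputs, units, expected_output_shape",
    [
        (np.ones((2, 4)), 3, (2, 3)),        # rank-2: (batch, features)
        (np.ones((2, 7, 4)), 3, (2, 7, 3)),  # rank-3: (batch, length, features)
    ],
)
def test_randomly_connected_dense_shape(inputs, units, expected_output_shape):
    ...  # body as in Examples #6 and #7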