def __init__(self, model_dim: int, feed_forward_dimension: int, num_heads: list,
             maximum_position_encoding: int, conv_filters: int, dropout_rate: float,
             dense_blocks: int, kernel_size: int, conv_activation: str, **kwargs):
    super(SelfAttentionBlocks, self).__init__(**kwargs)
    self.model_dim = model_dim
    self.maximum_position_encoding = maximum_position_encoding
    # Learnable scalar that weights the positional encoding before it is added.
    self.pos_encoding_scalar = tf.Variable(1.)
    self.pos_encoding = positional_encoding(maximum_position_encoding, model_dim)
    self.dropout = tf.keras.layers.Dropout(dropout_rate)
    # The first `dense_blocks` entries of num_heads build dense self-attention blocks.
    self.encoder_SADB = [
        SelfAttentionDenseBlock(model_dim=model_dim, dropout_rate=dropout_rate,
                                num_heads=n_heads,
                                dense_hidden_units=feed_forward_dimension,
                                name=f'{self.name}_SADB_IN_{i}')
        for i, n_heads in enumerate(num_heads[:dense_blocks])
    ]
    # The remaining entries build convolutional self-attention blocks.
    self.encoder_SACB = [
        SelfAttentionConvBlock(model_dim=model_dim, dropout_rate=dropout_rate,
                               num_heads=n_heads, name=f'{self.name}_SACB_IN_{i}',
                               kernel_size=kernel_size,
                               conv_activation=conv_activation,
                               conv_filters=conv_filters)
        for i, n_heads in enumerate(num_heads[dense_blocks:])
    ]
    self.seq_resnet = FFNResNorm(model_dim=model_dim, dense_hidden_units=model_dim,
                                 dropout_rate=dropout_rate)
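# The `positional_encoding` helper is not shown in these snippets. Below is a
# minimal sketch of what it might look like, assuming the standard sinusoidal
# encoding from "Attention Is All You Need" plus the `start_index` keyword used
# in the call() further down; treat the exact signature as an assumption.
import numpy as np
import tensorflow as tf

def positional_encoding(max_position: int, model_dim: int, start_index: int = 0):
    """Sinusoidal positional encoding of shape (1, max_position, model_dim)."""
    positions = np.arange(start_index, start_index + max_position)[:, np.newaxis]
    dims = np.arange(model_dim)[np.newaxis, :]
    # Each pair of dimensions shares one frequency: 1 / 10000^(2i / model_dim).
    angle_rads = positions / np.power(10000., (2 * (dims // 2)) / np.float32(model_dim))
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])  # even dims -> sine
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])  # odd dims  -> cosine
    return tf.cast(angle_rads[np.newaxis, ...], dtype=tf.float32)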
def __init__(self, model_dim: int, feed_forward_dimension: int, num_heads: list,
             maximum_position_encoding: int, dropout_rate: float, dense_blocks: int,
             conv_filters: int, conv_activation: str, conv_padding: str,
             conv_kernel: int, **kwargs):
    super(CrossAttentionBlocks, self).__init__(**kwargs)
    self.model_dim = model_dim
    self.pos_encoding_scalar = tf.Variable(1.)
    self.pos_encoding = positional_encoding(maximum_position_encoding, model_dim)
    self.dropout = tf.keras.layers.Dropout(dropout_rate)
    # Dense cross-attention blocks, one per head count in num_heads[:dense_blocks].
    self.CADB = [
        CrossAttentionDenseBlock(model_dim=model_dim, dropout_rate=dropout_rate,
                                 num_heads=n_heads,
                                 dense_hidden_units=feed_forward_dimension,
                                 name=f'{self.name}_CADB_{i}')
        for i, n_heads in enumerate(num_heads[:dense_blocks])
    ]
    # Convolutional cross-attention blocks for the remaining head counts.
    self.CACB = [
        CrossAttentionConvBlock(model_dim=model_dim, dropout_rate=dropout_rate,
                                num_heads=n_heads, name=f'{self.name}_CACB_{i}',
                                conv_filters=conv_filters,
                                conv_activation=conv_activation,
                                conv_padding=conv_padding, kernel_size=conv_kernel)
        for i, n_heads in enumerate(num_heads[dense_blocks:])
    ]
def __init__(self, model_dim: int, feed_forward_dimension: int, num_heads: list,
             maximum_position_encoding: int, dropout_rate=0.1, dense_blocks=1, **kwargs):
    super(SelfAttentionBlocks, self).__init__(**kwargs)
    self.model_dim = model_dim
    self.pos_encoding_scalar = tf.Variable(1.)
    self.pos_encoding = positional_encoding(maximum_position_encoding, model_dim)
    self.dropout = tf.keras.layers.Dropout(dropout_rate)
    self.encoder_SADB = [
        SelfAttentionDenseBlock(model_dim=model_dim, dropout_rate=dropout_rate,
                                num_heads=n_heads,
                                dense_hidden_units=feed_forward_dimension,
                                name=f'{self.name}_SADB_{i}')
        for i, n_heads in enumerate(num_heads[:dense_blocks])
    ]
    self.encoder_SACB = [
        SelfAttentionConvBlock(model_dim=model_dim, dropout_rate=dropout_rate,
                               num_heads=n_heads, name=f'{self.name}_SACB_{i}')
        for i, n_heads in enumerate(num_heads[dense_blocks:])
    ]
def call(self, inputs, training, fb_switch, padding_mask, min_index,
         random_padding_mask, drop_n_heads):
    # Recompute the positional encoding starting from min_index so the sequence
    # can be shifted; self.pos_encoding always starts at position 0.
    shift_pos_encoding = positional_encoding(self.maximum_position_encoding,
                                             self.model_dim, start_index=min_index)
    seq_len = tf.shape(inputs)[1]
    x = inputs * tf.math.sqrt(tf.cast(self.model_dim, tf.float32))
    x1 = x + self.pos_encoding_scalar * shift_pos_encoding[:, :seq_len, :]
    x2 = x + self.pos_encoding_scalar * self.pos_encoding[:, :seq_len, :]
    x1 = self.dropout(x1, training=training)
    x2 = self.dropout(x2, training=training)
    # Combine the padding mask with the randomly dropped positions.
    random_mask = tf.maximum(
        tf.cast(padding_mask, tf.float32),
        tf.cast(random_padding_mask[:, tf.newaxis, tf.newaxis, :], tf.float32))
    attention_weights = {}
    for i, block in enumerate(self.encoder_SACB):
        x1, x2, attn_weights1, attn_weights2 = block(
            x1, x2, training=training, padding_mask=padding_mask,
            random_mask=random_mask, drop_n_heads=drop_n_heads)
        attention_weights[f'{self.name}_ConvBlock{i + 1}_SelfAttention1'] = attn_weights1
        attention_weights[f'{self.name}_ConvBlock{i + 1}_SelfAttention2'] = attn_weights2
    for i, block in enumerate(self.encoder_SADB):
        x1, x2, attn_weights1, attn_weights2 = block(
            x1, x2, training=training, padding_mask=padding_mask,
            random_mask=random_mask, drop_n_heads=drop_n_heads)
        attention_weights[f'{self.name}_DenseBlock{i + 1}_SelfAttention1'] = attn_weights1
        attention_weights[f'{self.name}_DenseBlock{i + 1}_SelfAttention2'] = attn_weights2
    # Speaker branch: spk_resnet and spk_rnn are attributes of the full class
    # (not shown in the __init__ snippets above).
    x1 = self.spk_resnet(x1)
    if fb_switch < 0.5:
        x1 = tf.reverse(x1, axis=[-1])
    x1 = self.spk_rnn(x1)
    x1 = tf.nn.l2_normalize(x1, axis=1)
    x2 = self.seq_resnet(x2)
    return x1, x2, attention_weights
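# Hypothetical usage sketch for the SelfAttentionBlocks encoder above. The
# hyperparameter values and mask shapes are illustrative assumptions, not the
# source's configuration, and it assumes the full class (including the speaker
# sub-layers spk_resnet / spk_rnn used in call()) plus SelfAttentionDenseBlock,
# SelfAttentionConvBlock and FFNResNorm are importable.
encoder = SelfAttentionBlocks(model_dim=256, feed_forward_dimension=1024,
                              num_heads=[4, 4, 4], maximum_position_encoding=1000,
                              conv_filters=256, dropout_rate=0.1, dense_blocks=1,
                              kernel_size=3, conv_activation='relu',
                              name='TextEncoder')
dummy_input = tf.random.normal([2, 50, 256])   # (batch, seq_len, model_dim)
padding_mask = tf.zeros([2, 1, 1, 50])         # 1.0 where positions are padded
random_padding_mask = tf.zeros([2, 50])        # extra randomly masked positions
x1, x2, attn = encoder(dummy_input, training=False, fb_switch=1.0,
                       padding_mask=padding_mask, min_index=0,
                       random_padding_mask=random_padding_mask, drop_n_heads=0)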
def __init__(self, num_heads, d_model, dff, output_vocab_size,
             maximum_position_encoding, layernorm, num_layers=4, rate=0.1, **kwargs):
    super(Decoder, self).__init__(**kwargs)
    self.d_model = d_model
    self.num_layers = num_layers
    self.num_heads = num_heads
    # `_layernorm` stores the boolean flag; `layernorm` below is the layer itself.
    self._layernorm = layernorm
    self.embedding = tf.keras.layers.Embedding(output_vocab_size, d_model)
    self.pos_encoding = positional_encoding(maximum_position_encoding, self.d_model)
    self.layernorm = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.dec_layers = [DecoderLayer(num_heads, dff, d_model, rate)
                       for _ in range(num_layers)]
    self.dropout = tf.keras.layers.Dropout(rate)
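# A minimal sketch of the forward pass this Decoder's layers imply, assuming
# each DecoderLayer returns (x, block1_weights, block2_weights) as in the
# standard TensorFlow transformer tutorial; the source does not show call(),
# so treat the layer signature and mask names as assumptions.
def call(self, x, enc_output, training, look_ahead_mask, padding_mask):
    seq_len = tf.shape(x)[1]
    attention_weights = {}
    x = self.embedding(x)                                 # (batch, seq_len, d_model)
    x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))  # scale embeddings
    x += self.pos_encoding[:, :seq_len, :]
    x = self.dropout(x, training=training)
    for i, layer in enumerate(self.dec_layers):
        x, w1, w2 = layer(x, enc_output, training, look_ahead_mask, padding_mask)
        attention_weights[f'decoder_layer{i + 1}_block1'] = w1
        attention_weights[f'decoder_layer{i + 1}_block2'] = w2
    if self._layernorm:  # optional final normalization, gated by the flag
        x = self.layernorm(x)
    return x, attention_weights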
def __init__(self, model_dim: int, feed_forward_dimension: int, num_heads: list,
             maximum_position_encoding: int, dropout_rate: float, **kwargs):
    super(CrossAttentionBlocks, self).__init__(**kwargs)
    self.model_dim = model_dim
    self.pos_encoding_scalar = tf.Variable(1.)
    self.pos_encoding = positional_encoding(maximum_position_encoding, model_dim)
    self.dropout = tf.keras.layers.Dropout(dropout_rate)
    self.CADB = [
        CrossAttentionDenseBlock(model_dim=model_dim, dropout_rate=dropout_rate,
                                 num_heads=n_heads,
                                 dense_hidden_units=feed_forward_dimension,
                                 name=f'{self.name}_CADB_{i}')
        for i, n_heads in enumerate(num_heads[:-1])
    ]
    # The final block is kept out of the list so it can be addressed on its own.
    self.last_CADB = CrossAttentionDenseBlock(
        model_dim=model_dim, dropout_rate=dropout_rate, num_heads=num_heads[-1],
        dense_hidden_units=feed_forward_dimension, name=f'{self.name}_CADB_last')
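# A sketch of how the CADB list and last_CADB above might be chained in a
# call(). The block signature and its returned attention map are assumptions
# modeled on the self-attention call() earlier, not the source's actual
# decoder forward pass.
def call(self, inputs, enc_output, training, decoder_padding_mask,
         encoder_padding_mask, drop_n_heads):
    seq_len = tf.shape(inputs)[1]
    x = inputs + self.pos_encoding_scalar * self.pos_encoding[:, :seq_len, :]
    x = self.dropout(x, training=training)
    attention_weights = {}
    for i, block in enumerate(self.CADB):
        x, attn = block(x, enc_output, training=training,
                        decoder_padding_mask=decoder_padding_mask,
                        encoder_padding_mask=encoder_padding_mask,
                        drop_n_heads=drop_n_heads)
        attention_weights[f'{self.name}_CADB_{i}_CrossAttention'] = attn
    # The separate last block lets its attention map be inspected on its own.
    x, attn = self.last_CADB(x, enc_output, training=training,
                             decoder_padding_mask=decoder_padding_mask,
                             encoder_padding_mask=encoder_padding_mask,
                             drop_n_heads=drop_n_heads)
    attention_weights[f'{self.name}_CADB_last_CrossAttention'] = attn
    return x, attention_weights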
def __init__(self, model_dim: int, feed_forward_dimension: int, num_heads: list,
             maximum_position_encoding: int, conv_filters: int, dropout_rate: float,
             dense_blocks: int, kernel_size: int, conv_activation: str, **kwargs):
    super(SelfAttentionBlocks, self).__init__(**kwargs)  # initialize the parent Keras layer class
    self.model_dim = model_dim  # model (embedding) dimension
    self.pos_encoding_scalar = tf.Variable(1.)  # learnable scalar weighting the positional encoding
    self.pos_encoding = positional_encoding(maximum_position_encoding, model_dim)  # sinusoidal positional encoding
    self.dropout = tf.keras.layers.Dropout(dropout_rate)  # dropout layer for regularization
    # Encoder blocks using dense-type attention; the first `dense_blocks`
    # entries of num_heads set each block's head count.
    self.encoder_SADB = [
        SelfAttentionDenseBlock(model_dim=model_dim, dropout_rate=dropout_rate,
                                num_heads=n_heads,
                                dense_hidden_units=feed_forward_dimension,
                                name=f'{self.name}_SADB_{i}')
        for i, n_heads in enumerate(num_heads[:dense_blocks])
    ]
    # Encoder blocks using convolutional-type attention, built from the
    # remaining entries, num_heads[dense_blocks:].
    self.encoder_SACB = [
        SelfAttentionConvBlock(model_dim=model_dim, dropout_rate=dropout_rate,
                               num_heads=n_heads, name=f'{self.name}_SACB_{i}',
                               kernel_size=kernel_size,
                               conv_activation=conv_activation,
                               conv_filters=conv_filters)
        for i, n_heads in enumerate(num_heads[dense_blocks:])
    ]
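# Illustrative example of the num_heads / dense_blocks split used by the
# constructors above (the values themselves are hypothetical):
num_heads, dense_blocks = [4, 4, 2, 1], 2
dense_heads = num_heads[:dense_blocks]  # [4, 4] -> two dense self-attention blocks
conv_heads = num_heads[dense_blocks:]   # [2, 1] -> two convolutional self-attention blocks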