def __init__(
    self,
    input_shape,
    inner_dim,
    activation=torch.nn.Sigmoid,
    norm=BatchNorm1d,
):
    super().__init__()
    self.inner_dim = inner_dim
    self.norm = norm
    self.activation = activation

    bz, t, chn = input_shape

    # Pointwise depthwise-separable convolution followed by normalization and activation.
    self.conv = Sequential(input_shape=input_shape)
    self.conv.append(
        DepthwiseSeparableConv1d,
        out_channels=chn,
        kernel_size=1,
        stride=1,
    )
    self.conv.append(self.norm)
    self.conv.append(self.activation())

    # Global average pooling followed by a bottleneck MLP over the channels.
    self.avg_pool = AdaptivePool(1)
    self.bottleneck = Sequential(
        Linear(input_size=input_shape[-1], n_neurons=self.inner_dim),
        self.activation(),
        Linear(input_size=self.inner_dim, n_neurons=chn),
        self.activation(),
    )
def __init__(
    self,
    vocab,
    d_model=512,
    nhead=8,
    num_encoder_layers=12,
    num_decoder_layers=0,
    d_ffn=2048,
    dropout=0.1,
    activation=nn.ReLU,
    positional_encoding="fixed_abs_sine",
    normalize_before=False,
    d_embedding=None,
    max_length=2500,
    causal=True,
    attention_type="regularMHA",
):
    super().__init__(
        d_model=d_model,
        nhead=nhead,
        num_encoder_layers=num_encoder_layers,
        num_decoder_layers=num_decoder_layers,
        d_ffn=d_ffn,
        dropout=dropout,
        activation=activation,
        positional_encoding=positional_encoding,
        normalize_before=normalize_before,
        max_length=max_length,
        causal=causal,
        attention_type=attention_type,
    )

    self.d_embedding = d_embedding
    if d_embedding is None:
        self.d_embedding = d_model

    self.custom_src_module = NormalizedEmbedding(self.d_embedding, vocab)

    # Project the embeddings to d_model only when a custom d_embedding is given.
    self.embedding_proj = None
    if d_embedding is not None:
        self.embedding_proj = Linear(
            input_size=self.d_embedding, n_neurons=d_model
        )

    self.output_proj = ModuleList(
        Linear(input_size=d_model, n_neurons=d_model),
        LayerNorm(d_model, eps=1e-6),
        Linear(input_size=d_model, n_neurons=vocab),
    )

    self.num_encoder_layers = num_encoder_layers
    self.num_decoder_layers = num_decoder_layers

    # reset the params of the transformer model
    self._reset_params()
def __init__(
    self, d_model, nhead, dim_feedforward=256, dropout=0, activation="relu"
):
    from torch.nn.modules.activation import MultiheadAttention
    from torch.nn.modules.normalization import LayerNorm
    from torch.nn.modules.dropout import Dropout
    from torch.nn.modules.rnn import LSTM
    from torch.nn.modules.linear import Linear

    super(DPTNetBlock, self).__init__()
    self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout)

    # Implementation of the feedforward model: the usual two-layer Linear stack
    # is replaced by a bidirectional LSTM followed by a Linear projection.
    # self.linear1 = Linear(d_model, dim_feedforward)
    self.rnn = LSTM(d_model, d_model * 2, 1, bidirectional=True)
    self.dropout = Dropout(dropout)
    # self.linear2 = Linear(dim_feedforward, d_model)
    self.linear2 = Linear(d_model * 2 * 2, d_model)

    self.norm1 = LayerNorm(d_model)
    self.norm2 = LayerNorm(d_model)
    self.dropout1 = Dropout(dropout)
    self.dropout2 = Dropout(dropout)

    self.activation = _get_activation_fn(activation)
def __init__(
    self,
    d_model,
    output_size,
    output_activation=nn.ReLU,
    nhead=8,
    num_layers=8,
    d_ffn=512,
    dropout=0.1,
    activation=nn.LeakyReLU,
    causal=True,
    custom_emb_module=None,
    normalize_before=False,
):
    super().__init__(
        d_model=d_model,
        nhead=nhead,
        num_encoder_layers=num_layers,
        num_decoder_layers=0,
        d_ffn=d_ffn,
        dropout=dropout,
        activation=activation,
        positional_encoding=None,
        normalize_before=normalize_before,
        causal=causal,
    )

    self.custom_emb_module = custom_emb_module
    self.output_layer = Linear(output_size, input_size=d_model, bias=False)
    self.output_activation = output_activation()
def __init__(
    self,
    input_size,
    device="cpu",
    lin_blocks=0,
    lin_neurons=192,
    out_neurons=1211,
):
    super().__init__()
    self.blocks = nn.ModuleList()

    for block_index in range(lin_blocks):
        self.blocks.extend(
            [
                _BatchNorm1d(input_size),
                Linear(input_size=input_size, n_neurons=lin_neurons),
            ]
        )
        input_size = lin_neurons

    # Final layer: a raw weight matrix with one row per output class.
    # torch.empty replaces the legacy torch.FloatTensor constructor, which only
    # accepts CPU devices; xavier initialization then fills in the values.
    self.weight = nn.Parameter(
        torch.empty(out_neurons, input_size, device=device)
    )
    nn.init.xavier_uniform_(self.weight)
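# A raw weight parameter like the one above is typically consumed by a
# cosine-similarity scoring step (as in AAM-softmax style speaker classifiers).
# The following is a hedged, self-contained sketch of that pattern; it is not
# the forward() of the class above, and the function name is illustrative only.
import torch
import torch.nn.functional as F

def cosine_scores(embeddings: torch.Tensor, weight: torch.Tensor) -> torch.Tensor:
    """Return (batch, out_neurons) cosine similarities between L2-normalized
    embeddings and L2-normalized class weight rows."""
    return F.linear(F.normalize(embeddings), F.normalize(weight))

# Example: 8 embeddings of size 192 scored against 1211 classes -> (8, 1211).
scores = cosine_scores(torch.rand(8, 192), torch.rand(1211, 192))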
def test_linear():
    from speechbrain.nnet.linear import Linear

    inputs = torch.rand(1, 2, 4)
    lin_t = Linear(n_neurons=4, input_size=inputs.shape[-1], bias=False)

    # With an identity weight matrix and no bias, the layer must return its input.
    lin_t.w.weight = torch.nn.Parameter(torch.eye(inputs.shape[-1]))
    outputs = lin_t(inputs)
    assert torch.all(torch.eq(inputs, outputs))
def append(self, layer, *args, **kwargs):
    """Appends the specified module to the shortcut model.

    Arguments
    ---------
    layer : torch.nn.Module class
        This layer will get initialized with *args and **kwargs. Also,
        the argument ``input_shape`` will be passed if the layer takes it.
    *args, **kwargs
        Passed unchanged to the layer **EXCEPT** the kwarg ``end_of_block``
        which is used to indicate that the shortcut should be added in.
    """
    if self.new_block:
        self.blocks.append(Sequential(input_shape=self.block_input_shape))
        self.new_block = False

    end_of_block = False
    if "end_of_block" in kwargs:
        end_of_block = kwargs["end_of_block"]
        del kwargs["end_of_block"]

    self.blocks[-1].append(layer, *args, **kwargs)

    # When we reach the end of the block, prepare to add shortcut
    if end_of_block:

        # Use dummy input to find shape of next block
        dummy_input = torch.zeros(self.block_input_shape)
        dummy_output = self.blocks[-1](dummy_input)

        # Initialize projection if necessary
        if self.shortcut_projection:
            projection_size = functools.reduce(
                operator.mul, dummy_output.shape[2:], 1
            )

            if self.shortcut_type == "residual":
                shape = self.first_input_shape
                dummy_input = torch.zeros(self.first_input_shape)
            else:
                shape = self.block_input_shape

            self.projections.append(
                Linear(
                    n_neurons=projection_size,
                    input_shape=shape,
                    bias=False,
                    combine_dims=True,
                )
            )

        # Prepare for next block
        self.new_block = True
        dummy_output = self._combine(dummy_input, dummy_output, -1)
        self.block_input_shape = dummy_output.shape
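# Usage sketch for the append() method above. It assumes the method belongs to a
# shortcut container along the lines of speechbrain.nnet.containers.ConnectBlocks
# (the class name, import path, and exact output shape are assumptions, not shown
# in the snippet itself). Layers accumulate into the current block until
# ``end_of_block=True`` is passed, at which point the shortcut (and, if requested,
# a linear projection) is wired in and a new block is started.
import torch
from speechbrain.nnet.containers import ConnectBlocks
from speechbrain.nnet.linear import Linear

inputs = torch.rand(10, 100, 20)
model = ConnectBlocks(input_shape=inputs.shape, shortcut_projection=True)
model.append(Linear, n_neurons=16)                     # first layer of block 1
model.append(Linear, n_neurons=16, end_of_block=True)  # close block 1, add shortcut
model.append(Linear, n_neurons=16, end_of_block=True)  # block 2 (single layer)
outputs = model(inputs)  # expected shape: (10, 100, 16)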
def __init__(
    self,
    tgt_vocab,
    input_size,
    d_model=512,
    nhead=8,
    num_encoder_layers=6,
    num_decoder_layers=6,
    d_ffn=2048,
    dropout=0.1,
    activation=nn.ReLU,
    positional_encoding="fixed_abs_sine",
    normalize_before=False,
    kernel_size: Optional[int] = 31,
    bias: Optional[bool] = True,
    encoder_module: Optional[str] = "transformer",
    conformer_activation: Optional[nn.Module] = Swish,
    attention_type: Optional[str] = "regularMHA",
    max_length: Optional[int] = 2500,
    causal: Optional[bool] = True,
):
    super().__init__(
        d_model=d_model,
        nhead=nhead,
        num_encoder_layers=num_encoder_layers,
        num_decoder_layers=num_decoder_layers,
        d_ffn=d_ffn,
        dropout=dropout,
        activation=activation,
        positional_encoding=positional_encoding,
        normalize_before=normalize_before,
        kernel_size=kernel_size,
        bias=bias,
        encoder_module=encoder_module,
        conformer_activation=conformer_activation,
        attention_type=attention_type,
        max_length=max_length,
        causal=causal,
    )

    # Project the input features to d_model before the encoder.
    self.custom_src_module = ModuleList(
        Linear(
            input_size=input_size,
            n_neurons=d_model,
            bias=True,
            combine_dims=False,
        ),
        torch.nn.Dropout(dropout),
    )

    # Embed the target tokens for the decoder.
    self.custom_tgt_module = ModuleList(NormalizedEmbedding(d_model, tgt_vocab))

    # reset parameters using xavier_normal_
    self._init_params()
def __init__(
    self,
    intra_mdl,
    inter_mdl,
    out_channels,
    norm="ln",
    skip_around_intra=True,
    linear_layer_after_inter_intra=True,
):
    super(Dual_Computation_Block, self).__init__()
    self.intra_mdl = intra_mdl
    self.inter_mdl = inter_mdl
    self.skip_around_intra = skip_around_intra
    self.linear_layer_after_inter_intra = linear_layer_after_inter_intra

    # Norm
    self.norm = norm
    if norm is not None:
        self.intra_norm = select_norm(norm, out_channels, 4)
        self.inter_norm = select_norm(norm, out_channels, 4)

    # Linear layers projecting the RNN outputs back to out_channels.
    # The factor of 2 accounts for the bidirectional RNN.
    if linear_layer_after_inter_intra:
        if isinstance(intra_mdl, SBRNNBlock):
            self.intra_linear = Linear(
                out_channels, input_size=2 * intra_mdl.mdl.rnn.hidden_size
            )
        else:
            self.intra_linear = Linear(out_channels, input_size=out_channels)

        if isinstance(inter_mdl, SBRNNBlock):
            self.inter_linear = Linear(
                out_channels, input_size=2 * inter_mdl.mdl.rnn.hidden_size
            )
        else:
            self.inter_linear = Linear(out_channels, input_size=out_channels)
def __init__(
    self,
    device="cpu",
    activation=torch.nn.LeakyReLU,
    tdnn_blocks=5,
    tdnn_channels=[512, 512, 512, 512, 1500],
    tdnn_kernel_sizes=[5, 3, 3, 1, 1],
    tdnn_dilations=[1, 2, 3, 1, 1],
    lin_neurons=512,
    in_channels=40,
):
    super().__init__()
    self.blocks = nn.ModuleList()

    # The TDNN is a stack of 1D convolutions with the given kernel sizes and
    # dilation factors. Note that batch normalization is applied after the
    # activation function here, which improves sound classification
    # performance a bit.
    for block_index in range(tdnn_blocks):
        out_channels = tdnn_channels[block_index]
        self.blocks.extend(
            [
                Conv1d(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=tdnn_kernel_sizes[block_index],
                    dilation=tdnn_dilations[block_index],
                ),
                activation(),
                BatchNorm1d(input_size=out_channels),
            ]
        )
        in_channels = tdnn_channels[block_index]

    # Statistical pooling converts a variable-length tensor into a
    # fixed-length one by returning the mean and the standard deviation.
    self.blocks.append(StatisticsPooling())

    # Final linear transformation.
    self.blocks.append(
        Linear(
            input_size=out_channels * 2,  # mean + std
            n_neurons=lin_neurons,
            bias=True,
            combine_dims=False,
        )
    )
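# Usage sketch for the TDNN embedding model whose __init__ is shown above. The
# class name Xvector, the import path, and the exact output shape are assumptions
# based on the TDNN + statistics-pooling structure (they match SpeechBrain's
# x-vector lobe, but are not guaranteed by the snippet itself).
import torch
from speechbrain.lobes.models.Xvector import Xvector

compute_xvect = Xvector(in_channels=40, lin_neurons=512)
input_feats = torch.rand(5, 120, 40)      # (batch, time, features)
embeddings = compute_xvect(input_feats)   # statistics pooling collapses the time axis
print(embeddings.shape)                   # expected: torch.Size([5, 1, 512])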
def __init__(
    self,
    device="cpu",
    activation=torch.nn.LeakyReLU,
    tdnn_blocks=5,
    tdnn_channels=[512, 512, 512, 512, 1500],
    tdnn_kernel_sizes=[5, 3, 3, 1, 1],
    tdnn_dilations=[1, 2, 3, 1, 1],
    lin_neurons=512,
    in_channels=40,
):
    super().__init__()
    self.blocks = nn.ModuleList()

    # TDNN layers
    for block_index in range(tdnn_blocks):
        out_channels = tdnn_channels[block_index]
        self.blocks.extend(
            [
                Conv1d(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=tdnn_kernel_sizes[block_index],
                    dilation=tdnn_dilations[block_index],
                ),
                activation(),
                BatchNorm1d(input_size=out_channels),
            ]
        )
        in_channels = tdnn_channels[block_index]

    # Statistical pooling
    self.blocks.append(StatisticsPooling())

    # Final linear transformation
    self.blocks.append(
        Linear(
            input_size=out_channels * 2,  # mean + std
            n_neurons=lin_neurons,
            bias=True,
            combine_dims=False,
        )
    )