def __init__(
    self,
    in_channels,
    out_channels,
    intra_model,
    inter_model,
    num_layers=1,
    norm="ln",
    K=200,
    num_spks=2,
    skip_around_intra=True,
    linear_layer_after_inter_intra=True,
    use_global_pos_enc=False,
    max_length=20000,
):
    super(Dual_Path_Model, self).__init__()
    self.K = K
    self.num_spks = num_spks
    self.num_layers = num_layers

    # Normalize the encoded input and project it to the model dimension.
    self.norm = select_norm(norm, in_channels, 3)
    self.conv1d = nn.Conv1d(in_channels, out_channels, 1, bias=False)

    self.use_global_pos_enc = use_global_pos_enc
    if self.use_global_pos_enc:
        self.pos_enc = PositionalEncoding(max_length)

    # Stack of dual-path blocks, each running the intra-chunk model
    # followed by the inter-chunk model.
    self.dual_mdl = nn.ModuleList([])
    for i in range(num_layers):
        self.dual_mdl.append(
            copy.deepcopy(
                Dual_Computation_Block(
                    intra_model,
                    inter_model,
                    out_channels,
                    norm,
                    skip_around_intra=skip_around_intra,
                    linear_layer_after_inter_intra=linear_layer_after_inter_intra,
                )
            )
        )

    # Expand the features to one set of channels per speaker, then map
    # back to the encoder dimension.
    self.conv2d = nn.Conv2d(
        out_channels, out_channels * num_spks, kernel_size=1
    )
    self.end_conv1x1 = nn.Conv1d(out_channels, in_channels, 1, bias=False)
    self.prelu = nn.PReLU()
    self.activation = nn.ReLU()

    # gated output layer
    self.output = nn.Sequential(
        nn.Conv1d(out_channels, out_channels, 1), nn.Tanh()
    )
    self.output_gate = nn.Sequential(
        nn.Conv1d(out_channels, out_channels, 1), nn.Sigmoid()
    )
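
# Illustrative usage sketch, not part of the original file: builds the
# dual-path mask network with the SBTransformerBlock wrapper defined later
# in this file, assuming the model's forward takes an encoded mixture of
# shape [B, N, L] and returns one mask per speaker, [num_spks, B, N, L].
# The helper name and the parameter values are placeholders.
def _example_dual_path_model():
    import torch

    intra = SBTransformerBlock(num_layers=1, d_model=64, nhead=8)
    inter = SBTransformerBlock(num_layers=1, d_model=64, nhead=8)
    masknet = Dual_Path_Model(64, 64, intra, inter, num_spks=2)
    est_masks = masknet(torch.randn(10, 64, 2000))
    return est_masks.shape  # expected: torch.Size([2, 10, 64, 2000])
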
def __init__(
    self,
    num_layers,
    d_model,
    nhead,
    d_ffn=2048,
    input_shape=None,
    kdim=None,
    vdim=None,
    dropout=0.1,
    activation="swish",
    kernel_size=31,
    bias=True,
    use_positional_encoding=False,
):
    super(SBConformerEncoderBlock, self).__init__()
    self.use_positional_encoding = use_positional_encoding

    # Resolve the activation name to the corresponding module class.
    if activation == "relu":
        activation = nn.ReLU
    elif activation == "gelu":
        activation = nn.GELU
    elif activation == "swish":
        activation = Swish
    else:
        raise ValueError("unknown activation")

    # Wrapped ConformerEncoder used as the sequence model.
    self.mdl = ConformerEncoder(
        num_layers=num_layers,
        nhead=nhead,
        d_ffn=d_ffn,
        input_shape=input_shape,
        d_model=d_model,
        kdim=kdim,
        vdim=vdim,
        dropout=dropout,
        activation=activation,
        kernel_size=kernel_size,
        bias=bias,
    )

    if use_positional_encoding:
        self.pos_enc = PositionalEncoding(input_size=d_model)
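
# Illustrative sketch, not part of the original file: the Conformer wrapper
# is a drop-in alternative to the transformer wrapper for the intra- and
# inter-chunk models of Dual_Path_Model. Construction only; the helper name
# and parameter values are placeholders, not recipe defaults.
def _example_conformer_masknet():
    intra = SBConformerEncoderBlock(num_layers=1, d_model=64, nhead=8)
    inter = SBConformerEncoderBlock(num_layers=1, d_model=64, nhead=8)
    return Dual_Path_Model(64, 64, intra, inter, num_spks=2)
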
def __init__(
    self,
    num_layers,
    d_model,
    nhead,
    d_ffn=2048,
    input_shape=None,
    kdim=None,
    vdim=None,
    dropout=0.1,
    activation="relu",
    return_attention=False,
    num_modules=1,
    use_group_comm=False,
    use_positional_encoding=False,
    norm_before=False,
):
    super(SBTransformerBlock, self).__init__()
    self.use_positional_encoding = use_positional_encoding

    # Resolve the activation name to the corresponding module class.
    if activation == "relu":
        activation = nn.ReLU
    elif activation == "gelu":
        activation = nn.GELU
    else:
        raise ValueError("unknown activation")

    # Wrapped TransformerEncoder used as the sequence model.
    self.mdl = TransformerEncoder(
        num_layers=num_layers,
        nhead=nhead,
        d_ffn=d_ffn,
        input_shape=input_shape,
        d_model=d_model,
        kdim=kdim,
        vdim=vdim,
        dropout=dropout,
        activation=activation,
        normalize_before=norm_before,
    )

    if use_positional_encoding:
        self.pos_enc = PositionalEncoding(input_size=d_model)
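
# Illustrative sketch, not part of the original file: standalone use of the
# transformer wrapper on a batch of chunked features, assuming its forward
# returns a tensor of the same [B, L, N] shape. The helper name and shapes
# are placeholders.
def _example_transformer_block():
    import torch

    block = SBTransformerBlock(num_layers=1, d_model=64, nhead=8)
    x = torch.randn(10, 100, 64)  # [batch, chunk length, d_model]
    return block(x).shape  # expected: torch.Size([10, 100, 64])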