def test_norms(norm_str, channel_size):
    norm_layer = norms.get(norm_str)
    # Use get on the class
    out_from_get = norms.get(norm_layer)
    assert out_from_get == norm_layer
    # Use get on the instance
    norm_layer = norm_layer(channel_size)
    out_from_get = norms.get(norm_layer)
    assert out_from_get == norm_layer
    # Test forward
    inp = torch.randn(4, channel_size, 12)
    _ = norm_layer(inp)
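
# Usage sketch of the `get` resolution exercised by the test above: a string, a
# class, or an instance all resolve through the same registry. Assumes `norms`
# is the registry module used in these snippets (e.g. asteroid.masknn.norms) and
# that "gLN" is a registered key; adjust the import to your project layout.
import torch
from asteroid.masknn import norms

norm_cls = norms.get("gLN")                # string -> class
assert norms.get(norm_cls) == norm_cls     # class -> same class
layer = norm_cls(16)                       # instantiate for 16 channels
assert norms.get(layer) == layer           # instance -> same instance
out = layer(torch.randn(4, 16, 100))       # (batch, channels, time)
assert out.shape == (4, 16, 100)
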
def __init__(self, in_chan, n_src, out_chan=None, n_blocks=8, n_repeats=3,
             bn_chan=128, hid_chan=512, kernel_size=3, norm_type="gLN"):
    super(TCN, self).__init__()
    self.in_chan = in_chan
    self.n_src = n_src
    out_chan = out_chan if out_chan else in_chan
    self.out_chan = out_chan
    self.n_blocks = n_blocks
    self.n_repeats = n_repeats
    self.bn_chan = bn_chan
    self.hid_chan = hid_chan
    self.kernel_size = kernel_size
    self.norm_type = norm_type

    layer_norm = norms.get(norm_type)(in_chan)
    bottleneck_conv = nn.Conv1d(in_chan, bn_chan, 1)
    self.bottleneck = nn.Sequential(layer_norm, bottleneck_conv)
    # Succession of Conv1DBlock with exponentially increasing dilation.
    self.TCN = nn.ModuleList()
    for r in range(n_repeats):
        for x in range(n_blocks):
            padding = (kernel_size - 1) * 2 ** x // 2
            self.TCN.append(Conv1DBlock(bn_chan, hid_chan, kernel_size,
                                        padding=padding, dilation=2 ** x,
                                        norm_type=norm_type))
    out_conv = nn.Conv1d(bn_chan, n_src * out_chan, 1)
    self.out = nn.Sequential(nn.PReLU(), out_conv)
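
# Standalone sketch (pure arithmetic, no project imports) of the dilation/padding
# schedule built by the loop above. With kernel_size=3 and stride 1, the padding
# rule (kernel_size - 1) * 2**x // 2 keeps each dilated conv length-preserving.
kernel_size = 3
n_blocks = 8
for x in range(n_blocks):
    dilation = 2 ** x
    padding = (kernel_size - 1) * 2 ** x // 2
    # Length preservation for stride-1 convs: out_len = in_len + 2*padding - dilation*(kernel_size - 1)
    assert 2 * padding == dilation * (kernel_size - 1)
    print(f"block {x}: dilation={dilation}, padding={padding}")
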
def __init__(self, in_chan=256, out_chan_tcn=1, n_blocks=5, n_repeats=3,
             bn_chan=64, hid_chan=128, kernel_size=3, norm_type="gLN",
             chunk=200, stride=40):
    super(TCN, self).__init__()
    self.in_chan = in_chan
    self.out_chan_tcn = out_chan_tcn
    self.n_blocks = n_blocks
    self.n_repeats = n_repeats
    self.bn_chan = bn_chan
    self.hid_chan = hid_chan
    self.kernel_size = kernel_size
    self.norm_type = norm_type
    self.chunk = chunk
    self.stride = stride

    self.in_norm = norms.get(norm_type)(in_chan)
    self.bottleneck = nn.Sequential(nn.Conv1d(in_chan, bn_chan, 1))
    # Succession of Conv1DBlock with exponentially increasing dilation.
    self.TCN = nn.ModuleList()
    for r in range(n_repeats):
        res_blocks = nn.ModuleList()
        for x in range(n_blocks):
            padding = (kernel_size - 1) * 2 ** x // 2
            res_blocks.append(Conv1DBlock(bn_chan, hid_chan, kernel_size,
                                          padding=padding, dilation=2 ** x,
                                          norm_type=norm_type))
        self.TCN.append(res_blocks)
    self.out = nn.Sequential(nn.PReLU(), nn.Conv1d(bn_chan, 1, 1))
def __init__(
    self,
    embed_dim,
    n_heads,
    dim_ff,
    dropout=0.0,
    activation="relu",
    norm="gLN",
):
    super(PreLNTransformerLayer, self).__init__()
    self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)
    self.dropout = nn.Dropout(dropout)
    self.linear1 = nn.Linear(embed_dim, dim_ff)
    self.linear2 = nn.Linear(dim_ff, embed_dim)
    self.activation = activations.get(activation)()
    self.norm_mha = norms.get(norm)(embed_dim)
    self.norm_ff = norms.get(norm)(embed_dim)
def __init__(self, in_chan, hid_chan, kernel_size, padding, dilation,
             norm_type="bN", delta=False):
    super(Conv1DBlock, self).__init__()
    conv_norm = norms.get(norm_type)
    self.delta = delta
    if delta:
        self.linear = nn.Linear(in_chan, in_chan)
        self.linear_norm = norms.get(norm_type)(in_chan * 2)
    in_bottle = in_chan if not delta else in_chan * 2
    in_conv1d = nn.Conv1d(in_bottle, hid_chan, 1)
    depth_conv1d = nn.Conv1d(hid_chan, hid_chan, kernel_size,
                             padding=padding, dilation=dilation,
                             groups=hid_chan)
    self.shared_block = nn.Sequential(in_conv1d, nn.PReLU(),
                                      conv_norm(hid_chan), depth_conv1d,
                                      nn.PReLU(), conv_norm(hid_chan))
    self.res_conv = nn.Conv1d(hid_chan, in_chan, 1)
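
# Why the block pairs a 1x1 ("pointwise") conv with a grouped ("depthwise") conv,
# as in `in_conv1d` + `depth_conv1d` above: the pair has far fewer parameters than
# one full convolution over the same channel widths. Standalone sketch with plain
# PyTorch; the channel sizes are illustrative, not taken from the snippets.
import torch.nn as nn

in_chan, hid_chan, kernel_size = 128, 512, 3
pointwise = nn.Conv1d(in_chan, hid_chan, 1)
depthwise = nn.Conv1d(hid_chan, hid_chan, kernel_size, groups=hid_chan)
full = nn.Conv1d(in_chan, hid_chan, kernel_size)

def n_params(module):
    return sum(p.numel() for p in module.parameters())

print(n_params(pointwise) + n_params(depthwise))  # 66048 + 2048 = 68096
print(n_params(full))                             # 197120
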
def __init__(
    self,
    embed_dim,
    n_heads,
    dim_ff,
    dropout=0.0,
    activation="relu",
    bidirectional=True,
    norm="gLN",
):
    super(ImprovedTransformedLayer, self).__init__()
    self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)
    self.recurrent = nn.LSTM(embed_dim, dim_ff, bidirectional=bidirectional)
    self.dropout = nn.Dropout(dropout)
    ff_inner_dim = 2 * dim_ff if bidirectional else dim_ff
    self.linear = nn.Linear(ff_inner_dim, embed_dim)
    self.activation = activations.get(activation)()
    self.norm_mha = norms.get(norm)(embed_dim)
    self.norm_ff = norms.get(norm)(embed_dim)
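
# Shape sketch for the feed-forward path above: a bidirectional LSTM emits
# 2 * dim_ff features per step, which is why `ff_inner_dim` doubles before the
# Linear maps back to embed_dim. Standalone, plain PyTorch; sizes are illustrative.
import torch
import torch.nn as nn

embed_dim, dim_ff, bidirectional = 64, 256, True
recurrent = nn.LSTM(embed_dim, dim_ff, bidirectional=bidirectional)
ff_inner_dim = 2 * dim_ff if bidirectional else dim_ff
linear = nn.Linear(ff_inner_dim, embed_dim)

x = torch.randn(100, 8, embed_dim)    # (time, batch, embed_dim)
out, _ = recurrent(x)                 # (time, batch, ff_inner_dim)
assert out.shape == (100, 8, ff_inner_dim)
assert linear(out).shape == x.shape
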
def test_register():
    class Custom(nn.Module):
        def __init__(self):
            super().__init__()

    norms.register_norm(Custom)
    cls = norms.get("Custom")
    assert cls == Custom
    with pytest.raises(ValueError):
        norms.register_norm(norms.CumLN)
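
# Hypothetical follow-on to the registration test: once a class is registered,
# its name can be passed as `norm_type`/`norm` to the modules above, since they
# all resolve it through `norms.get(...)`. `MyNorm` is an illustrative name and
# the import path is an assumption about the project layout.
import torch.nn as nn
from asteroid.masknn import norms

class MyNorm(nn.BatchNorm1d):
    """Toy norm: plain BatchNorm1d under a new registered name."""
    pass

norms.register_norm(MyNorm)
assert norms.get("MyNorm") == MyNorm
layer = norms.get("MyNorm")(128)  # same call pattern as norms.get(norm_type)(in_chan)
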
def __init__(
    self,
    in_chan,
    bn_chan,
    hid_size,
    chunk_size,
    hop_size=None,
    n_repeats=6,
    norm_type="gLN",
    bidirectional=True,
    rnn_type="LSTM",
    use_mulcat=True,
    num_layers=1,
    dropout=0,
):
    super(DPRNN_Multistage, self).__init__()
    self.in_chan = in_chan
    self.bn_chan = bn_chan
    self.hid_size = hid_size
    self.chunk_size = chunk_size
    hop_size = hop_size if hop_size is not None else chunk_size // 2
    self.hop_size = hop_size
    self.n_repeats = n_repeats
    self.norm_type = norm_type
    self.bidirectional = bidirectional
    self.rnn_type = rnn_type
    self.num_layers = num_layers
    self.dropout = dropout
    self.use_mulcat = use_mulcat

    layer_norm = norms.get(norm_type)(in_chan)
    bottleneck_conv = nn.Conv1d(in_chan, bn_chan, 1)
    self.bottleneck = nn.Sequential(layer_norm, bottleneck_conv)
    # Succession of DPRNNBlocks.
    self.net = nn.ModuleList([])
    for i in range(self.n_repeats):
        self.net.append(
            DPRNNBlock(
                bn_chan,
                hid_size,
                norm_type=norm_type,
                bidirectional=bidirectional,
                rnn_type=rnn_type,
                use_mulcat=use_mulcat,
                num_layers=num_layers,
                dropout=dropout,
            ))
def __init__(self, input_size, n_outs=5, hidden_sizes=(512, 1024, 512, 256),
             bidirectional=False):
    super(LSTMDense, self).__init__()
    self.norm = norms.get("gLN")(input_size)
    self.lstm = nn.LSTM(input_size, hidden_sizes[0],
                        bidirectional=bidirectional)
    out_feats = hidden_sizes[0] if not bidirectional else hidden_sizes[0] * 2
    self.denses = nn.Sequential(
        nn.Linear(out_feats, hidden_sizes[1]), nn.ReLU(),
        nn.Linear(hidden_sizes[1], hidden_sizes[2]), nn.ReLU(),
        nn.Linear(hidden_sizes[2], hidden_sizes[3]), nn.ReLU())
    self.out = nn.Sequential(nn.Linear(hidden_sizes[-1], n_outs),
                             nn.Softmax(dim=-1))
def __init__(self, in_size, n_outs=3, repeats=2, blocks=2,
             channels=(64, 32, 128, 64), hidden_size=40, bidirectional=False,
             ksz=(3, 3), dropout=0, activation=nn.ReLU()):
    super(C2DRNN, self).__init__()
    self.in_size = in_size
    assert len(channels) == repeats * blocks
    self.norm = norms.get("gLN")(in_size)
    net = []
    for i in range(repeats):
        for j in range(blocks):
            if i == 0 and j == 0:
                conv_in = 1
            else:
                conv_in = channels[i * 2 + j - 1]
            net.extend([
                nn.Conv2d(conv_in, channels[i * 2 + j], ksz, 1),
                activation
            ])
        net.append(nn.MaxPool2d(ksz))
        net.append(nn.Dropout(dropout))
    self.feats = nn.Sequential(*net)
    self.lstm = nn.LSTM(64 * 5, hidden_size, bidirectional=bidirectional)
    feats_in = hidden_size if not bidirectional else hidden_size * 2
    self.max1d = nn.MaxPool1d(2, stride=2)
    self.out = nn.Sequential(nn.Linear(1040, n_outs), nn.Softmax(-1))
def __init__(
    self,
    in_chan,
    n_src,
    n_heads=4,
    ff_hid=256,
    chunk_size=100,
    hop_size=None,
    n_repeats=6,
    norm_type="gLN",
    ff_activation="relu",
    mask_act="relu",
    bidirectional=True,
    dropout=0,
):
    super(DPTransformer, self).__init__()
    self.in_chan = in_chan
    self.n_src = n_src
    self.n_heads = n_heads
    self.ff_hid = ff_hid
    self.chunk_size = chunk_size
    hop_size = hop_size if hop_size is not None else chunk_size // 2
    self.hop_size = hop_size
    self.n_repeats = n_repeats
    self.norm_type = norm_type
    self.ff_activation = ff_activation
    self.mask_act = mask_act
    self.bidirectional = bidirectional
    self.dropout = dropout

    self.in_norm = norms.get(norm_type)(in_chan)
    # Succession of DPRNNBlocks.
    self.layers = nn.ModuleList([])
    for x in range(self.n_repeats):
        self.layers.append(
            nn.ModuleList([
                ImprovedTransformedLayer(
                    self.in_chan,
                    self.n_heads,
                    self.ff_hid,
                    self.dropout,
                    self.ff_activation,
                    True,
                    self.norm_type,
                ),
                ImprovedTransformedLayer(
                    self.in_chan,
                    self.n_heads,
                    self.ff_hid,
                    self.dropout,
                    self.ff_activation,
                    self.bidirectional,
                    self.norm_type,
                ),
            ]))
    net_out_conv = nn.Conv2d(self.in_chan, n_src * self.in_chan, 1)
    self.first_out = nn.Sequential(nn.PReLU(), net_out_conv)
    # Gating and masking in 2D space (after fold)
    self.net_out = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1), nn.Tanh())
    self.net_gate = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1), nn.Sigmoid())
    # Get activation function.
    mask_nl_class = activations.get(mask_act)
    # For softmax, feed the source dimension.
    if has_arg(mask_nl_class, "dim"):
        self.output_act = mask_nl_class(dim=1)
    else:
        self.output_act = mask_nl_class()
def test_get_none():
    assert norms.get(None) is None
def test_get_errors(wrong):
    with pytest.raises(ValueError):
        # Should raise for anything that is not a norm class/instance or a known string.
        norms.get(wrong)
def __init__(
    self,
    in_chan,
    n_src,
    n_heads=4,
    ff_hid=256,
    chunk_size=100,
    hop_size=None,
    n_repeats=6,
    norm_type="gLN",
    ff_activation="relu",
    mask_act="relu",
    bidirectional=True,
    dropout=0,
):
    super(DPTransformer, self).__init__()
    self.in_chan = in_chan
    self.n_src = n_src
    self.n_heads = n_heads
    self.ff_hid = ff_hid
    self.chunk_size = chunk_size
    hop_size = hop_size if hop_size is not None else chunk_size // 2
    self.hop_size = hop_size
    self.n_repeats = n_repeats
    self.norm_type = norm_type
    self.ff_activation = ff_activation
    self.mask_act = mask_act
    self.bidirectional = bidirectional
    self.dropout = dropout

    self.mha_in_dim = ceil(self.in_chan / self.n_heads) * self.n_heads
    if self.in_chan % self.n_heads != 0:
        warnings.warn(
            f"DPTransformer input dim ({self.in_chan}) is not a multiple of the number of "
            f"heads ({self.n_heads}). Adding extra linear layer at input to accommodate "
            f"(size [{self.in_chan} x {self.mha_in_dim}])")
        self.input_layer = nn.Linear(self.in_chan, self.mha_in_dim)
    else:
        self.input_layer = None

    self.in_norm = norms.get(norm_type)(self.mha_in_dim)
    self.ola = DualPathProcessing(self.chunk_size, self.hop_size)

    # Succession of DPRNNBlocks.
    self.layers = nn.ModuleList([])
    for x in range(self.n_repeats):
        self.layers.append(
            nn.ModuleList([
                ImprovedTransformedLayer(
                    self.mha_in_dim,
                    self.n_heads,
                    self.ff_hid,
                    self.dropout,
                    self.ff_activation,
                    True,
                    self.norm_type,
                ),
                ImprovedTransformedLayer(
                    self.mha_in_dim,
                    self.n_heads,
                    self.ff_hid,
                    self.dropout,
                    self.ff_activation,
                    self.bidirectional,
                    self.norm_type,
                ),
            ]))
    net_out_conv = nn.Conv2d(self.mha_in_dim, n_src * self.in_chan, 1)
    self.first_out = nn.Sequential(nn.PReLU(), net_out_conv)
    # Gating and masking in 2D space (after fold)
    self.net_out = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1), nn.Tanh())
    self.net_gate = nn.Sequential(nn.Conv1d(self.in_chan, self.in_chan, 1), nn.Sigmoid())
    # Get activation function.
    mask_nl_class = activations.get(mask_act)
    # For softmax, feed the source dimension.
    if has_arg(mask_nl_class, "dim"):
        self.output_act = mask_nl_class(dim=1)
    else:
        self.output_act = mask_nl_class()
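
# Worked example of the `mha_in_dim` rule above: the encoder dimension is rounded
# up to the nearest multiple of `n_heads`, and the extra input Linear is only
# added when rounding changed the size. Pure arithmetic mirroring the code above.
from math import ceil

for in_chan, n_heads in [(64, 4), (65, 4), (62, 8)]:
    mha_in_dim = ceil(in_chan / n_heads) * n_heads
    needs_input_layer = in_chan % n_heads != 0
    print(in_chan, n_heads, "->", mha_in_dim, "extra Linear:", needs_input_layer)
# 64 4 -> 64 extra Linear: False
# 65 4 -> 68 extra Linear: True
# 62 8 -> 64 extra Linear: True
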
def __init__(
    self,
    embed_dim,          # in_chan
    n_heads,
    dim_ff,
    dropout=0.0,
    activation="relu",  # ff activation
    bidirectional=True,
    norm="gLN",
    n_blocks=3,
    bn_chan=128,
    hid_chan=512,
    skip_chan=128,
    conv_kernel_size=3,
    use_sdu=False,
    use_mem=False,
    num_mem_token=2,
):
    super(AcousticTransformerLayer, self).__init__()
    self.use_sdu = use_sdu
    self.use_mem = use_mem
    self.num_mem_token = num_mem_token
    if use_mem:
        w = torch.empty(num_mem_token, embed_dim)
        nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu'))
        # Unsqueeze before wrapping so the memory tokens stay registered as an nn.Parameter.
        self.mem = nn.Parameter(w.unsqueeze(0), requires_grad=True)
    # query, key, value dim
    self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)
    # input dim, hidden dim
    # self.ff1 = nn.Sequential(
    #     norms.get(norm)(embed_dim),
    #     nn.Conv1d(embed_dim, dim_ff, kernel_size=1, stride=1, padding=0, bias=True),
    #     activations.get(activation)(),
    # )
    self.ff1 = nn.Conv1d(embed_dim, dim_ff, kernel_size=1, stride=1, padding=0, bias=True)
    self.ff2 = nn.Conv1d(dim_ff, embed_dim, kernel_size=1, stride=1, padding=0, bias=True)
    self.dropout = nn.Dropout(dropout)
    self.activation = activations.get(activation)()
    self.norm_mha = norms.get(norm)(embed_dim)
    self.norm_ff = norms.get(norm)(embed_dim)
    self.norm_out = norms.get(norm)(embed_dim)
    if use_sdu:
        self.mha_out = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1), nn.Tanh())
        self.mha_gate = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1), nn.Sigmoid())
        self.ff_out = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1), nn.Tanh())
        self.ff_gate = nn.Sequential(nn.Conv1d(embed_dim, embed_dim, 1), nn.Sigmoid())
def __init__(
    self,
    in_chan,  # encoder out channel 64
    n_src,
    out_chan=None,
    bn_chan=64,
    n_heads=4,
    ff_hid=256,
    rnn_hid=128,
    rnn_layers=1,
    pe_conv_k=3,
    chunk_size=100,
    hop_size=None,  # 50
    n_repeats=6,  # 2
    norm_type="gLN",
    ff_activation="relu",
    mask_act="relu",  # sigmoid
    bidirectional=True,
    dropout=0,
):
    super(DualTransformer, self).__init__()
    self.in_chan = in_chan
    out_chan = out_chan if out_chan is not None else in_chan
    self.out_chan = out_chan
    self.bn_chan = bn_chan
    self.n_src = n_src
    self.n_heads = n_heads
    self.ff_hid = ff_hid
    self.rnn_hid = rnn_hid
    self.rnn_layers = rnn_layers
    self.chunk_size = chunk_size
    hop_size = hop_size if hop_size is not None else chunk_size // 2
    self.hop_size = hop_size
    self.n_repeats = n_repeats
    self.norm_type = norm_type
    self.ff_activation = ff_activation
    self.mask_act = mask_act
    self.bidirectional = bidirectional
    self.dropout = dropout

    # Mean/var are taken over the whole sequence and channels, but gamma/beta are per channel.
    # gLN vs cLN: normalize over the whole sequence, or per time step.
    # self.in_norm = norms.get(norm_type)(in_chan)
    layer_norm = norms.get(norm_type)(in_chan)
    bottleneck_conv = nn.Conv1d(in_chan, bn_chan, 1)
    self.bottleneck = nn.Sequential(layer_norm, bottleneck_conv)

    pe_conv_list = []
    for i in range(pe_conv_k):
        pe_conv_list.append(
            nn.Conv2d(bn_chan, bn_chan, kernel_size=3, stride=1, padding=1, bias=False))
        pe_conv_list.append(norms.get(norm_type)(bn_chan))
        pe_conv_list.append(activations.get(ff_activation)())
    self.pe_conv = nn.Sequential(*pe_conv_list)

    d_model = self.bn_chan
    # # *2 for PE
    # self.pe = PositionalEmbedding(in_chan)
    # d_model = self.in_chan * 2

    # Succession of DPRNNBlocks.
    self.layers = nn.ModuleList([])
    for x in range(self.n_repeats):
        self.layers.append(
            nn.ModuleList([
                # ImprovedTransformedLayer(
                #     d_model,
                #     self.n_heads,
                #     self.ff_hid,
                #     self.dropout,
                #     self.ff_activation,
                #     True,
                #     self.norm_type,
                # ),
                # ImprovedTransformedLayer(
                #     d_model,
                #     self.n_heads,
                #     self.ff_hid,
                #     self.dropout,
                #     self.ff_activation,
                #     self.bidirectional,
                #     self.norm_type,
                # ),
                SingleRNNBlock(
                    in_chan=d_model,
                    hid_size=self.rnn_hid,
                    norm_type=self.norm_type,
                    bidirectional=self.bidirectional,
                    rnn_type='LSTM',
                    num_layers=1,
                    dropout=self.dropout,
                ),
                # DualTransformedLayer(
                #     d_model,
                #     self.n_heads,
                #     self.ff_hid,
                #     self.dropout,
                #     self.ff_activation,
                #     self.norm_type,
                # ),
                AcousticTransformerLayer(
                    d_model,
                    self.n_heads,
                    self.ff_hid,
                    self.dropout,
                    self.ff_activation,
                    # Passed by keyword so the norm does not bind to `bidirectional`.
                    norm=self.norm_type,
                ),
            ]))
    # self.layers.append(
    #     nn.ModuleList([
    #         DualTransformedLayer(
    #             d_model,
    #             self.n_heads,
    #             self.ff_hid,
    #             self.dropout,
    #             self.ff_activation,
    #             self.norm_type,
    #         ),
    #         DualTransformedLayer(
    #             d_model,
    #             self.n_heads,
    #             self.ff_hid,
    #             self.dropout,
    #             self.ff_activation,
    #             self.norm_type,
    #         ),
    #     ]))

    # 1x1 conv
    # *2 for PE
    self.strnn_norm_out = norms.get(norm_type)(self.bn_chan)
    net_out_conv = nn.Conv2d(d_model, n_src * self.bn_chan, 1)
    self.first_out = nn.Sequential(nn.PReLU(), net_out_conv)
    # Gating and masking in 2D space (after fold)
    self.net_out = nn.Sequential(nn.Conv1d(self.bn_chan, self.bn_chan, 1), nn.Tanh())
    self.net_gate = nn.Sequential(nn.Conv1d(self.bn_chan, self.bn_chan, 1), nn.Sigmoid())
    self.mask_net = nn.Conv1d(bn_chan, out_chan, 1, bias=False)
    # Get activation function.
    mask_nl_class = activations.get(mask_act)
    # For softmax, feed the source dimension.
    if has_arg(mask_nl_class, "dim"):
        self.output_act = mask_nl_class(dim=1)
    else:
        self.output_act = mask_nl_class()
def __init__(
    self,
    embed_dim,  # in_chan
    n_heads,
    dim_ff,
    dropout=0.0,
    activation="relu",  # ff activation
    bidirectional=True,
    norm="gLN",
    n_blocks=3,
    bn_chan=128,
    hid_chan=512,
    skip_chan=128,
    conv_kernel_size=3,
):
    super(DualTransformedLayer, self).__init__()
    # query, key, value dim
    self.mha = MultiheadAttention(embed_dim, n_heads, dropout=dropout)
    # ------1------
    # self.recurrent = nn.LSTM(embed_dim, dim_ff, bidirectional=bidirectional)
    # ff_inner_dim = 2 * dim_ff if bidirectional else dim_ff
    # self.linear = nn.Linear(ff_inner_dim, embed_dim)
    # ------2------
    # input dim, hidden dim
    self.ff = nn.Sequential(
        norms.get(norm)(embed_dim),
        activations.get(activation)(),
        nn.Conv1d(embed_dim, dim_ff, kernel_size=1, stride=1, padding=0, bias=False),
        norms.get(norm)(dim_ff),
        activations.get(activation)(),
        nn.Conv1d(dim_ff, embed_dim, kernel_size=1, stride=1, padding=0, bias=False),
    )
    # ------3------
    # self.skip_chan = skip_chan
    # self.ff = nn.ModuleList()
    # for x in range(n_blocks):
    #     padding = (conv_kernel_size - 1) * (2 ** x - 1) // 2
    #     self.ff.append(
    #         Conv1DBlock(
    #             bn_chan,
    #             hid_chan,
    #             skip_chan,
    #             conv_kernel_size,
    #             padding=padding,
    #             dilation=(2 ** x - 1),
    #             norm_type=norm,
    #         )
    #     )
    self.dropout = nn.Dropout(dropout)
    self.activation = activations.get(activation)()
    self.norm_mha = norms.get(norm)(embed_dim)
    self.norm_ff = norms.get(norm)(embed_dim)