Example #1
def conv__gru_3x368_drop02(num_outputs) -> nn.Module:
    model = SequentialSequential(*[
        ConvExtractor(),
        LambdaModule(lambda seq, seq_len: (F.relu(seq), seq_len)),
        RNNEncoder(dropout=0.2, rnn_type="GRU", num_layers=3, hidden_size=368, input_size=512),
        SequentialLinear(368 * 2, num_outputs, pre_activation=True)
    ])
    return model
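
These builders rely on project-specific helpers (ConvExtractor, RNNEncoder, SequentialLinear, SequentialSequential, LambdaModule) defined elsewhere in the source repository. As orientation, here is a minimal sketch of the two generic ones, assuming the convention visible above that every stage consumes and returns a (seq, seq_len) tuple; the real implementations may differ:

import torch.nn as nn

class LambdaModule(nn.Module):
    # Wraps an arbitrary callable so it can sit inside a Sequential.
    def __init__(self, fn):
        super().__init__()
        self.fn = fn

    def forward(self, *args):
        return self.fn(*args)

class SequentialSequential(nn.Module):
    # Like nn.Sequential, but threads a (seq, seq_len) tuple through every
    # stage so variable-length batches keep their lengths available, e.g.
    # for packing inside an RNN stage.
    def __init__(self, *modules):
        super().__init__()
        self.stages = nn.ModuleList(modules)

    def forward(self, seq, seq_len):
        for stage in self.stages:
            seq, seq_len = stage(seq, seq_len)
        return seq, seq_len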
Example #2
def conv_instnorm__gru_2x256_drop02(num_outputs) -> nn.Module:
    model = SequentialSequential(*[
        ConvExtractor(norm=nn.InstanceNorm2d),
        LambdaModule(lambda seq, seq_len: (F.relu(seq), seq_len)),
        RNNEncoder(dropout=0.2, rnn_type="GRU", num_layers=2, hidden_size=256, input_size=512),
        SequentialLinear(256 * 2, num_outputs, pre_activation=True)
    ])
    return model
Example #3
def conv__gru_2x256_drop02__transf_2x4x128x256_drop01(num_outputs) -> nn.Module:
    model = SequentialSequential(*[
        ConvExtractor(),
        LambdaModule(lambda seq, seq_len: (F.relu(seq), seq_len)),
        RNNEncoder(dropout=0.2, rnn_type="GRU", num_layers=2, hidden_size=256, input_size=512),
        SequentialLinear(256 * 2, 128, pre_activation=True),
        TransformerEncoder(dropout=0.1, num_layers=2, num_heads=4, dim_model=128, dim_feedforward=256),
        SequentialLinear(128, num_outputs, pre_activation=True)
    ])
    return model
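
The function name encodes the architecture: a convolutional extractor, a 2-layer bidirectional GRU with hidden size 256 and dropout 0.2, a linear bottleneck from 512 (256 per direction) down to 128, a 2-layer, 4-head Transformer encoder with model dimension 128 and feed-forward dimension 256 at dropout 0.1, and a final projection to num_outputs. A hypothetical smoke test, assuming 3x128 spectrogram-like inputs and the tuple-threading call convention sketched after Example #1:

import torch

model = conv__gru_2x256_drop02__transf_2x4x128x256_drop01(num_outputs=80)
x = torch.randn(4, 3, 128, 200)          # B x C x H x L (assumed layout)
lengths = torch.full((4,), 200)          # per-sample time lengths
logits, out_lengths = model(x, lengths)  # logits: L' x B x 80 (assumed)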
Example #4
    def __init__(self,
                 num_outputs,
                 dropout=0.2,
                 n_rnn=2,
                 rnn_type="GRU",
                 rnn_dim=256):
        super().__init__()
        # input: Bx3x128xL
        left_context = 19
        right_context = 19 + 4
        self.encoder = nn.Sequential(*[
            nn.ReplicationPad2d([left_context, right_context, 0, 0]),
            nn.BatchNorm2d(3),
            nn.Conv2d(3, 64, kernel_size=(3, 3), padding=[1, 0]),  # L - 2
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=(4, 2), stride=2),  # / 2
            nn.Conv2d(64, 128, kernel_size=(3, 3), padding=[1, 0]),  # -2
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=(4, 2), stride=2),  # /2
            nn.Conv2d(128, 256, (3, 3), padding=[1, 0]),  # -2
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, (3, 3), padding=[1, 0]),  # -2
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.ZeroPad2d([0, 0, 2, 1]),  # same padding for maxpool2d
            nn.MaxPool2d(kernel_size=(4, 1), padding=0),  # pool_4
            nn.Conv2d(256, 512, (3, 3), padding=[1, 0]),  # -2
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, (3, 3), padding=[1, 0]),  # -2
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.ZeroPad2d([0, 0, 1, 2]),  # same padding for maxpool2d
            nn.MaxPool2d(kernel_size=(4, 1), padding=0),
            nn.Conv2d(512, 512, (2, 2)),  # 512x1x255 CxHxW # -1
            nn.ReLU(),
            LambdaModule(lambda x: x.squeeze(dim=2).permute(2, 0, 1)),  # LxBxC
        ])

        self.rnn_dropout = nn.Dropout(dropout)
        rnn_type = getattr(nn, rnn_type)
        self.n_rnn = n_rnn
        if n_rnn > 0:
            self.rnn = rnn_type(input_size=512,
                                hidden_size=rnn_dim,
                                bidirectional=True,
                                dropout=dropout,
                                batch_first=False,
                                num_layers=n_rnn)
        else:
            self.rnn = nn.Identity()
        self.final = nn.Linear(rnn_dim * 2, num_outputs)
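
The snippet shows only __init__. A plausible forward, inferred from how the attributes are wired (the encoder emits LxBx512, the bidirectional RNN doubles rnn_dim to feed self.final, and the nn.Identity fallback returns a bare tensor rather than an (output, state) tuple); the original method may differ:

    def forward(self, x):              # x: B x 3 x 128 x L
        seq = self.encoder(x)          # L' x B x 512
        seq = self.rnn_dropout(seq)
        if self.n_rnn > 0:
            seq, _ = self.rnn(seq)     # L' x B x (2 * rnn_dim)
        else:
            seq = self.rnn(seq)        # identity; 512 == 2 * default rnn_dim
        return self.final(seq)         # L' x B x num_outputs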
Example #5
    def __init__(self, num_outputs, dropout=0.2, n_rnn=2, rnn_type="GRU"):
        super().__init__()
        self.num_outputs = num_outputs
        left_context = 62
        right_context = 62
        self.encoder = nn.Sequential(*[
            nn.ReplicationPad2d([left_context, right_context, 0, 0]),
            nn.Conv2d(3, 64, kernel_size=(5, 5), padding=(2, 0)),  # 128, time -4
            nn.MaxPool2d(kernel_size=(2, 2)),  # to 64, time / 2 // (24+2) * 2
            ResBlock(64, 64, stride_h=2),  # to 32, time -4 // 24+2
            nn.MaxPool2d(kernel_size=(2, 2)),  # to 16, time / 2 // 12*2
            ResBlock(64, 128, stride_h=2),  # to 8, time -4
            ResBlock(128, 128),  # 8, time -4
            ResBlock(128, 256, stride_h=2),  # to 4, time -4
            ResBlock(256, 256),  # 4, time -4
            ResBlock(256, 512, stride_h=2),  # to 2, time -4
            ResBlock(512, 512, stride_h=2),  # to 1, time -4
            ResBlock(512, 512),  # 1, time -4
            LambdaModule(lambda x: x.squeeze(2)),  # BxCx1xL -> BxCxL
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Conv1d(512, 512, kernel_size=1),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            LambdaModule(lambda x: x.permute(2, 0, 1)),  # LxBxC
        ])
        self.rnn_dropout = nn.Dropout(dropout)
        rnn_type = getattr(nn, rnn_type)
        self.n_rnn = n_rnn
        if n_rnn > 0:
            self.rnn = rnn_type(input_size=512,
                                hidden_size=256,
                                bidirectional=True,
                                dropout=dropout,
                                batch_first=False,
                                num_layers=n_rnn)
        else:
            self.rnn = nn.Identity()
        self.final = nn.Linear(512, num_outputs)
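
ResBlock is another project-specific module. A hypothetical pre-activation version consistent with the call sites and shape comments above (stride_h downsamples the frequency axis only, and each block trims 4 frames from the time axis because its 3x3 convolutions are unpadded in time); the real block may differ:

class ResBlock(nn.Module):
    def __init__(self, in_ch, out_ch, stride_h=1):
        super().__init__()
        self.body = nn.Sequential(
            nn.BatchNorm2d(in_ch),
            nn.ReLU(),
            # padded in frequency, unpadded in time (-2 frames)
            nn.Conv2d(in_ch, out_ch, kernel_size=3,
                      stride=(stride_h, 1), padding=(1, 0)),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
            nn.Conv2d(out_ch, out_ch, kernel_size=3, padding=(1, 0)),  # -2 frames
        )
        if stride_h != 1 or in_ch != out_ch:
            self.skip = nn.Conv2d(in_ch, out_ch, kernel_size=1,
                                  stride=(stride_h, 1))
        else:
            self.skip = nn.Identity()

    def forward(self, x):
        # crop the skip path in time so it matches the body's -4 frames
        return self.body(x) + self.skip(x)[..., 2:-2]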
Example #6
    def __init__(self, num_outputs, dropout=0.1, n_layers=2, n_head=4, dim_feedforward=512):
        super().__init__()
        # input: Bx3x128xL
        left_context = 19
        right_context = 19 + 4
        self.encoder = nn.Sequential(*[
            nn.ReplicationPad2d([left_context, right_context, 0, 0]),
            nn.BatchNorm2d(3),
            nn.Conv2d(3, 64, kernel_size=(3, 3), padding=[1, 0]),  # L - 2
            nn.ReLU(),
            nn.BatchNorm2d(64),
            nn.MaxPool2d(kernel_size=(4, 2), stride=2),  # / 2
            nn.Conv2d(64, 128, kernel_size=(3, 3), padding=[1, 0]),  # -2
            nn.ReLU(),
            nn.BatchNorm2d(128),
            nn.MaxPool2d(kernel_size=(4, 2), stride=2),  # /2
            nn.Conv2d(128, 256, (3, 3), padding=[1, 0]),  # -2
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.Conv2d(256, 256, (3, 3), padding=[1, 0]),  # -2
            nn.ReLU(),
            nn.BatchNorm2d(256),
            nn.ZeroPad2d([0, 0, 2, 1]),  # same padding for maxpool2d
            nn.MaxPool2d(kernel_size=(4, 1), padding=0),  # pool_4
            nn.Conv2d(256, 512, (3, 3), padding=[1, 0]),  # -2
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.Conv2d(512, 512, (3, 3), padding=[1, 0]),  # -2
            nn.ReLU(),
            nn.BatchNorm2d(512),
            nn.ZeroPad2d([0, 0, 1, 2]),  # same padding for maxpool2d
            nn.MaxPool2d(kernel_size=(4, 1), padding=0),
            nn.Conv2d(512, 512, (2, 2)),  # 512x1x255 CxHxW # -1
            nn.ReLU(),
            LambdaModule(lambda x: x.squeeze(dim=2).permute(2, 0, 1)),  # LxBxC
        ])
        self.reduce_dim = nn.Linear(512, 128)

        self.pos_encoder = PositionalEncoding(128, dropout=dropout)
        encoder_layers = nn.TransformerEncoderLayer(128, n_head, dim_feedforward=dim_feedforward, dropout=dropout)
        encoder_norm = nn.LayerNorm(128)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layers, n_layers, encoder_norm)
        self.final = nn.Linear(128, num_outputs)
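
PositionalEncoding is not shown either. A sketch in the style of the standard sinusoidal encoding from the PyTorch transformer tutorial, matching the LxBxC layout the encoder produces; the project's own implementation may differ:

import math
import torch
import torch.nn as nn

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2)
                             * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe)

    def forward(self, x):              # x: L x B x d_model
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)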