Example #1
 def __init__(
     self,
     in_channels=1,
     out_channels=32,
     input_dim=312,
     hidden_dim=32,
     output_dim=10,
 ):
     super(cnn1d_ser, self).__init__()
     self.classifier = nn.Sequential(
         nn.Conv1d(in_channels, out_channels, 5, stride=1, padding=2),
         nn.BatchNorm1d(out_channels),
         nn.ReLU(),
         nn.Dropout(0.5),
         nn.Conv1d(out_channels, out_channels, 5, stride=1, padding=2),
         nn.BatchNorm1d(out_channels),
         nn.ReLU(),
         nn.Dropout(0.5),
         nn.Flatten(),
         nn.Linear(input_dim * out_channels, hidden_dim),
         nn.BatchNorm1d(hidden_dim),
         nn.ReLU(),
         nn.Dropout(0.5),
         nn.Linear(hidden_dim, output_dim),
     )
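
A quick shape check (a minimal sketch; the batch size and dummy input are assumptions) confirming that padding=2 with kernel_size=5 preserves the sequence length, so the flatten stage feeds exactly input_dim * out_channels features into the first linear layer:

import torch
import torch.nn as nn

conv = nn.Conv1d(1, 32, 5, stride=1, padding=2)
x = torch.randn(4, 1, 312)              # (batch, in_channels, input_dim)
print(conv(x).shape)                     # torch.Size([4, 32, 312]): length preserved
print(nn.Flatten()(conv(x)).shape)       # torch.Size([4, 9984]) = 32 * 312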
Example #2
 def __init__(
     self,
     c_in,
     c_cond,
     c_h,
     c_out,
     kernel_size,
     n_conv_blocks,
     upsample,
     act,
     sn,
     dropout_rate,
 ):
     super(Decoder, self).__init__()
     self.n_conv_blocks = n_conv_blocks
     self.upsample = upsample
     self.act = get_act(act)
     # apply spectral normalization when sn is set; otherwise use the identity
     # (assumes torch.nn.utils.spectral_norm is imported as spectral_norm)
     f = spectral_norm if sn else (lambda x: x)
     self.in_conv_layer = f(nn.Conv1d(c_in, c_h, kernel_size=1))
     self.first_conv_layers = nn.ModuleList([
         f(nn.Conv1d(c_h, c_h, kernel_size=kernel_size))
         for _ in range(n_conv_blocks)
     ])
     self.second_conv_layers = nn.ModuleList([
         f(nn.Conv1d(c_h, c_h * up, kernel_size=kernel_size))
         for _, up in zip(range(n_conv_blocks), self.upsample)
     ])
     self.norm_layer = nn.InstanceNorm1d(c_h, affine=False)
     self.conv_affine_layers = nn.ModuleList(
         [f(nn.Linear(c_cond, c_h * 2)) for _ in range(n_conv_blocks * 2)])
     self.out_conv_layer = f(nn.Conv1d(c_h, c_out, kernel_size=1))
     self.dropout_layer = nn.Dropout(p=dropout_rate)
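
Since the sn flag toggles spectral normalization (the wrapper fixed above), here is a minimal sketch of what the wrapper does; the channel sizes are assumptions:

import torch.nn as nn
from torch.nn.utils import spectral_norm

conv = spectral_norm(nn.Conv1d(64, 64, kernel_size=5))
print(hasattr(conv, "weight_orig"))  # True: the weight is reparameterized
print(conv.weight.shape)             # same shape, spectrally normalized values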
Example #3
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding):
        super(ResidualLayer, self).__init__()

        self.conv1d_layer = nn.Sequential(
            nn.Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=1,
                padding=padding,
            ),
            nn.InstanceNorm1d(num_features=out_channels, affine=True),
        )

        self.conv_layer_gates = nn.Sequential(
            nn.Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=1,
                padding=padding,
            ),
            nn.InstanceNorm1d(num_features=out_channels, affine=True),
        )

        self.conv1d_out_layer = nn.Sequential(
            nn.Conv1d(
                in_channels=out_channels,
                out_channels=in_channels,
                kernel_size=kernel_size,
                stride=1,
                padding=padding,
            ),
            nn.InstanceNorm1d(num_features=in_channels, affine=True),
        )
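
The two parallel conv branches suggest GLU-style gating; a minimal forward sketch under that assumption (the sigmoid gate, residual add, and channel sizes are assumptions, following the CycleGAN-VC pattern):

import torch
import torch.nn as nn

conv = nn.Sequential(nn.Conv1d(24, 48, 3, 1, 1), nn.InstanceNorm1d(48, affine=True))
gates = nn.Sequential(nn.Conv1d(24, 48, 3, 1, 1), nn.InstanceNorm1d(48, affine=True))
out = nn.Sequential(nn.Conv1d(48, 24, 3, 1, 1), nn.InstanceNorm1d(24, affine=True))

x = torch.randn(2, 24, 100)
gated = conv(x) * torch.sigmoid(gates(x))   # gated linear unit
print((x + out(gated)).shape)               # torch.Size([2, 24, 100]): residual-friendly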
Example #4
 def __init__(self, num_speakers=2) -> None:
     super(simple_CNN, self).__init__()
     self.convs = nn.Sequential(
         nn.Conv1d(1, 16, 100, stride=10),
         nn.BatchNorm1d(16),
         nn.ReLU(),
         nn.Conv1d(16, 64, 21, stride=10),
         nn.BatchNorm1d(64),
         nn.ReLU(),
         nn.Conv1d(64, 64, 5, stride=5),
         nn.BatchNorm1d(64),
         nn.ReLU(),
     )
     self.linears = nn.Sequential(nn.Linear(1 * 6 * 64, 128),
                                  nn.Linear(128, num_speakers))
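
The hard-coded 1 * 6 * 64 flatten size corresponds to 6 frames surviving the three strided convolutions; a worked check (the 3200-sample input length is an assumption chosen so the arithmetic lands on 6):

import torch
import torch.nn as nn

convs = nn.Sequential(
    nn.Conv1d(1, 16, 100, stride=10), nn.BatchNorm1d(16), nn.ReLU(),
    nn.Conv1d(16, 64, 21, stride=10), nn.BatchNorm1d(64), nn.ReLU(),
    nn.Conv1d(64, 64, 5, stride=5), nn.BatchNorm1d(64), nn.ReLU(),
)
x = torch.randn(2, 1, 3200)   # (3200-100)//10+1 = 311; (311-21)//10+1 = 30; (30-5)//5+1 = 6
print(convs(x).shape)          # torch.Size([2, 64, 6]): matches 1 * 6 * 64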
Example #5
def _test_conv1d_bias_true(test_case, device):
    np_arr = np.array(
        [
            [
                [0.90499806, -1.11683071, 0.71605605, -0.56754625, 0.61944169],
                [-0.31317389, -0.26271924, 0.95579433, 0.52468461, 1.48926127],
            ]
        ]
    )
    input = flow.tensor(
        np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True
    )
    weight = np.array(
        [
            [
                [0.01997352, 0.23834395, 0.00526353],
                [-0.04861857, -0.22751901, -0.06725175],
            ],
            [
                [0.13344523, -0.35202524, 0.15168799],
                [-0.25714493, -0.17459838, 0.28768948],
            ],
            [
                [0.10671382, -0.28205597, -0.39752254],
                [0.36393702, 0.07843742, -0.33898622],
            ],
            [
                [0.20485674, 0.04222689, -0.1898618],
                [0.22519711, -0.15910202, -0.35057363],
            ],
        ]
    )
    bias = np.array([0.01012857, 0.38912651, -0.01600273, -0.3883304])
    m = nn.Conv1d(2, 4, 3, stride=1, bias=True)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m.bias = flow.nn.Parameter(flow.Tensor(bias))
    m = m.to(device)
    np_out = np.array(
        [
            [
                [-0.22349545, -0.08447243, -0.37358052],
                [1.4130373, -0.04644597, 0.86949122],
                [-0.34765026, -0.31004351, -0.14158708],
                [-0.74985039, -0.87430149, -0.77354753],
            ]
        ]
    )
    output = m(input)
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [
            [
                [0.4649893, 0.11147892, -0.3189539, -0.78394318, -0.43043283],
                [0.28337064, -0.19941133, -0.66853344, -0.95190406, -0.46912211],
            ]
        ]
    )
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
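
The expected shapes follow from the usual no-padding output-length formula; a quick check for this case (length-5 input, kernel 3, stride 1):

# L_out = (L_in - kernel_size) // stride + 1
print((5 - 3) // 1 + 1)  # 3 frames per output channel, matching np_out's shape (1, 4, 3)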
Example #6
def _test_conv1d_dilation(test_case, device):
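    # note: the conv below uses nn.Conv1d's default dilation of 1;
    # np_out and np_grad are consistent with that setting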
    np_arr = np.array(
        [[[-0.43016902, 1.74619496, -0.57338119, 0.25563857, 0.12575546]]])
    input = flow.tensor(np_arr,
                        dtype=flow.float32,
                        device=flow.device(device),
                        requires_grad=True)
    weight = np.array([
        [[-0.35057205, -0.31304273, 0.46250814]],
        [[-0.40786612, 0.36518192, 0.46280444]],
        [[-0.00921835, -0.38710043, 0.47566161]],
    ])
    m = nn.Conv1d(1, 3, 3, stride=1, bias=False)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m = m.to(device)
    output = m(input)
    np_out = np.array([[
        [-0.66102189, -0.31443936, 0.17914855],
        [0.54776692, -0.8032915, 0.38541752],
        [-0.94472277, 0.32745653, -0.03385513],
    ]])
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [[[-0.76765651, -1.10261774, 0.29835641, 1.06601286, 1.40097415]]])
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06,
                                     1e-06))
Example #7
def _test_conv1d_stride(test_case, device):
    np_arr = np.array(
        [[[-1.01312506, -0.40687919, 1.5985316, 0.53594196, -1.89935565]]])
    input = flow.tensor(np_arr,
                        dtype=flow.float32,
                        device=flow.device(device),
                        requires_grad=True)
    weight = np.array([
        [[0.5751484, 0.26589182, -0.026546]],
        [[-0.10313249, -0.20797005, -0.48268208]],
        [[-0.22216944, -0.14962578, 0.57433963]],
    ])
    m = nn.Conv1d(1, 3, 3, stride=2, bias=False)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m = m.to(device)
    output = m(input)
    np_out = np.array([[
        [-0.73331773, 1.11231577],
        [-0.58247775, 0.64046454],
        [1.20406508, -1.5262109],
    ]])
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [[[0.24984647, -0.09170401, 0.31495798, -0.09170401, 0.06511152]]])
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06,
                                     1e-06))
Example #8
def _test_conv1d_bias_false(test_case, device):
    np_arr = np.array(
        [[[1.28795946, -0.2921792, 0.20338029, 0.78604293, -1.89607573]]])
    input = flow.tensor(np_arr,
                        dtype=flow.float32,
                        device=flow.device(device),
                        requires_grad=True)
    weight = np.array([
        [[0.10197904, 0.3372305, -0.25743008]],
        [[0.27720425, -0.52435774, -0.38381988]],
        [[0.56016803, -0.10063095, -0.10760903]],
    ])
    m = nn.Conv1d(1, 3, 3, stride=1, bias=False)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m = m.to(device)
    output = m(input)
    np_out = np.array([[
        [-0.01954307, -0.16356121, 0.77392507],
        [0.43217283, -0.48933625, 0.37196174],
        [0.72899038, -0.2687211, 0.23886177],
    ]])
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [[[0.93935132, 0.65159315, -0.09726584, -1.03661716, -0.74885899]]])
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06,
                                     1e-06))
Example #9
def _test_conv1d_complicate(test_case, device):
    np_arr = np.array([[
        [-1.00674784, 0.51784992, 0.39896572, 0.11018554, 0.91136694],
        [1.95886874, 0.89779067, 0.4748213, 0.33313531, -0.49350029],
        [-0.19280219, 0.04023677, 1.66438103, -0.83563608, 0.15925731],
        [1.49166429, 1.45189261, -1.86512125, 0.34329697, 0.20413807],
    ]])
    input = flow.tensor(np_arr,
                        dtype=flow.float32,
                        device=flow.device(device),
                        requires_grad=True)
    weight = np.array([
        [
            [-0.36045218, 0.37349278, 0.04565236],
            [0.0242328, -0.09459515, -0.30684742],
        ],
        [
            [-0.30345008, -0.1196513, -0.26765293],
            [0.09876197, 0.03346226, 0.2748405],
        ],
        [
            [-0.37798449, 0.00242459, -0.34125558],
            [-0.05174343, -0.10443231, 0.09526101],
        ],
        [
            [0.34196907, -0.32667893, 0.40264183],
            [0.38025281, 0.26807079, -0.09074812],
        ],
    ])
    bias = np.array([-0.03499984, -0.21616256, 0.13312563, -0.24104381])
    m = nn.Conv1d(4,
                  4,
                  3,
                  groups=2,
                  stride=2,
                  padding=2,
                  dilation=2,
                  bias=True)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m.bias = flow.nn.Parameter(flow.Tensor(bias))
    m = m.to(device)
    np_out = np.array([[
        [-0.72379637, 0.67248386, 0.21977007],
        [-0.00643994, -0.1286152, -0.41589433],
        [-0.76877236, 0.29273134, -0.42040929],
        [1.0612179, -0.73787093, -0.37839717],
    ]])
    output = m(input)
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array([[
        [-0.41006082, 0.0, -0.63206136, 0.0, 0.03184089],
        [0.06186188, 0.0, 0.02985496, 0.0, -0.09313981],
        [-0.36026976, 0.0, -0.2988835, 0.0, -0.26286808],
        [0.49214786, 0.0, 0.49666074, 0.0, 0.16815135],
    ]])
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06,
                                     1e-06))
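
Both the 3-column output and the zero columns in np_grad follow from the conv geometry: the general length formula gives 3 output frames, and with stride 2 plus dilation 2 every kernel tap lands on an even input index, so the odd positions never receive gradient.

# L_out = (L_in + 2*padding - dilation*(kernel_size - 1) - 1) // stride + 1
print((5 + 2 * 2 - 2 * (3 - 1) - 1) // 2 + 1)  # 3, matching the three columns of np_out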
Example #10
 def __init__(
     self,
     c_in,
     c_h,
     c_out,
     kernel_size,
     bank_size,
     bank_scale,
     c_bank,
     n_conv_blocks,
     n_dense_blocks,
     subsample,
     act,
     dropout_rate,
 ):
     super(SpeakerEncoder, self).__init__()
     self.c_in = c_in
     self.c_h = c_h
     self.c_out = c_out
     self.kernel_size = kernel_size
     self.n_conv_blocks = n_conv_blocks
     self.n_dense_blocks = n_dense_blocks
     self.subsample = subsample
     self.act = get_act(act)
     self.conv_bank = nn.ModuleList([
         nn.Conv1d(c_in, c_bank, kernel_size=k)
         for k in range(bank_scale, bank_size + 1, bank_scale)
     ])
     in_channels = c_bank * (bank_size // bank_scale) + c_in
     self.in_conv_layer = nn.Conv1d(in_channels, c_h, kernel_size=1)
     self.first_conv_layers = nn.ModuleList([
         nn.Conv1d(c_h, c_h, kernel_size=kernel_size)
         for _ in range(n_conv_blocks)
     ])
     self.second_conv_layers = nn.ModuleList([
         nn.Conv1d(c_h, c_h, kernel_size=kernel_size, stride=sub)
         for sub, _ in zip(subsample, range(n_conv_blocks))
     ])
     self.pooling_layer = nn.AdaptiveAvgPool1d(1)
     self.first_dense_layers = nn.ModuleList(
         [nn.Linear(c_h, c_h) for _ in range(n_dense_blocks)])
     self.second_dense_layers = nn.ModuleList(
         [nn.Linear(c_h, c_h) for _ in range(n_dense_blocks)])
     self.output_layer = nn.Linear(c_h, c_out)
     self.dropout_layer = nn.Dropout(p=dropout_rate)
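
The in_channels arithmetic comes from concatenating every conv-bank output with the raw input along the channel axis; a minimal sketch (the right-padding to a common length is an assumption about the forward pass, and the sizes are illustrative):

import torch
import torch.nn as nn
import torch.nn.functional as F

c_in, c_bank, bank_size, bank_scale = 80, 16, 4, 2
bank = nn.ModuleList([nn.Conv1d(c_in, c_bank, kernel_size=k)
                      for k in range(bank_scale, bank_size + 1, bank_scale)])
x = torch.randn(2, c_in, 100)
outs = [F.pad(b(x), (0, x.size(-1) - b(x).size(-1))) for b in bank]
y = torch.cat(outs + [x], dim=1)
print(y.shape)  # torch.Size([2, 112, 100]); 112 == c_bank * (bank_size // bank_scale) + c_in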
Example #11
def _test_conv1d_group_large_out_bias_true(test_case, device):
    np_arr = np.array(
        [
            [
                [2.17964911, 0.91623521, 1.24746692, 0.73605931, -0.23738743],
                [-0.70412433, 0.10727754, 1.0207864, -0.09711888, -1.10814202],
            ]
        ]
    )
    input = flow.tensor(
        np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True
    )
    weight = np.array(
        [
            [[-0.207307473, 0.12856324, 0.371991515]],
            [[-0.416422307, 3.26921181e-05, -0.385845661]],
            [[-0.182592362, 0.143281639, 0.419321984]],
            [[-0.27117458, 0.0421470925, 0.377335936]],
            [[0.546190619, -0.211819887, -0.29785803]],
            [[0.334832489, 0.255918801, -0.0556600206]],
        ]
    )
    bias = np.array(
        [-0.56865668, 0.17631066, -0.43992457, -0.24307285, -0.53672957, -0.52927947]
    )
    m = nn.Conv1d(2, 6, 3, groups=2, stride=1, bias=True)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m.bias = flow.nn.Parameter(flow.Tensor(bias))
    m = m.to(device)
    np_out = np.array(
        [
            [
                [-0.43867296, -0.32441288, -0.82094181],
                [-1.21264362, -0.48919463, -0.25154343],
                [-0.18354186, -0.11983716, -0.66178048],
                [0.33756858, -0.26578707, -0.9421193],
                [-1.2480886, -0.66543078, 0.37145507],
                [-0.79440582, -0.22671542, -0.15066233],
            ]
        ]
    )
    output = m(input)
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [
            [
                [-0.8063221, -0.53444451, -0.12897667, 0.6773454, 0.40546784],
                [0.6098485, 0.69609451, 0.71991241, 0.1100639, 0.02381789],
            ]
        ]
    )
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
Example #12
    def downsample(self, in_channels, out_channels, kernel_size, stride, padding):
        self.ConvLayer = nn.Sequential(
            nn.Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=padding,
            ),
            nn.InstanceNorm1d(num_features=out_channels, affine=True),
            GLU(),
        )

        return self.ConvLayer
Example #13
def _test_conv1d_group_large_in_bias_true(test_case, device):
    np_arr = np.array(
        [
            [
                [0.7382921, 0.3227571, -0.73204273, -0.01697334, 1.72585976],
                [0.52866709, 0.28417364, 1.12931311, 1.73048413, -0.60748184],
                [0.43222603, 0.7882517, -0.62105948, 0.10097823, 0.81639361],
                [0.36671457, 0.24468753, -0.5824874, -0.74464536, -0.38901371],
            ]
        ]
    )
    input = flow.tensor(
        np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True
    )
    weight = np.array(
        [
            [
                [-0.29574063, -0.31176069, 0.17234495],
                [0.06092392, 0.30691007, -0.36685407],
            ],
            [
                [0.26149744, 0.07149458, 0.3209756],
                [0.18960869, -0.37148297, -0.13602243],
            ],
        ]
    )
    bias = np.array([-0.35048512, -0.0093792])
    m = nn.Conv1d(4, 2, 3, groups=2, stride=1, bias=True)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m.bias = flow.nn.Parameter(flow.Tensor(bias))
    m = m.to(device)
    np_out = np.array(
        [[[-1.09048378, -0.49156523, 0.99150705], [0.01852397, 0.54882324, 0.31657016]]]
    )
    output = m(input)
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [
            [
                [-0.29574063, -0.60750133, -0.43515638, -0.13941574, 0.17234495],
                [0.06092392, 0.36783397, 0.0009799, -0.059944, -0.36685407],
                [0.26149744, 0.33299202, 0.65396762, 0.39247018, 0.3209756],
                [0.18960869, -0.18187428, -0.31789672, -0.50750542, -0.13602243],
            ]
        ]
    )
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
Example #14
 def __init__(
     self,
     in_channels=256,
     conv_channels=512,
     kernel_size=3,
     dilation=1,
     norm="cLN",
     causal=False,
 ):
     super(Conv1DBlock, self).__init__()
     # 1x1 conv
     self.conv1x1 = Conv1D(in_channels, conv_channels, 1)
     self.prelu1 = nn.PReLU()
     self.lnorm1 = build_norm(norm, conv_channels)
     dconv_pad = (
         (dilation * (kernel_size - 1)) // 2
         if not causal
         else (dilation * (kernel_size - 1))
     )
     # depthwise conv
     self.dconv = nn.Conv1d(
         conv_channels,
         conv_channels,
         kernel_size,
         groups=conv_channels,
         padding=dconv_pad,
         dilation=dilation,
         bias=True,
     )
     self.prelu2 = nn.PReLU()
     self.lnorm2 = build_norm(norm, conv_channels)
     # 1x1 conv cross channel
     self.sconv = nn.Conv1d(conv_channels, in_channels, 1, bias=True)
     # different padding way
     self.causal = causal
     self.dconv_pad = dconv_pad
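
In the causal case the block pads by the full dilation * (kernel_size - 1), and the self.causal / self.dconv_pad bookkeeping suggests the forward pass trims the trailing frames so no future context leaks; a sketch under that assumption:

import torch
import torch.nn as nn

kernel_size, dilation, channels = 3, 2, 8
dconv_pad = dilation * (kernel_size - 1)
dconv = nn.Conv1d(channels, channels, kernel_size, groups=channels,
                  padding=dconv_pad, dilation=dilation)
x = torch.randn(1, channels, 50)
y = dconv(x)[:, :, :-dconv_pad]   # drop the look-ahead frames
print(y.shape)                    # torch.Size([1, 8, 50]): length preserved, causal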
Example #15
def _test_conv1d_group_bias_true(test_case, device):
    np_arr = np.array(
        [
            [
                [1.48566079, 0.54937589, 0.62353903, -0.94114172, -0.60260266],
                [0.61150503, -0.50289607, 1.41735041, -1.85877609, -1.04875529],
            ]
        ]
    )
    input = flow.tensor(
        np_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True
    )
    weight = np.array(
        [
            [[0.25576305, 0.40814576, -0.05900212]],
            [[-0.24829513, 0.42756805, -0.01354307]],
            [[0.44658303, 0.46889144, 0.41060263]],
            [[0.30083328, -0.5221613, 0.12215579]],
        ]
    )
    bias = np.array([-0.03368823, -0.4212504, -0.42130581, -0.17434336])
    m = nn.Conv1d(2, 4, 3, groups=2, stride=1, bias=True)
    m.weight = flow.nn.Parameter(flow.Tensor(weight))
    m.bias = flow.nn.Parameter(flow.Tensor(bias))
    m = m.to(device)
    np_out = np.array(
        [
            [
                [0.53372419, 0.41684598, -0.22277816],
                [-0.56368178, -0.27830642, -0.97031319],
                [0.19794616, -0.74452549, -1.09052706],
                [0.44534814, -1.29277706, 1.09451222],
            ]
        ]
    )
    output = m(input)
    test_case.assertTrue(np.allclose(output.numpy(), np_out, 1e-06, 1e-06))
    output = output.sum()
    output.backward()
    np_grad = np.array(
        [
            [
                [0.00746793, 0.84318173, 0.77063656, 0.76316863, -0.07254519],
                [0.74741632, 0.69414645, 1.22690487, 0.47948855, 0.53275841],
            ]
        ]
    )
    test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-06, 1e-06))
Example #16
 def __init__(self,
              in_planes,
              out_planes,
              kernel_size=3,
              stride=1,
              groups=1):
     padding = (kernel_size - 1) // 2
     super(ConvBNReLU, self).__init__(
         nn.Conv1d(
             in_planes,
             out_planes,
             kernel_size,
             stride,
             padding,
             groups=groups,
             bias=False,
         ),
         nn.BatchNorm1d(out_planes),
         nn.ReLU6(),
     )
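
padding = (kernel_size - 1) // 2 gives "same" padding for odd kernels, so only the stride changes the length; a standalone check (channel sizes and input length are assumptions):

import torch
import torch.nn as nn

k, s = 3, 2
block = nn.Sequential(nn.Conv1d(16, 32, k, s, (k - 1) // 2, bias=False),
                      nn.BatchNorm1d(32), nn.ReLU6())
print(block(torch.randn(2, 16, 64)).shape)  # torch.Size([2, 32, 32]): halved by stride only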
Example #17
    def __init__(self, channels, kernel_size, bias=True, dropout=0.0):
        super(ConformerConvolutionModule, self).__init__()

        assert kernel_size % 2 == 1

        self.pointwise_conv1 = nn.Linear(channels, 2 * channels, bias=bias)

        self.depthwise_conv = nn.Conv1d(
            channels,
            channels,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            groups=channels,
            bias=bias,
        )

        self.batch_norm = nn.BatchNorm1d(channels)

        self.pointwise_conv2 = nn.Linear(channels, channels, bias=bias)

        self.dropout = nn.Dropout(dropout)
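
Because the pointwise convs are implemented as nn.Linear, the module presumably operates on (batch, time, channels) tensors and transposes around the depthwise conv; a forward sketch under the standard Conformer ordering (an assumption; the module's real forward may differ):

import torch
import torch.nn as nn
import torch.nn.functional as F

channels, k = 16, 5
pw1 = nn.Linear(channels, 2 * channels)
dw = nn.Conv1d(channels, channels, k, padding=(k - 1) // 2, groups=channels)
bn = nn.BatchNorm1d(channels)
pw2 = nn.Linear(channels, channels)

x = torch.randn(2, 50, channels)                  # (batch, time, channels)
h = F.glu(pw1(x), dim=-1)                         # gate halves 2*channels back to channels
h = bn(dw(h.transpose(1, 2))).transpose(1, 2)     # Conv1d/BatchNorm1d expect (B, C, T)
print(pw2(torch.relu(h)).shape)                   # torch.Size([2, 50, 16])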
Example #18
    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]

        hidden_dim = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expand_ratio != 1:
            # pw
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend([
            # dw
            ConvBNReLU(hidden_dim,
                       hidden_dim,
                       stride=stride,
                       groups=hidden_dim),
            # pw-linear
            nn.Conv1d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm1d(oup),
        ])
        self.conv = nn.Sequential(*layers)
Example #19
    def __init__(self, hidden_size, vocab_size, blank=BLK, lookahead_steps=-1):
        super(CTCAssistor, self).__init__()

        self.lookahead_steps = lookahead_steps
        if self.lookahead_steps > 0:
            self.apply_look_ahead = True
            self.lookahead_conv = nn.Conv1d(
                in_channels=hidden_size,
                out_channels=hidden_size,
                kernel_size=self.lookahead_steps + 1,
                padding=0,
                stride=1,
                bias=False,
                groups=hidden_size,
            )
            logger.info(
                "Apply Lookahead Step in CTCAssistor And Set it to %d" % lookahead_steps
            )
        else:
            self.apply_look_ahead = False

        self.output_layer = nn.Linear(hidden_size, vocab_size)
        self.ctc_crit = nn.CTCLoss(blank=blank, zero_infinity=True)
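
With kernel_size = lookahead_steps + 1 and padding=0, the conv shortens the sequence by lookahead_steps unless the future side is padded first; a sketch of the likely intent (the one-sided padding is an assumption about the forward pass):

import torch
import torch.nn as nn
import torch.nn.functional as F

hidden, steps = 8, 2
look = nn.Conv1d(hidden, hidden, steps + 1, padding=0, stride=1,
                 bias=False, groups=hidden)
x = torch.randn(1, hidden, 20)
y = look(F.pad(x, (0, steps)))  # pad only the future side
print(y.shape)                   # torch.Size([1, 8, 20]): each frame mixes in `steps` future frames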
Example #20
 def __init__(
     self,
     c_in,
     c_h,
     c_out,
     kernel_size,
     bank_size,
     bank_scale,
     c_bank,
     n_conv_blocks,
     subsample,
     act,
     dropout_rate,
 ):
     super(ContentEncoder, self).__init__()
     self.n_conv_blocks = n_conv_blocks
     self.subsample = subsample
     self.act = get_act(act)
     self.conv_bank = nn.ModuleList([
         nn.Conv1d(c_in, c_bank, kernel_size=k)
         for k in range(bank_scale, bank_size + 1, bank_scale)
     ])
     in_channels = c_bank * (bank_size // bank_scale) + c_in
     self.in_conv_layer = nn.Conv1d(in_channels, c_h, kernel_size=1)
     self.first_conv_layers = nn.ModuleList([
         nn.Conv1d(c_h, c_h, kernel_size=kernel_size)
         for _ in range(n_conv_blocks)
     ])
     self.second_conv_layers = nn.ModuleList([
         nn.Conv1d(c_h, c_h, kernel_size=kernel_size, stride=sub)
         for sub, _ in zip(subsample, range(n_conv_blocks))
     ])
     self.norm_layer = nn.InstanceNorm1d(c_h, affine=False)
     self.mean_layer = nn.Conv1d(c_h, c_out, kernel_size=1)
     self.std_layer = nn.Conv1d(c_h, c_out, kernel_size=1)
     self.dropout_layer = nn.Dropout(p=dropout_rate)
Example #21
    def __init__(self, options):
        super(SincNet, self).__init__()

        self.cnn_N_filt = options["cnn_N_filt"]
        self.cnn_len_filt = options["cnn_len_filt"]
        self.cnn_max_pool_len = options["cnn_max_pool_len"]

        self.cnn_act = options["cnn_act"]
        self.cnn_drop = options["cnn_drop"]

        self.cnn_use_laynorm = options["cnn_use_laynorm"]
        self.cnn_use_batchnorm = options["cnn_use_batchnorm"]
        self.cnn_use_laynorm_inp = options["cnn_use_laynorm_inp"]
        self.cnn_use_batchnorm_inp = options["cnn_use_batchnorm_inp"]

        self.input_dim = int(options["input_dim"])

        self.fs = options["fs"]

        self.N_cnn_lay = len(options["cnn_N_filt"])
        self.conv = nn.ModuleList([])
        self.bn = nn.ModuleList([])
        self.ln = nn.ModuleList([])
        self.act = nn.ModuleList([])
        self.drop = nn.ModuleList([])

        if self.cnn_use_laynorm_inp:
            self.ln0 = LayerNorm(self.input_dim)

        if self.cnn_use_batchnorm_inp:
            self.bn0 = nn.BatchNorm1d(self.input_dim, momentum=0.05)

        current_input = self.input_dim

        for i in range(self.N_cnn_lay):

            N_filt = int(self.cnn_N_filt[i])
            len_filt = int(self.cnn_len_filt[i])

            # dropout
            self.drop.append(nn.Dropout(p=self.cnn_drop[i]))

            # activation
            self.act.append(act_fun(self.cnn_act[i]))

            # layer norm initialization
            self.ln.append(
                LayerNorm((
                    N_filt,
                    int((current_input - self.cnn_len_filt[i] + 1) /
                        self.cnn_max_pool_len[i]),
                )))

            # batch norm initialization (nn.BatchNorm1d takes only num_features;
            # the layer length passed as a second positional arg would be read as eps)
            self.bn.append(nn.BatchNorm1d(N_filt, momentum=0.05))

            if i == 0:
                self.conv.append(
                    SincConv_fast(self.cnn_N_filt[0], self.cnn_len_filt[0],
                                  self.fs))
            else:
                self.conv.append(
                    nn.Conv1d(self.cnn_N_filt[i - 1], self.cnn_N_filt[i],
                              self.cnn_len_filt[i]))

            current_input = int((current_input - self.cnn_len_filt[i] + 1) /
                                self.cnn_max_pool_len[i])

        self.out_dim = current_input * N_filt
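
The current_input bookkeeping is a no-padding conv followed by max pooling; a worked check (the 3200-sample input, 251/5/5 filter lengths, pool size 3, and 60 final filters are assumptions taken from a typical SincNet configuration):

# per-layer update: L <- (L - len_filt + 1) // max_pool_len
L = 3200
for flt, pool in [(251, 3), (5, 3), (5, 3)]:
    L = (L - flt + 1) // pool
    print(L)      # 983, 326, 107
print(L * 60)     # 6420: out_dim when the last layer has 60 filters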
Example #22
 def __init__(self, d_in, d_hid, dropout=0.1):
     super(PositionwiseFeedForwardUseConv, self).__init__()
     self.w_1 = nn.Conv1d(d_in, d_hid, 1)
     self.w_2 = nn.Conv1d(d_hid, d_in, 1)
     self.layer_norm = nn.LayerNorm(d_in)
     self.dropout = nn.Dropout(dropout)
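
The 1x1 convs act as per-position linear layers over a (batch, d_in, time) view; a forward sketch with the conventional residual plus layer norm (an assumption about the real forward; sizes are illustrative):

import torch
import torch.nn as nn

d_in, d_hid = 16, 64
w_1, w_2 = nn.Conv1d(d_in, d_hid, 1), nn.Conv1d(d_hid, d_in, 1)
ln, drop = nn.LayerNorm(d_in), nn.Dropout(0.1)

x = torch.randn(2, 10, d_in)                        # (batch, time, d_in)
h = w_2(torch.relu(w_1(x.transpose(1, 2)))).transpose(1, 2)
print(ln(drop(h) + x).shape)                        # torch.Size([2, 10, 16])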
Example #23
    def __init__(self):
        super(Generator, self).__init__()

        # 2D Conv Layer
        self.conv1 = nn.Conv2d(
            in_channels=1,
            out_channels=128,
            kernel_size=(5, 15),
            stride=(1, 1),
            padding=(2, 7),
        )

        self.conv1_gates = nn.Conv2d(
            in_channels=1,
            out_channels=128,
            kernel_size=(5, 15),
            stride=1,
            padding=(2, 7),
        )

        # 2D Downsample Layer
        self.downSample1 = downSample_Generator(in_channels=128,
                                                out_channels=256,
                                                kernel_size=5,
                                                stride=2,
                                                padding=2)

        self.downSample2 = downSample_Generator(in_channels=256,
                                                out_channels=256,
                                                kernel_size=5,
                                                stride=2,
                                                padding=2)

        # 2D -> 1D Conv
        self.conv2dto1dLayer = nn.Sequential(
            nn.Conv1d(in_channels=2304,
                      out_channels=256,
                      kernel_size=1,
                      stride=1,
                      padding=0),
            nn.InstanceNorm1d(num_features=256, affine=True),
        )

        # Residual Blocks
        self.residualLayer1 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)
        self.residualLayer2 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)
        self.residualLayer3 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)
        self.residualLayer4 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)
        self.residualLayer5 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)
        self.residualLayer6 = ResidualLayer(in_channels=256,
                                            out_channels=512,
                                            kernel_size=3,
                                            stride=1,
                                            padding=1)

        # 1D -> 2D Conv
        self.conv1dto2dLayer = nn.Sequential(
            nn.Conv1d(in_channels=256,
                      out_channels=2304,
                      kernel_size=1,
                      stride=1,
                      padding=0),
            nn.InstanceNorm1d(num_features=2304, affine=True),
        )

        # UpSample Layer
        self.upSample1 = self.upSample(in_channels=256,
                                       out_channels=1024,
                                       kernel_size=5,
                                       stride=1,
                                       padding=2)

        self.upSample2 = self.upSample(in_channels=256,
                                       out_channels=512,
                                       kernel_size=5,
                                       stride=1,
                                       padding=2)

        self.lastConvLayer = nn.Conv2d(
            in_channels=128,
            out_channels=1,
            kernel_size=(5, 15),
            stride=(1, 1),
            padding=(2, 7),
        )
Example #24
    def __init__(self, num_features, num_classes):
        super(Wav2Letter, self).__init__()

        self.layers = nn.Sequential(
            nn.Conv1d(num_features, 250, 48, 2),
            nn.ReLU(),
            nn.Conv1d(250, 250, 7),
            nn.ReLU(),
            nn.Conv1d(250, 250, 7),
            nn.ReLU(),
            nn.Conv1d(250, 250, 7),
            nn.ReLU(),
            nn.Conv1d(250, 250, 7),
            nn.ReLU(),
            nn.Conv1d(250, 250, 7),
            nn.ReLU(),
            nn.Conv1d(250, 250, 7),
            nn.ReLU(),
            nn.Conv1d(250, 250, 7),
            nn.ReLU(),
            nn.Conv1d(250, 2000, 32),
            nn.ReLU(),
            nn.Conv1d(2000, 2000, 1),
            nn.ReLU(),
            nn.Conv1d(2000, num_classes, 1),
        )
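
A standalone shape walk of the same stack (the 13 input features, 29 classes, and 500-frame input are assumptions for illustration):

import torch
import torch.nn as nn

layers = nn.Sequential(
    nn.Conv1d(13, 250, 48, 2), nn.ReLU(),
    *[m for _ in range(7) for m in (nn.Conv1d(250, 250, 7), nn.ReLU())],
    nn.Conv1d(250, 2000, 32), nn.ReLU(),
    nn.Conv1d(2000, 2000, 1), nn.ReLU(),
    nn.Conv1d(2000, 29, 1),
)
x = torch.randn(1, 13, 500)
# (500-48)//2+1 = 227, minus 6 per kernel-7 conv (227-42 = 185), minus 31 for kernel 32
print(layers(x).shape)  # torch.Size([1, 29, 154])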
Example #25
    def __init__(self, input_shape=(80, 64), residual_in_channels=256):
        super(Generator, self).__init__()
        Cx, Tx = input_shape
        self.flattened_channels = (Cx // 4) * residual_in_channels

        # 2D Conv Layer
        self.conv1 = nn.Conv2d(
            in_channels=2,
            out_channels=residual_in_channels // 2,
            kernel_size=(5, 15),
            stride=(1, 1),
            padding=(2, 7),
        )

        self.conv1_gates = nn.Conv2d(
            in_channels=2,
            out_channels=residual_in_channels // 2,
            kernel_size=(5, 15),
            stride=1,
            padding=(2, 7),
        )

        # 2D Downsampling Layers
        self.downSample1 = DownSampleGenerator(
            in_channels=residual_in_channels // 2,
            out_channels=residual_in_channels,
            kernel_size=5,
            stride=2,
            padding=2,
        )

        self.downSample2 = DownSampleGenerator(
            in_channels=residual_in_channels,
            out_channels=residual_in_channels,
            kernel_size=5,
            stride=2,
            padding=2,
        )

        # 2D -> 1D Conv
        self.conv2dto1dLayer = nn.Conv1d(
            in_channels=self.flattened_channels,
            out_channels=residual_in_channels,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.conv2dto1dLayer_tfan = nn.InstanceNorm1d(
            num_features=residual_in_channels, affine=True
        )

        # Residual Blocks
        self.residualLayer1 = ResidualLayer(
            in_channels=residual_in_channels,
            out_channels=residual_in_channels * 2,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.residualLayer2 = ResidualLayer(
            in_channels=residual_in_channels,
            out_channels=residual_in_channels * 2,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.residualLayer3 = ResidualLayer(
            in_channels=residual_in_channels,
            out_channels=residual_in_channels * 2,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.residualLayer4 = ResidualLayer(
            in_channels=residual_in_channels,
            out_channels=residual_in_channels * 2,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.residualLayer5 = ResidualLayer(
            in_channels=residual_in_channels,
            out_channels=residual_in_channels * 2,
            kernel_size=3,
            stride=1,
            padding=1,
        )
        self.residualLayer6 = ResidualLayer(
            in_channels=residual_in_channels,
            out_channels=residual_in_channels * 2,
            kernel_size=3,
            stride=1,
            padding=1,
        )

        # 1D -> 2D Conv
        self.conv1dto2dLayer = nn.Conv1d(
            in_channels=residual_in_channels,
            out_channels=self.flattened_channels,
            kernel_size=1,
            stride=1,
            padding=0,
        )
        self.conv1dto2dLayer_tfan = nn.InstanceNorm1d(
            num_features=self.flattened_channels, affine=True
        )

        # UpSampling Layers
        self.upSample1 = self.upsample(
            in_channels=residual_in_channels,
            out_channels=residual_in_channels * 4,
            kernel_size=5,
            stride=1,
            padding=2,
        )

        self.glu = GLU()

        self.upSample2 = self.upsample(
            in_channels=residual_in_channels,
            out_channels=residual_in_channels * 2,
            kernel_size=5,
            stride=1,
            padding=2,
        )

        # 2D Conv Layer
        self.lastConvLayer = nn.Conv2d(
            in_channels=residual_in_channels // 2,
            out_channels=1,
            kernel_size=(5, 15),
            stride=(1, 1),
            padding=(2, 7),
        )
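
The flattened_channels bookkeeping reflects folding the 2D height axis into the 1D channel axis after two stride-2 downsamplings of the 80 x 64 input; a reshape sketch (the view-based fold is an assumption about the forward pass):

import torch

B, C = 1, 256                      # residual_in_channels
H, W = 80 // 4, 64 // 4            # two stride-2 downsamplings of the input_shape
feat2d = torch.randn(B, C, H, W)
feat1d = feat2d.view(B, C * H, W)  # C * H == (80 // 4) * 256 == flattened_channels
print(feat1d.shape)                # torch.Size([1, 5120, 16])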