Example #1
    def __init__(self,
                 in_channels,
                 out_channels,
                 activation,
                 dropout=0.0,
                 use_laynorm=False,
                 use_batchnorm=False,
                 kernel_size=1,
                 stride=1):
        super(TDNNLayer, self).__init__()
        self.in_channels = in_channels
        self.drop = nn.Dropout(p=dropout)
        self.act = act_fun(activation)

        assert not (use_laynorm and use_batchnorm)
        if use_laynorm:
            # pytorch-kaldi uses LayerNorm here, but their batches are laid out
            # as [N * L, C] while ours are [N, C, L], so InstanceNorm is the
            # equivalent here
            self.ln = nn.InstanceNorm1d(out_channels,
                                        momentum=0.0,
                                        affine=True)
            add_bias = False
        elif use_batchnorm:
            add_bias = False
            self.bn = nn.BatchNorm1d(out_channels, momentum=0.05)
        else:
            add_bias = True

        # Linear operations
        self.conv = nn.Conv1d(in_channels,
                              out_channels,
                              kernel_size,
                              stride,
                              bias=add_bias)
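A minimal shape check for the pieces this layer builds, assuming a forward order of conv -> norm -> activation -> dropout (the forward pass is not shown above, so that order, and the ReLU standing in for act_fun(activation), are assumptions):

    import torch
    import torch.nn as nn

    N, C_in, C_out, L = 4, 40, 512, 100
    conv = nn.Conv1d(C_in, C_out, kernel_size=1, stride=1, bias=False)
    # InstanceNorm1d over [N, C, L] normalizes each (sample, channel) pair
    # along the time axis; the conv bias is dropped when a norm is used.
    norm = nn.InstanceNorm1d(C_out, momentum=0.0, affine=True)
    act = nn.ReLU()           # stand-in for act_fun(activation)
    drop = nn.Dropout(p=0.15)

    x = torch.randn(N, C_in, L)      # [batch, channels, time]
    y = drop(act(norm(conv(x))))
    print(y.shape)                   # torch.Size([4, 512, 100])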
Example #2
    def __init__(self, input_dim, dnn_lay, dnn_drop, dnn_act, dnn_use_laynorm,
                 dnn_use_batchnorm):
        super(MLPLayer, self).__init__()
        self.input_dim = input_dim
        self.dnn_use_laynorm = dnn_use_laynorm
        self.dnn_use_batchnorm = dnn_use_batchnorm
        self.drop = nn.Dropout(p=dnn_drop)
        self.act = act_fun(dnn_act)

        add_bias = True

        # layer norm initialization
        self.ln = LayerNorm(dnn_lay)
        self.bn = nn.BatchNorm1d(dnn_lay, momentum=0.05)

        if self.dnn_use_laynorm or self.dnn_use_batchnorm:
            add_bias = False

        # Linear operations
        self.wx = nn.Linear(input_dim, dnn_lay, bias=add_bias)

        # weight initialization
        self.wx.weight = torch.nn.Parameter(
            torch.Tensor(dnn_lay, input_dim).uniform_(
                -np.sqrt(0.01 / (input_dim + dnn_lay)),
                np.sqrt(0.01 / (input_dim + dnn_lay))))
        self.wx.bias = torch.nn.Parameter(torch.zeros(dnn_lay))
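The only non-default piece of this layer is the weight initialization. A standalone sketch of the same scheme applied to a plain nn.Linear (the dimensions are illustrative):

    import numpy as np
    import torch
    import torch.nn as nn

    input_dim, dnn_lay = 440, 1024
    wx = nn.Linear(input_dim, dnn_lay, bias=True)

    # Uniform init in [-sqrt(0.01 / (fan_in + fan_out)), +sqrt(...)]: the same
    # (fan_in + fan_out) form as Glorot init but a much smaller range, with the
    # bias reset to zero.
    bound = np.sqrt(0.01 / (input_dim + dnn_lay))
    wx.weight = nn.Parameter(
        torch.Tensor(dnn_lay, input_dim).uniform_(-bound, bound))
    wx.bias = nn.Parameter(torch.zeros(dnn_lay))

    print(float(wx.weight.abs().max()) <= bound)   # True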
Example #3
    def __init__(self, N_in_feats, N_filters, kernel_sizes, max_pool_len,
                 activation, dropout):
        super(CNN, self).__init__()
        self.N_in_feats = N_in_feats

        self.N_lay = len(N_filters)

        self.layers = nn.ModuleList([])

        self.receptive_field = 32

        prev_N_filters = N_in_feats

        for i in range(self.N_lay):
            if i != 0:
                prev_N_filters = N_filters[i - 1]

            self.layers.append(
                ConvLayer(prev_N_filters, N_filters[i], kernel_sizes[i],
                          max_pool_len[i], act_fun(activation[i]), dropout[i]))

        assert self.receptive_field == cnn_receptive_field(self), \
            "receptive_field mismatch: set self.receptive_field to the actual receptive field of {}".format(cnn_receptive_field(self))

        self.N_out_feats = self.layers[-1].N_out_filters
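cnn_receptive_field is not shown in this listing. As a rough, hypothetical stand-in, the receptive field of a stack of stride-1 convolutions, each followed by max pooling of the given size (assuming that conv-then-pool ordering inside ConvLayer), can be computed like this:

    def receptive_field(kernel_sizes, max_pool_len):
        # Standard bookkeeping: rf grows by (kernel - 1) * jump for every op,
        # where jump is the product of all strides seen so far.
        rf, jump = 1, 1
        for k, p in zip(kernel_sizes, max_pool_len):
            rf += (k - 1) * jump      # convolution, stride 1
            rf += (p - 1) * jump      # max-pool kernel
            jump *= p                 # max-pool stride
        return rf

    # Illustrative values: kernels 11/5/3 with pooling 2/2/1
    print(receptive_field([11, 5, 3], [2, 2, 1]))   # 30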
Example #4
    def __init__(self, num_input_feats, input_context, N_filters, kernel_sizes,
                 max_pool_len, use_laynorm, use_batchnorm, use_laynorm_inp,
                 use_batchnorm_inp, activation, dropout):
        super(CNN, self).__init__()
        self.num_input_feats = num_input_feats
        self.context = input_context
        self.use_laynorm_inp = use_laynorm_inp
        self.use_batchnorm_inp = use_batchnorm_inp

        self.N_lay = len(N_filters)

        self.layers = nn.ModuleList([])

        if self.use_laynorm_inp:
            self.layer_norm0 = LayerNorm(self.num_input_feats * self.context)

        if self.use_batchnorm_inp:
            raise NotImplementedError
            # self.batch_norm0 = nn.BatchNorm1d([num_input], momentum=0.05)

        current_input = self.num_input_feats

        for i in range(self.N_lay):
            if i == 0:
                prev_N_filters = 1
            else:
                prev_N_filters = N_filters[i - 1]

            self.layers.append(
                CNNLayer(current_input, prev_N_filters, N_filters[i],
                         kernel_sizes[i], max_pool_len[i], use_laynorm[i],
                         use_batchnorm[i], act_fun(activation[i]), dropout[i]))

            current_input = (current_input - kernel_sizes[i] +
                             1) // max_pool_len[i]

        self.out_dim = current_input * N_filters[-1]
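The out_dim bookkeeping in this constructor can be reproduced on its own; the kernel and pooling values below are illustrative, not taken from any config:

    def cnn_out_dim(num_input_feats, N_filters, kernel_sizes, max_pool_len):
        # Each layer applies a valid convolution (length - kernel + 1) and then
        # an integer max pool, exactly as in the loop above.
        current_input = num_input_feats
        for k, p in zip(kernel_sizes, max_pool_len):
            current_input = (current_input - k + 1) // p
        return current_input * N_filters[-1]

    print(cnn_out_dim(440, [80, 60, 60], [10, 5, 3], [3, 3, 3]))   # 840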
Example #5
    def __init__(self, options, inp_dim):
        super(minimalGRU, self).__init__()

        # Reading parameters
        self.input_dim = inp_dim
        self.minimalgru_lay = options['minimalgru_lay']
        self.minimalgru_drop = options['minimalgru_drop']
        self.minimalgru_use_batchnorm = options['minimalgru_use_batchnorm']
        self.minimalgru_use_laynorm = options['minimalgru_use_laynorm']
        self.minimalgru_use_laynorm_inp = options['minimalgru_use_laynorm_inp']
        self.minimalgru_use_batchnorm_inp = options[
            'minimalgru_use_batchnorm_inp']
        self.minimalgru_orthinit = options['minimalgru_orthinit']
        self.minimalgru_act = options['minimalgru_act']
        self.bidir = options['minimalgru_bidir']
        self.use_cuda = options['use_cuda']
        self.to_do = options['to_do']

        if self.to_do == 'train':
            self.test_flag = False
        else:
            self.test_flag = True

        # List initialization
        self.wh = nn.ModuleList([])
        self.uh = nn.ModuleList([])

        self.wz = nn.ModuleList([])  # Update Gate
        self.uz = nn.ModuleList([])  # Update Gate

        self.ln = nn.ModuleList([])  # Layer Norm
        self.bn_wh = nn.ModuleList([])  # Batch Norm
        self.bn_wz = nn.ModuleList([])  # Batch Norm

        self.act = nn.ModuleList([])  # Activations

        # Input layer normalization
        if self.minimalgru_use_laynorm_inp:
            self.ln0 = LayerNorm(self.input_dim)

        # Input batch normalization
        if self.minimalgru_use_batchnorm_inp:
            self.bn0 = nn.BatchNorm1d(self.input_dim, momentum=0.05)

        self.N_minimalgru_lay = len(self.minimalgru_lay)

        current_input = self.input_dim

        # Initialization of hidden layers

        for i in range(self.N_minimalgru_lay):

            # Activations
            self.act.append(act_fun(self.minimalgru_act[i]))

            add_bias = True

            if self.minimalgru_use_laynorm[i] or self.minimalgru_use_batchnorm[
                    i]:
                add_bias = False

            # Feed-forward connections
            self.wh.append(
                nn.Linear(current_input, self.minimalgru_lay[i],
                          bias=add_bias))
            self.wz.append(
                nn.Linear(current_input, self.minimalgru_lay[i],
                          bias=add_bias))

            # Recurrent connections
            self.uh.append(
                nn.Linear(self.minimalgru_lay[i],
                          self.minimalgru_lay[i],
                          bias=False))
            self.uz.append(
                nn.Linear(self.minimalgru_lay[i],
                          self.minimalgru_lay[i],
                          bias=False))

            if self.minimalgru_orthinit:
                nn.init.orthogonal_(self.uh[i].weight)
                nn.init.orthogonal_(self.uz[i].weight)

            # batch norm initialization
            self.bn_wh.append(
                nn.BatchNorm1d(self.minimalgru_lay[i], momentum=0.05))
            self.bn_wz.append(
                nn.BatchNorm1d(self.minimalgru_lay[i], momentum=0.05))

            self.ln.append(LayerNorm(self.minimalgru_lay[i]))

            if self.bidir:
                current_input = 2 * self.minimalgru_lay[i]
            else:
                current_input = self.minimalgru_lay[i]

        self.out_dim = (1 + self.bidir) * self.minimalgru_lay[-1]
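A sketch of an options dict covering every key this constructor reads; the layer sizes, flags and activations are placeholders, not values from a real config:

    options = {
        'minimalgru_lay': [550, 550],
        'minimalgru_drop': [0.2, 0.2],
        'minimalgru_use_batchnorm': [True, True],
        'minimalgru_use_laynorm': [False, False],
        'minimalgru_use_laynorm_inp': False,
        'minimalgru_use_batchnorm_inp': False,
        'minimalgru_orthinit': True,
        'minimalgru_act': ['tanh', 'tanh'],
        'minimalgru_bidir': True,
        'use_cuda': False,
        'to_do': 'train',
    }

    # Assuming minimalGRU (and its act_fun / LayerNorm helpers) are importable
    # from the module these examples come from:
    net = minimalGRU(options, inp_dim=440)
    print(net.out_dim)   # 1100: last layer width doubled by minimalgru_bidir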
Example #6
    def __init__(self, options, inp_dim):
        super(SincNet, self).__init__()

        # Reading parameters
        self.input_dim = inp_dim
        self.sinc_N_filt = options['sinc_N_filt']

        self.sinc_len_filt = options['sinc_len_filt']
        self.sinc_max_pool_len = options['sinc_max_pool_len']

        self.sinc_act = options['sinc_act']
        self.sinc_drop = options['sinc_drop']

        self.sinc_use_laynorm = options['sinc_use_laynorm']
        self.sinc_use_batchnorm = options['sinc_use_batchnorm']
        self.sinc_use_laynorm_inp = options['sinc_use_laynorm_inp']
        self.sinc_use_batchnorm_inp = options['sinc_use_batchnorm_inp']

        self.N_sinc_lay = len(self.sinc_N_filt)

        self.sinc_sample_rate = options['sinc_sample_rate']
        self.sinc_min_low_hz = options['sinc_min_low_hz']
        self.sinc_min_band_hz = options['sinc_min_band_hz']

        self.conv = nn.ModuleList([])
        self.bn = nn.ModuleList([])
        self.ln = nn.ModuleList([])
        self.act = nn.ModuleList([])
        self.drop = nn.ModuleList([])

        if self.sinc_use_laynorm_inp:
            self.ln0 = LayerNorm(self.input_dim)

        if self.sinc_use_batchnorm_inp:
            self.bn0 = nn.BatchNorm1d(self.input_dim, momentum=0.05)

        current_input = self.input_dim

        for i in range(self.N_sinc_lay):

            N_filt = int(self.sinc_N_filt[i])
            len_filt = int(self.sinc_len_filt[i])

            # dropout
            self.drop.append(nn.Dropout(p=self.sinc_drop[i]))

            # activation
            self.act.append(act_fun(self.sinc_act[i]))

            # layer norm initialization
            self.ln.append(
                LayerNorm([
                    N_filt,
                    int((current_input - self.sinc_len_filt[i] + 1) /
                        self.sinc_max_pool_len[i])
                ]))

            # BatchNorm1d is parameterized by the channel count only
            self.bn.append(nn.BatchNorm1d(N_filt, momentum=0.05))

            if i == 0:
                self.conv.append(
                    SincConv(1,
                             N_filt,
                             len_filt,
                             sample_rate=self.sinc_sample_rate,
                             min_low_hz=self.sinc_min_low_hz,
                             min_band_hz=self.sinc_min_band_hz))

            else:
                self.conv.append(
                    nn.Conv1d(self.sinc_N_filt[i - 1], self.sinc_N_filt[i],
                              self.sinc_len_filt[i]))

            current_input = int((current_input - self.sinc_len_filt[i] + 1) /
                                self.sinc_max_pool_len[i])

        self.out_dim = current_input * N_filt
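A sketch of an options dict with every key the SincNet constructor reads; the values loosely follow the SincNet paper defaults but are illustrative, not taken from a specific config file:

    options = {
        'sinc_N_filt': [80, 60, 60],
        'sinc_len_filt': [251, 5, 5],
        'sinc_max_pool_len': [3, 3, 3],
        'sinc_act': ['leaky_relu', 'leaky_relu', 'leaky_relu'],
        'sinc_drop': [0.0, 0.0, 0.0],
        'sinc_use_laynorm': [True, True, True],
        'sinc_use_batchnorm': [False, False, False],
        'sinc_use_laynorm_inp': True,
        'sinc_use_batchnorm_inp': False,
        'sinc_sample_rate': 16000,
        'sinc_min_low_hz': 50,
        'sinc_min_band_hz': 50,
    }
    # With inp_dim=3200 (a 200 ms chunk at 16 kHz) the feature length shrinks
    # 3200 -> 983 -> 326 -> 107, so out_dim = 107 * 60 = 6420.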
Example #7
    def __init__(self, options, inp_dim):
        super(LSTM, self).__init__()

        # Reading parameters
        self.input_dim = inp_dim
        self.lstm_lay = options['lstm_lay']
        self.lstm_drop = options['lstm_drop']
        self.lstm_use_batchnorm = options['lstm_use_batchnorm']
        self.lstm_use_laynorm = options['lstm_use_laynorm']
        self.lstm_use_laynorm_inp = options['lstm_use_laynorm_inp']
        self.lstm_use_batchnorm_inp = options['lstm_use_batchnorm_inp']
        self.lstm_act = options['lstm_act']
        self.lstm_orthinit = options['lstm_orthinit']

        assert all([isinstance(elem, int) for elem in self.lstm_lay])
        assert all([isinstance(elem, float) for elem in self.lstm_drop])
        assert all(
            [isinstance(elem, bool) for elem in self.lstm_use_batchnorm])
        assert all([isinstance(elem, bool) for elem in self.lstm_use_laynorm])
        assert isinstance(self.lstm_use_laynorm_inp, bool)
        assert isinstance(self.lstm_use_batchnorm_inp, bool)
        assert all([isinstance(elem, str) for elem in self.lstm_act])
        assert isinstance(self.lstm_orthinit, bool)

        self.bidir = options['lstm_bidir']

        # nn.Module.training is True while __init__ runs, so this starts False
        self.test_flag = not self.training

        # List initialization
        self.wfx = nn.ModuleList([])  # Forget
        self.ufh = nn.ModuleList([])  # Forget

        self.wix = nn.ModuleList([])  # Input
        self.uih = nn.ModuleList([])  # Input

        self.wox = nn.ModuleList([])  # Output
        self.uoh = nn.ModuleList([])  # Output

        self.wcx = nn.ModuleList([])  # Cell state
        self.uch = nn.ModuleList([])  # Cell state

        self.ln = nn.ModuleList([])  # Layer Norm
        self.bn_wfx = nn.ModuleList([])  # Batch Norm
        self.bn_wix = nn.ModuleList([])  # Batch Norm
        self.bn_wox = nn.ModuleList([])  # Batch Norm
        self.bn_wcx = nn.ModuleList([])  # Batch Norm

        self.act = nn.ModuleList([])  # Activations

        # Input layer normalization
        if self.lstm_use_laynorm_inp:
            self.ln0 = LayerNorm(self.input_dim)

        # Input batch normalization
        if self.lstm_use_batchnorm_inp:
            self.bn0 = nn.BatchNorm1d(self.input_dim, momentum=0.05)

        self.N_lstm_lay = len(self.lstm_lay)

        current_input = self.input_dim

        # Initialization of hidden layers

        for i in range(self.N_lstm_lay):

            # Activations
            self.act.append(act_fun(self.lstm_act[i]))

            add_bias = True

            if self.lstm_use_laynorm[i] or self.lstm_use_batchnorm[i]:
                add_bias = False

            # Feed-forward connections
            self.wfx.append(
                nn.Linear(current_input, self.lstm_lay[i], bias=add_bias))
            self.wix.append(
                nn.Linear(current_input, self.lstm_lay[i], bias=add_bias))
            self.wox.append(
                nn.Linear(current_input, self.lstm_lay[i], bias=add_bias))
            self.wcx.append(
                nn.Linear(current_input, self.lstm_lay[i], bias=add_bias))

            # Recurrent connections
            self.ufh.append(
                nn.Linear(self.lstm_lay[i], self.lstm_lay[i], bias=False))
            self.uih.append(
                nn.Linear(self.lstm_lay[i], self.lstm_lay[i], bias=False))
            self.uoh.append(
                nn.Linear(self.lstm_lay[i], self.lstm_lay[i], bias=False))
            self.uch.append(
                nn.Linear(self.lstm_lay[i], self.lstm_lay[i], bias=False))

            if self.lstm_orthinit:
                nn.init.orthogonal_(self.ufh[i].weight)
                nn.init.orthogonal_(self.uih[i].weight)
                nn.init.orthogonal_(self.uoh[i].weight)
                nn.init.orthogonal_(self.uch[i].weight)

            # batch norm initialization
            self.bn_wfx.append(nn.BatchNorm1d(self.lstm_lay[i], momentum=0.05))
            self.bn_wix.append(nn.BatchNorm1d(self.lstm_lay[i], momentum=0.05))
            self.bn_wox.append(nn.BatchNorm1d(self.lstm_lay[i], momentum=0.05))
            self.bn_wcx.append(nn.BatchNorm1d(self.lstm_lay[i], momentum=0.05))

            self.ln.append(LayerNorm(self.lstm_lay[i]))

            if self.bidir:
                current_input = 2 * self.lstm_lay[i]
            else:
                current_input = self.lstm_lay[i]

        self.out_dim = (1 + self.bidir) * self.lstm_lay[-1]
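What lstm_orthinit does to each recurrent matrix can be checked in isolation: nn.init.orthogonal_ makes a square weight matrix satisfy W @ W.T = I up to floating-point error.

    import torch
    import torch.nn as nn

    u = nn.Linear(256, 256, bias=False)
    nn.init.orthogonal_(u.weight)

    # The rows of an orthogonally initialized square matrix are orthonormal.
    print(torch.allclose(u.weight @ u.weight.t(), torch.eye(256), atol=1e-4))
    # True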