Example #1
    def __init__(self, feat_dim, output_dim):
        super(Net, self).__init__()

        def get_tdnnf_layer(input_dim, layer_dim):
            return nn.Sequential(
                TDNNFBatchNorm(
                    input_dim,
                    layer_dim,
                    context_len=3,
                    orthonormal_constraint=-1.0,
                    bypass_scale=0.75,
                    bottleneck_dim=160,
                ), nn.Dropout(0.1))

        self.tdnnf_layers = nn.Sequential(
            TDNNFBatchNorm(feat_dim,
                           1536,
                           context_len=3,
                           orthonormal_constraint=-1.0,
                           bottleneck_dim=160),
            nn.Dropout(0.1),
            *[get_tdnnf_layer(1536, 1536) for i in range(2, 4)],
            TDNNFBatchNorm(1536,
                           1536,
                           context_len=3,
                           subsampling_factor=3,
                           orthonormal_constraint=-1.0,
                           bottleneck_dim=160),
            nn.Dropout(0.1),
            *[get_tdnnf_layer(1536, 1536) for i in range(5, 18)],
            OrthonormalLinear(1536, 256, scale=-1.0),
            nn.Dropout(0.1),
        )
        self.chain_layers = nn.Sequential(
            OrthonormalLinear(256, 1536, scale=-1.0),
            nn.Dropout(0.1),
            OrthonormalLinear(1536, 256, scale=-1.0),
            nn.Dropout(0.1),
            NaturalAffineTransform(256, output_dim),
        )
        self.chain_layers[-1].weight.data.zero_()
        self.chain_layers[-1].bias.data.zero_()
        self.xent_layers = nn.Sequential(
            OrthonormalLinear(256, 1536, scale=-1.0),
            nn.Dropout(0.1),
            OrthonormalLinear(1536, 256, scale=-1.0),
            nn.Dropout(0.1),
            NaturalAffineTransform(256, output_dim),
        )
        self.xent_layers[-1].weight.data.zero_()
        self.xent_layers[-1].bias.data.zero_()
        self.output_dim = output_dim
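
The snippet above only shows __init__; the imports and forward pass below are a minimal, hypothetical completion (assuming the layer classes come from pkwrap.nn, as in Example #5), not part of the source.

import torch.nn as nn
import torch.nn.functional as F
from pkwrap.nn import TDNNFBatchNorm, OrthonormalLinear, NaturalAffineTransform  # assumed import path

class Net(nn.Module):
    # __init__ as shown in Example #1 above

    def forward(self, x):
        # x: a batch of feature frames in the layout TDNNFBatchNorm expects;
        # the shared TDNN-F trunk feeds two heads, one trained with the
        # LF-MMI (chain) objective and one with cross-entropy.
        x = self.tdnnf_layers(x)
        chain_out = self.chain_layers(x)
        xent_out = F.log_softmax(self.xent_layers(x), dim=-1)
        return chain_out, xent_out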
Example #2
 def __init__(self, feat_dim, output_dim):
     super(Net, self).__init__()
     self.input_dim = feat_dim
     self.output_dim = output_dim
     self.tdnn = nn.Sequential(
         TDNNFBatchNorm(feat_dim, 512, 160, context_len=5, orthonormal_constraint=-1.0),
         TDNNFBatchNorm(512, 512, 160, context_len=3, orthonormal_constraint=-1.0),
         TDNNFBatchNorm(512, 512, 160, context_len=3, subsampling_factor=3, orthonormal_constraint=-1.0),
         TDNNFBatchNorm(512, 512, 160, context_len=3, orthonormal_constraint=-1.0),
         TDNNFBatchNorm(512, 512, 160, context_len=3, orthonormal_constraint=-1.0),
         TDNNFBatchNorm(512, 512, 160, context_len=3, orthonormal_constraint=-1.0),
     )
     self.lstm = nn.LSTM(512, 256, 2, batch_first=True)
     self.chain = nn.Sequential(
         TDNNFBatchNorm(256, 256, 160, context_len=1, orthonormal_constraint=-1.0),
         NaturalAffineTransform(256, output_dim),
     )
     self.xent = nn.Sequential(
         TDNNFBatchNorm(256, 256, 160, context_len=1, orthonormal_constraint=-1.0),
         NaturalAffineTransform(256, output_dim),
     )
     self.chain[-1].weight.data.zero_()
     self.chain[-1].bias.data.zero_()
     self.xent[-1].weight.data.zero_()
     self.xent[-1].bias.data.zero_()
Example #3
 def __init__(self, feat_dim, output_dim):
     super(Net, self).__init__()
     self.input_dim = feat_dim
     self.output_dim = output_dim
     self.init_blstm = nn.LSTM(feat_dim, 256, 1, batch_first=True, bidirectional=True)
     self.final_blstm = nn.LSTM(512, 256, 4, batch_first=True, bidirectional=True)
     self.chain = nn.Sequential(
         TDNNFBatchNorm(512, 256, 160, context_len=1, orthonormal_constraint=-1.0),
         NaturalAffineTransform(256, output_dim),
     )
     self.xent = nn.Sequential(
         TDNNFBatchNorm(512, 256, 160, context_len=1, orthonormal_constraint=-1.0),
         NaturalAffineTransform(256, output_dim),
     )
     self.chain[-1].weight.data.zero_()
     self.chain[-1].bias.data.zero_()
     self.xent[-1].weight.data.zero_()
     self.xent[-1].bias.data.zero_()
Example #4
 def get_tdnnf_layer(input_dim, layer_dim):
     return nn.Sequential(
         TDNNFBatchNorm(
             input_dim,
             layer_dim,
             context_len=3,
             orthonormal_constraint=-1.0,
             bypass_scale=0.75,
             bottleneck_dim=160,
         ), nn.Dropout(0.1))
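
This is the same helper used inside Example #1's constructor: each call produces one 3-frame TDNN-F block followed by dropout. A short, hypothetical usage sketch (the stack depth and 1536-dim sizes mirror Example #1 but are illustrative only):

import torch.nn as nn

# Stack four identical 1536-dim TDNN-F blocks; the depth of 4 is illustrative.
trunk = nn.Sequential(*[get_tdnnf_layer(1536, 1536) for _ in range(4)])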
Example #5
    def __init__(self,
                 feat_dim,
                 output_dim,
                 hidden_dim=1024,
                 bottleneck_dim=128,
                 prefinal_bottleneck_dim=256,
                 kernel_size_list=[3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3],
                 subsampling_factor_list=[1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1],
                 frame_subsampling_factor=3,
                 p_dropout=0.1):
        super().__init__()

        # at present, only frame_subsampling_factor == 3 is supported
        assert frame_subsampling_factor == 3

        assert len(kernel_size_list) == len(subsampling_factor_list)
        num_layers = len(kernel_size_list)
        input_dim = feat_dim

        #input_dim = feat_dim * 3 + ivector_dim
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.output_subsampling = frame_subsampling_factor

        # manually calculated
        self.padding = 27
        self.frame_subsampling_factor = frame_subsampling_factor

        self.tdnn1 = TDNNFBatchNorm(
            input_dim,
            hidden_dim,
            bottleneck_dim=bottleneck_dim,
            context_len=kernel_size_list[0],
            subsampling_factor=subsampling_factor_list[0],
            orthonormal_constraint=-1.0,
        )
        self.dropout1 = nn.Dropout(p_dropout)
        tdnnfs = []
        for i in range(1, num_layers):
            kernel_size = kernel_size_list[i]
            subsampling_factor = subsampling_factor_list[i]
            layer = TDNNFBatchNorm(
                hidden_dim,
                hidden_dim,
                bottleneck_dim=bottleneck_dim,
                context_len=kernel_size,
                subsampling_factor=subsampling_factor,
                orthonormal_constraint=-1.0,
            )
            tdnnfs.append(layer)
            dropout_layer = nn.Dropout(p_dropout)
            tdnnfs.append(dropout_layer)

        # tdnnfs requires [N, C, T]
        self.tdnnfs = nn.ModuleList(tdnnfs)

        # prefinal_l affine requires [N, C, T]
        self.prefinal_chain = TDNNFBatchNorm(
            hidden_dim,
            hidden_dim,
            bottleneck_dim=prefinal_bottleneck_dim,
            context_len=1,
            orthonormal_constraint=-1.0,
        )
        self.prefinal_xent = TDNNFBatchNorm(
            hidden_dim,
            hidden_dim,
            bottleneck_dim=prefinal_bottleneck_dim,
            context_len=1,
            orthonormal_constraint=-1.0,
        )
        self.chain_output = pkwrap.nn.NaturalAffineTransform(
            hidden_dim, output_dim)
        self.chain_output.weight.data.zero_()
        self.chain_output.bias.data.zero_()

        self.xent_output = pkwrap.nn.NaturalAffineTransform(
            hidden_dim, output_dim)
        self.xent_output.weight.data.zero_()
        self.xent_output.bias.data.zero_()
        self.validate_model()
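
Example #5 builds one TDNN-F layer per entry of kernel_size_list/subsampling_factor_list; the single subsampling_factor=3 entry is what yields the overall frame subsampling of 3 asserted at the top (validate_model() is called but not shown in the snippet). A small, self-contained illustration of how the two default lists line up:

kernel_size_list = [3, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3]
subsampling_factor_list = [1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1]
for i, (k, s) in enumerate(zip(kernel_size_list, subsampling_factor_list)):
    # layer 4 (context_len=1, subsampling_factor=3) is the only one that
    # reduces the frame rate, matching frame_subsampling_factor == 3
    print(f"tdnnf{i + 1}: context_len={k}, subsampling_factor={s}")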