Esempio n. 1
0
    def __init__(self, num_features=13):
        """Build the x-vector TDNN stack: five frame-level TDNN layers
        followed by two fully connected scoring layers.

        Args:
            num_features: dimensionality of the input features (default 13).
        """
        super(Xvector_TDNN, self).__init__()
        # Width of the last frame-level layer; fc1 takes twice this width
        # (presumably mean+std statistics pooling — TODO confirm in forward()).
        frame5_dim = 1500
        self.frame1 = TDNN(input_dim=num_features, output_dim=512,
                           context_size=5, dilation=1)
        self.frame2 = TDNN(input_dim=512, output_dim=512,
                           context_size=3, dilation=2)
        self.frame3 = TDNN(input_dim=512, output_dim=512,
                           context_size=3, dilation=3)
        self.frame4 = TDNN(input_dim=512, output_dim=512,
                           context_size=1, dilation=1)
        self.frame5 = TDNN(input_dim=512, output_dim=frame5_dim,
                           context_size=1, dilation=1)

        self.fc1 = torch.nn.Linear(2 * frame5_dim, 500)
        self.fc2 = torch.nn.Linear(500, 1)
 def __init__(self):
     """Two stacked 32->32 TDNN layers, an average-pool + linear head,
     and a softmax over the class dimension."""
     super(decoder, self).__init__()
     # Two identical TDNN layers: context 3, no dilation.
     self.tdnn_1 = TDNN(input_dim=32, output_dim=32, context_size=3, dilation=1)
     self.tdnn_2 = TDNN(input_dim=32, output_dim=32, context_size=3, dilation=1)
     # Average-pool with a (38, 1) kernel, then project 32 features to 11 outputs.
     self.test = nn.Sequential(
         nn.AvgPool2d((38, 1), stride=1),
         nn.Linear(32, 11),
     )
     self.output = nn.Softmax(dim=1)
 def __init__(self):
     """Single strided TDNN layer mapping 40-dim input frames to 32 channels."""
     super(classifier2, self).__init__()
     # Context 3, stride 3: downsamples the time axis by a factor of 3.
     self.tdnn_0 = TDNN(input_dim=40, output_dim=32,
                        context_size=3, dilation=1, stride=3)
Esempio n. 4
0
 def __init__(self, feat_dim, output_dim):
     """Four-layer TDNN frame compressor.

     Args:
         feat_dim: dimensionality of the input features.
         output_dim: dimensionality of the compressed frame output.
     """
     super(FrameCompressor, self).__init__()
     # Three widening/context layers followed by a 1x1 projection layer.
     self.frame1 = TDNN(input_dim=feat_dim, output_dim=512, context_size=5, dilation=1)
     self.frame2 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=2)
     self.frame3 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=3)
     self.frame4 = TDNN(input_dim=512, output_dim=output_dim, context_size=1, dilation=1)
 def __init__(self):
     """Two TDNN layers followed by a flattened linear/batch-norm/ReLU head
     and a softmax over the class dimension."""
     super(classifier, self).__init__()
     self.tdnn = TDNN(input_dim=32, output_dim=64, context_size=3, dilation=1)
     self.tdnn2 = TDNN(input_dim=64, output_dim=128, context_size=3, dilation=1)
     # Flatten 38 time steps x 128 channels, then project to 1776 logits.
     self.classifier = nn.Sequential(
         nn.Linear(38 * 128, 1776),
         nn.BatchNorm1d(1776),
         nn.ReLU(inplace=True),
     )
     self.output = nn.Softmax(dim=1)
Esempio n. 6
0
    def __init__(self, input_dim = 40, class_num=2):
        """X-vector network: four TDNN frame layers with dropout, three
        segment-level linear layers, an AdaCos criterion, and an
        id -> speaker lookup loaded from ``speakers.txt``.

        Args:
            input_dim: dimensionality of the input features (default 40).
            class_num: number of target classes for the AdaCos criterion.
        """
        super(X_vector, self).__init__()

        self.tdnn1 = TDNN(input_dim=input_dim, output_dim=512, context_size=5, dilation=1, dropout_p=0.5)
        self.tdnn2 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=2, dropout_p=0.5)
        self.tdnn3 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=3, dropout_p=0.5)
        self.tdnn4 = TDNN(input_dim=512, output_dim=512, context_size=4, dilation=4, dropout_p=0.5)
        # segment6 takes 3000 = 2 * 1500 inputs — presumably mean+std
        # statistics pooling over segment5's output; TODO confirm in forward().
        self.segment5 = nn.Linear(512, 1500)
        self.segment6 = nn.Linear(3000, 512)
        self.segment7 = nn.Linear(512, 512)

        self.criterion = AdaCos(512, class_num)

        # Map integer speaker ids to speaker names; each line of
        # speakers.txt is expected to be "<speaker> <id>".
        self.id2spk = {}
        with open('speakers.txt', 'r') as f:
            for line in f:
                # Split once per line (the original split twice) and avoid
                # shadowing the builtin `id`.
                fields = line.split()
                self.id2spk[int(fields[1])] = fields[0]
Esempio n. 7
0
 def __init__(self,
              num_inputs=1,
              sincnet=True,
              kwidth=641,
              stride=160,
              fmaps=128,
              norm_type='bnorm',
              pad_mode='reflect',
              sr=16000,
              emb_dim=256,
              activation=None,
              rnn_pool=False,
              rnn_layers=1,
              rnn_dropout=0,
              rnn_type='qrnn',
              name='TDNNFe'):
     """TDNN front-end: an optional SincNet feature block, an unpooled
     TDNN, and either an RNN pooling stage or a plain 1x1 convolution
     projecting to ``emb_dim``."""
     super().__init__(name=name)
     self.sincnet = sincnet
     self.emb_dim = emb_dim
     in_channels = num_inputs
     if self.sincnet:
         # SincNet-parameterised first feature block.
         self.feblock = FeBlock(in_channels, fmaps, kwidth, stride, 1,
                                act=activation,
                                pad_mode=pad_mode,
                                norm_type=norm_type,
                                sincnet=True,
                                sr=sr)
         in_channels = fmaps
     # The output dim (2) is an arbitrary placeholder: the 'unpooled'
     # method does not use it.
     self.tdnn = TDNN(in_channels, 2, method='unpooled')
     tdnn_dim = self.tdnn.emb_dim
     # Final projection. With RNN pooling each direction is emb_dim // 2
     # wide, so the bidirectional output already matches emb_dim.
     if rnn_pool:
         self.rnn = build_rnn_block(tdnn_dim,
                                    emb_dim // 2,
                                    rnn_layers=rnn_layers,
                                    rnn_type=rnn_type,
                                    bidirectional=True,
                                    dropout=rnn_dropout)
         self.W = nn.Conv1d(emb_dim, emb_dim, 1)
     else:
         self.W = nn.Conv1d(tdnn_dim, emb_dim, 1)
     self.rnn_pool = rnn_pool
    def __init__(self, params, path='../../../', flag=False):
        """Word + character embedding lookup feeding a character-level TDNN.

        Word embeddings are loaded from a pre-trained numpy file and frozen;
        character embeddings are randomly initialised in [-1, 1] and trained.

        Args:
            params: configuration object providing ``word_vocab_size``,
                ``word_embed_size``, ``char_vocab_size``, ``char_embed_size``.
            path: prefix of the repository root relative to the caller.
            flag: when True, load embeddings from ``data/super/`` instead
                of ``data/``.
        """
        super(Embedding, self).__init__()

        self.params = params

        # `flag` selects the "super" embedding-matrix variant.
        # (Idiom fix: test truthiness directly instead of `flag == True`.)
        if flag:
            word_embed = np.load(path + 'data/super/word_embeddings.npy')
        else:
            word_embed = np.load(path + 'data/word_embeddings.npy')

        self.word_embed = nn.Embedding(params.word_vocab_size,
                                       params.word_embed_size)
        self.char_embed = nn.Embedding(params.char_vocab_size,
                                       params.char_embed_size)
        # Freeze the pre-trained word embeddings.
        self.word_embed.weight = Parameter(t.from_numpy(word_embed).float(),
                                           requires_grad=False)
        # Character embeddings are learned; uniform init in [-1, 1].
        self.char_embed.weight = Parameter(
            t.Tensor(params.char_vocab_size,
                     params.char_embed_size).uniform_(-1, 1))

        self.TDNN = TDNN(self.params)