def __init__(self, num_features=13):
    """X-vector TDNN: five frame-level TDNN layers followed by two
    fully connected layers over pooled statistics.

    :param num_features: dimensionality of the input acoustic features
    """
    super(Xvector_TDNN, self).__init__()
    embedding_dim = 1500  # channel width of the last frame-level layer
    # Frame-level layers; dilation widens the temporal context per layer.
    self.frame1 = TDNN(input_dim=num_features, output_dim=512, context_size=5, dilation=1)
    self.frame2 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=2)
    self.frame3 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=3)
    self.frame4 = TDNN(input_dim=512, output_dim=512, context_size=1, dilation=1)
    self.frame5 = TDNN(input_dim=512, output_dim=embedding_dim, context_size=1, dilation=1)
    # 2 * embedding_dim: presumably mean+std statistics pooling — TODO confirm in forward().
    self.fc1 = torch.nn.Linear(2 * embedding_dim, 500)
    self.fc2 = torch.nn.Linear(500, 1)
def __init__(self):
    """Decoder: two TDNN layers, then average pooling over time and a
    linear projection to 11 classes with a softmax output."""
    super(decoder, self).__init__()
    self.tdnn_1 = TDNN(input_dim=32, output_dim=32, context_size=3, dilation=1)
    self.tdnn_2 = TDNN(input_dim=32, output_dim=32, context_size=3, dilation=1)
    # Pool a (38, 1) window over the time axis, then map 32 channels to
    # 11 output classes.
    head = nn.Sequential(
        nn.AvgPool2d((38, 1), stride=1),
        nn.Linear(32, 11),
    )
    self.test = head
    self.output = nn.Softmax(dim=1)
def __init__(self):
    """Single strided TDNN layer mapping 40-dim features to 32 channels."""
    super(classifier2, self).__init__()
    self.tdnn_0 = TDNN(
        input_dim=40,
        output_dim=32,
        context_size=3,
        dilation=1,
        stride=3,
    )
def __init__(self, feat_dim, output_dim):
    """Frame compressor: a stack of four TDNN layers with widening
    dilation, compressing feat_dim inputs to output_dim frames.

    :param feat_dim: dimensionality of the input features
    :param output_dim: dimensionality of the compressed output
    """
    super(FrameCompressor, self).__init__()
    # (input_dim, output_dim, context_size, dilation) per frame layer.
    layer_specs = [
        (feat_dim, 512, 5, 1),
        (512, 512, 3, 2),
        (512, 512, 3, 3),
        (512, output_dim, 1, 1),
    ]
    # Construct in the same order as before so attribute names (frame1..frame4)
    # and parameter initialisation order are unchanged.
    for idx, (d_in, d_out, ctx, dil) in enumerate(layer_specs, start=1):
        setattr(self, f"frame{idx}",
                TDNN(input_dim=d_in, output_dim=d_out, context_size=ctx, dilation=dil))
def __init__(self):
    """Classifier: two stacked TDNN layers (32 -> 64 -> 128 channels)
    followed by a fully connected projection of the flattened output to
    1776 units, with a softmax head.

    Removes commented-out dead code that was left inside the Sequential.
    """
    super(classifier, self).__init__()
    self.tdnn = TDNN(input_dim=32, output_dim=64, context_size=3, dilation=1)
    self.tdnn2 = TDNN(input_dim=64, output_dim=128, context_size=3, dilation=1)
    # 38 * 128: presumably 38 output frames times 128 channels, flattened —
    # TODO confirm against the expected input length in forward().
    self.classifier = nn.Sequential(
        nn.Linear(38 * 128, 1776),
        nn.BatchNorm1d(1776),
        nn.ReLU(inplace=True),
    )
    self.output = nn.Softmax(dim=1)
def __init__(self, input_dim=40, class_num=2):
    """X-vector network: four dilated TDNN layers with dropout, segment-level
    linear layers, and an AdaCos margin head. Also loads the id -> speaker
    mapping from 'speakers.txt' (one "<speaker> <id>" pair per line).

    Fixes: local `id` shadowed the builtin; each line was stripped/split
    twice; `readlines()` materialised the whole file needlessly.

    :param input_dim: dimensionality of the input acoustic features
    :param class_num: number of speaker classes for the AdaCos head
    """
    super(X_vector, self).__init__()
    self.tdnn1 = TDNN(input_dim=input_dim, output_dim=512, context_size=5, dilation=1, dropout_p=0.5)
    self.tdnn2 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=2, dropout_p=0.5)
    self.tdnn3 = TDNN(input_dim=512, output_dim=512, context_size=3, dilation=3, dropout_p=0.5)
    self.tdnn4 = TDNN(input_dim=512, output_dim=512, context_size=4, dilation=4, dropout_p=0.5)
    self.segment5 = nn.Linear(512, 1500)
    # 3000 = 2 * 1500: presumably mean+std statistics pooling — TODO confirm in forward().
    self.segment6 = nn.Linear(3000, 512)
    self.segment7 = nn.Linear(512, 512)
    self.criterion = AdaCos(512, class_num)
    # Map integer speaker id -> speaker name.
    self.id2spk = {}
    with open('speakers.txt', 'r') as f:
        for line in f:
            fields = line.strip().split()  # parse each line exactly once
            spk = fields[0]
            spk_id = int(fields[1])
            self.id2spk[spk_id] = spk
def __init__(self, num_inputs=1, sincnet=True, kwidth=641, stride=160,
             fmaps=128, norm_type='bnorm', pad_mode='reflect', sr=16000,
             emb_dim=256, activation=None, rnn_pool=False, rnn_layers=1,
             rnn_dropout=0, rnn_type='qrnn', name='TDNNFe'):
    """Frontend: an optional SincNet first layer feeding a TDNN body,
    projected to emb_dim either through an RNN pooling stage or a plain
    1x1 convolution."""
    super().__init__(name=name)
    self.sincnet = sincnet
    self.emb_dim = emb_dim
    in_channels = num_inputs
    if self.sincnet:
        # SincNet-parameterised first feature block; its output becomes
        # the TDNN's input width.
        self.feblock = FeBlock(in_channels, fmaps, kwidth, stride, 1,
                               act=activation, pad_mode=pad_mode,
                               norm_type=norm_type, sincnet=True, sr=sr)
        in_channels = fmaps
    # The output dim (2) is arbitrary: it is unused with method='unpooled'.
    self.tdnn = TDNN(in_channels, 2, method='unpooled')
    fmap = self.tdnn.emb_dim
    self.rnn_pool = rnn_pool
    if rnn_pool:
        # Bidirectional RNN uses emb_dim // 2 per direction so the
        # concatenated output width equals emb_dim.
        self.rnn = build_rnn_block(fmap, emb_dim // 2,
                                   rnn_layers=rnn_layers,
                                   rnn_type=rnn_type,
                                   bidirectional=True,
                                   dropout=rnn_dropout)
        self.W = nn.Conv1d(emb_dim, emb_dim, 1)
    else:
        self.W = nn.Conv1d(fmap, emb_dim, 1)
def __init__(self, params, path='../../../', flag=False):
    """Word and character embedding layers plus a character-level TDNN.

    Word embeddings are loaded from a pre-computed .npy file and frozen
    (requires_grad=False); character embeddings are initialised uniformly
    in [-1, 1] and remain trainable.

    Fixes: `if flag == True:` replaced with the idiomatic truthiness test.

    :param params: config with word/char vocab sizes and embedding sizes
    :param path: repository root used to locate the embedding files
    :param flag: if True, load the 'super' variant of the word embeddings
    """
    super(Embedding, self).__init__()
    self.params = params
    if flag:
        word_embed = np.load(path + 'data/super/word_embeddings.npy')
    else:
        word_embed = np.load(path + 'data/word_embeddings.npy')
    self.word_embed = nn.Embedding(params.word_vocab_size, params.word_embed_size)
    self.char_embed = nn.Embedding(params.char_vocab_size, params.char_embed_size)
    # Frozen pre-trained word vectors.
    self.word_embed.weight = Parameter(t.from_numpy(word_embed).float(),
                                       requires_grad=False)
    # Trainable character embeddings, uniform in [-1, 1].
    self.char_embed.weight = Parameter(
        t.Tensor(params.char_vocab_size, params.char_embed_size).uniform_(-1, 1))
    self.TDNN = TDNN(self.params)