Example #1
	def __init__(self, n_z=256, ncoef=13, proj_size=0, sm_type='none'):
		super(lcnn_9layers, self).__init__()

		self.conv1 = nn.Conv2d(1, 16, kernel_size=(ncoef,3), stride=(1,1), padding=(0,1), bias=False)
		self.bn1 = nn.BatchNorm2d(16)
		self.activation = nn.ELU()

		self.features = nn.Sequential(
			mfm(16, 48, 5, 1, 2), 
			nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True), 
			group(48, 96, 3, 1, 1), 
			nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True),
			group(96, 192, 3, 1, 1),
			nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True), 
			group(192, 128, 3, 1, 1),
			group(128, 128, 3, 1, 1),
			nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True) )

		self.attention = SelfAttention(128)
		self.fc = nn.Linear(128,128)

		self.fc1 = mfm(128, 128, type=0)
		self.fc2 = nn.Linear(128, n_z)

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
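
A minimal shape-check for this model, mirroring the test script in Example #22; it assumes the class lives in a module imported as model_ and that the forward pass returns the n_z-dimensional embedding (both assumptions, not shown in this snippet):

import torch

# hedged usage sketch: a batch of 3 utterances, ncoef=13 cepstra, 400 frames,
# shaped (N, 1, ncoef, T) as in the test script of Example #22
model = model_.lcnn_9layers(n_z=256, ncoef=13)
batch = torch.rand(3, 1, 13, 400)
emb = model(batch)
print('lcnn_9layers', emb.size())   # expected: torch.Size([3, 256])
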
Example #2
	def __init__(self, block=resblock, layers=[1, 2, 3, 4], n_z=256, ncoef=13, proj_size=0, sm_type='none'):
		super(lcnn_29layers_v2, self).__init__()

		self.conv1_ = nn.Conv2d(1, 16, kernel_size=(ncoef,3), stride=(1,1), padding=(0,1), bias=False)
		self.bn1 = nn.BatchNorm2d(16)
		self.activation = nn.ELU()

		self.conv1 = mfm(16, 48, 5, 1, 2)
		self.block1 = self._make_layer(block, layers[0], 48, 48)
		self.group1 = group(48, 96, 3, 1, 1)
		self.block2 = self._make_layer(block, layers[1], 96, 96)
		self.group2 = group(96, 192, 3, 1, 1)
		self.block3 = self._make_layer(block, layers[2], 192, 192)
		self.group3 = group(192, 128, 3, 1, 1)
		self.block4 = self._make_layer(block, layers[3], 128, 128)
		self.group4 = group(128, 128, 3, 1, 1)

		self.attention = SelfAttention(128)
		self.fc = nn.Linear(128,128)

		self.fc1 = nn.Linear(128, n_z)

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
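
The _make_layer helper called above is not included in the snippet. A hypothetical reconstruction, assuming it simply stacks num_blocks residual blocks of fixed width (the signature is inferred from calls like self._make_layer(block, layers[0], 48, 48), where input and output channels match within each stage):

import torch.nn as nn

def _make_layer(self, block, num_blocks, in_channels, out_channels):
    # hypothetical helper: repeat the residual block num_blocks times,
    # keeping the channel count constant within the stage
    layers = []
    for _ in range(num_blocks):
        layers.append(block(in_channels, out_channels))
    return nn.Sequential(*layers)
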
Example #3
	def __init__(self, n_z=256, layers=[3,4,6,3], block=Bottleneck, proj_size=0, ncoef=23, sm_type='none'):
		self.inplanes = 16
		super(ResNet_lstm, self).__init__()
	
		self.conv1 = nn.Conv2d(1, 16, kernel_size=(ncoef,3), stride=(1,1), padding=(0,1), bias=False)
		self.bn1 = nn.BatchNorm2d(16)
		self.activation = nn.ELU()
		
		self.layer1 = self._make_layer(block, 16, layers[0],stride=1)
		self.layer2 = self._make_layer(block, 32, layers[1], stride=1)
		self.layer3 = self._make_layer(block, 64, layers[2], stride=2)
		self.layer4 = self._make_layer(block, 128, layers[3], stride=2)

		self.lstm = nn.LSTM(512, 256, 2, bidirectional=True, batch_first=False)

		self.fc = nn.Linear(512+256,512)
		self.lbn = nn.BatchNorm1d(512)

		self.fc_mu = nn.Linear(512, n_z)

		self.initialize_params()

		self.attention = SelfAttention(512)

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
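
The ResNet variants (this example and Examples #4, #6, #11, #13, #20) also omit their _make_layer definition. A sketch in the usual torchvision style, assuming the running channel count is tracked in self.inplanes (spelled in_planes in some of the other examples) and that the block follows torchvision's Bottleneck signature; the real helper may differ:

import torch.nn as nn

def _make_layer(self, block, planes, blocks, stride=1):
    # hypothetical helper: the first block may downsample via a 1x1 projection,
    # the remaining blocks keep the shape
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
        downsample = nn.Sequential(
            nn.Conv2d(self.inplanes, planes * block.expansion,
                      kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(planes * block.expansion))
    layers = [block(self.inplanes, planes, stride, downsample)]
    self.inplanes = planes * block.expansion
    for _ in range(1, blocks):
        layers.append(block(self.inplanes, planes))
    return nn.Sequential(*layers)
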
Example #4
	def __init__(self, n_z=256, layers=[3,4,6,3], block=PreActBlock, proj_size=0, ncoef=23, sm_type='none', delta=False):
		self.in_planes = 16
		super(ResNet_2d, self).__init__()

		self.conv1 = nn.Conv2d(3 if delta else 1, 16, kernel_size=3, stride=1, padding=1, bias=False)

		self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
		self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
		self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
		self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

		self.conv_out = nn.Conv2d(block.expansion*512, 512, kernel_size=(6,1), stride=1, padding=0, bias=False)

		self.fc = nn.Linear(512*2,512)
		self.lbn = nn.BatchNorm1d(512)

		self.fc_mu = nn.Linear(512, n_z)

		self.initialize_params()

		self.attention = SelfAttention(512)

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
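
SelfAttention is used as a pooling layer throughout these examples but is never defined here. A minimal hypothetical version that scores each time step and returns a weighted mean; note that several examples feed twice the attention width into the following Linear, which suggests the real module may also concatenate attention-weighted statistics, a detail this sketch does not reproduce:

import torch
import torch.nn as nn

class SelfAttention(nn.Module):
    # hypothetical attentive pooling over a (N, T, hidden_size) sequence
    def __init__(self, hidden_size):
        super().__init__()
        self.score = nn.Linear(hidden_size, 1)

    def forward(self, x):
        w = torch.softmax(self.score(x), dim=1)   # (N, T, 1) frame weights
        return (w * x).sum(dim=1)                 # (N, hidden_size) weighted mean
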
Example #5
	def __init__(self, n_z=256, proj_size=0, ncoef=23, sm_type='none', delta=False):
		super(TDNN_logpool, self).__init__()
		self.delta=delta
		self.model = nn.Sequential( nn.Conv1d(3*ncoef if delta else ncoef, 512, 5, padding=2),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 3, dilation=2, padding=2),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 3, dilation=3, padding=3),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 1),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 1500, 1),
			nn.BatchNorm1d(1500),
			nn.ReLU(inplace=True) )

		self.pooling = StatisticalPooling()

		self.post_pooling_1_1 = nn.Linear(1500, 512)
		self.post_pooling_1_2 = nn.Sequential(nn.BatchNorm1d(512), nn.ReLU(inplace=True) )

		self.post_pooling_2_1 = nn.Linear(512, 512)
		self.post_pooling_2_2 = nn.Sequential(nn.BatchNorm1d(512), nn.ReLU(inplace=True) )
		self.post_pooling_2_3 = nn.Linear(512, proj_size)

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=proj_size, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=proj_size, output_features=proj_size)
			else:
				raise NotImplementedError
Example #6
	def __init__(self, n_z=256, layers=[3,4,6,3], block=PreActBottleneck, proj_size=0, ncoef=23, sm_type='none', delta=False):
		self.in_planes = 32
		super(ResNet_qrnn, self).__init__()

		self.conv1 = nn.Conv2d(3 if delta else 1, 32, kernel_size=(ncoef,3), stride=(1,1), padding=(0,1), bias=False)

		self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
		self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
		self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
		self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

		from torchqrnn import QRNN

		self.qrnn = QRNN(block.expansion*512, 512, num_layers=2, dropout=0.3)

		self.fc = nn.Linear(1536,512)
		self.lbn = nn.BatchNorm1d(512)

		self.fc_mu = nn.Linear(512, n_z)

		self.initialize_params()

		self.attention = SelfAttention(512)

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
Example #7
    def __init__(self, n_z=256, proj_size=0, ncoef=100, sm_type='none'):
        super(TDNN_mfcc, self).__init__()

        self.model = nn.Sequential(
            nn.BatchNorm1d(ncoef), nn.Conv1d(ncoef, 512, 5, padding=2),
            nn.BatchNorm1d(512), nn.ReLU(inplace=True),
            nn.Conv1d(512, 512, 3, dilation=2, padding=2), nn.BatchNorm1d(512),
            nn.ReLU(inplace=True), nn.Conv1d(512,
                                             512,
                                             3,
                                             dilation=3,
                                             padding=3), nn.BatchNorm1d(512),
            nn.ReLU(inplace=True), nn.Conv1d(512, 512, 1), nn.BatchNorm1d(512),
            nn.ReLU(inplace=True), nn.Conv1d(512, 1500, 1),
            nn.BatchNorm1d(1500), nn.ReLU(inplace=True))

        self.pooling = StatisticalPooling()

        self.post_pooling = nn.Sequential(nn.Conv1d(3000, 512, 1),
                                          nn.BatchNorm1d(512),
                                          nn.ReLU(inplace=True),
                                          nn.Conv1d(512, 512, 1),
                                          nn.BatchNorm1d(512),
                                          nn.ReLU(inplace=True),
                                          nn.Conv1d(512, n_z, 1))

        if proj_size > 0 and sm_type != 'none':
            if sm_type == 'softmax':
                self.out_proj = Softmax(input_features=n_z,
                                        output_features=proj_size)
            elif sm_type == 'am_softmax':
                self.out_proj = AMSoftmax(input_features=n_z,
                                          output_features=proj_size)
            else:
                raise NotImplementedError
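
StatisticalPooling is likewise not defined in these snippets. A hypothetical mean-plus-standard-deviation pooling over time, which would explain why the post-pooling layers above take 3000 input channels (2 x 1500):

import torch
import torch.nn as nn

class StatisticalPooling(nn.Module):
    # hypothetical pooling: concatenate per-channel mean and std over time,
    # turning (N, C, T) activations into a (N, 2*C, 1) utterance-level summary
    def forward(self, x):
        mean = x.mean(dim=2, keepdim=True)
        std = x.std(dim=2, keepdim=True)
        return torch.cat([mean, std], dim=1)
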
Example #8
	def __init__(self, n_z=256, proj_size=0, ncoef=23, sm_type='none'):
		super(cnn_lstm_mfcc, self).__init__()

		self.features = nn.Sequential(
			nn.Conv2d(1, 32, kernel_size=(ncoef,3), padding=(0,2), stride=(1,1), bias=False),
			nn.BatchNorm2d(32),
			nn.ELU(),
			nn.Conv2d(32, 64, kernel_size=(1,5), padding=(0,1), stride=(1,2), bias=False),
			nn.BatchNorm2d(64),
			nn.ELU(),
			nn.Conv2d(64, 128, kernel_size=(1,5), padding=(0,1), stride=(1,2), bias=False),
			nn.BatchNorm2d(128),
			nn.ELU(),
			nn.Conv2d(128, 256, kernel_size=(1,5), padding=(0,1), stride=(1,2), bias=False),
			nn.BatchNorm2d(256),
			nn.ELU() )

		self.lstm = nn.LSTM(256, 512, 2, bidirectional=True, batch_first=False)

		self.fc_mu = nn.Sequential(
			nn.Linear(512*2, n_z) )

		self.initialize_params()

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
Example #9
	def __init__(self, n_z=256, ncoef=13, proj_size=0, sm_type='none'):
		'''
		The FTDNN architecture from
		"State-of-the-art speaker recognition with neural network embeddings in 
		NIST SRE18 and Speakers in the Wild evaluations"
		https://www.sciencedirect.com/science/article/pii/S0885230819302700
		'''
		super(FTDNN, self).__init__()

		self.layer01 = TDNN_(input_dim=ncoef, output_dim=512, context_size=5, padding=2)
		self.layer02 = FTDNNLayer(512, 1024, 256, context_size=2, dilations=[ 2, 2, 2], paddings=[1, 1, 1])
		self.layer03 = FTDNNLayer(1024, 1024, 256, context_size=1, dilations=[1, 1, 1], paddings=[0, 0, 0])
		self.layer04 = FTDNNLayer(1024, 1024, 256, context_size=2, dilations=[3, 3, 2], paddings=[2, 1, 1])
		self.layer05 = FTDNNLayer(2048, 1024, 256, context_size=1, dilations=[1, 1, 1], paddings=[0, 0, 0])
		self.layer06 = FTDNNLayer(1024, 1024, 256, context_size=2, dilations=[3, 3, 2], paddings=[2, 1, 1])
		self.layer07 = FTDNNLayer(3072, 1024, 256, context_size=2, dilations=[3, 3, 2], paddings=[2, 1, 1])
		self.layer08 = FTDNNLayer(1024, 1024, 256, context_size=2, dilations=[3, 3, 2], paddings=[2, 1, 1])
		self.layer09 = FTDNNLayer(3072, 1024, 256, context_size=1, dilations=[1, 1, 1], paddings=[0, 0, 0])
		self.layer10 = DenseReLU(1024, 2048)
		self.layer11 = StatsPool()

		self.post_pooling_1 = DenseReLU(4096, 512)

		self.post_pooling_2 = DenseReLU(512, n_z)

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
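
The forward pass of this FTDNN is not shown, but the layer input widths above (2048 = 2 x 1024 into layer05, 3072 = 3 x 1024 into layer07 and layer09, 4096 = 2 x 2048 after StatsPool) suggest skip connections that concatenate earlier layer outputs, as in the Kaldi FTDNN recipe. A hedged sketch of that wiring, written as if it were the class's forward() and assuming a (N, T, C) feature layout:

import torch

def forward(self, x):
    # hypothetical data flow inferred from the layer dimensions above
    x1 = self.layer01(x)
    x2 = self.layer02(x1)
    x3 = self.layer03(x2)
    x4 = self.layer04(x3)
    x5 = self.layer05(torch.cat([x4, x3], dim=-1))      # 1024 + 1024 = 2048
    x6 = self.layer06(x5)
    x7 = self.layer07(torch.cat([x6, x5, x3], dim=-1))  # 3 * 1024 = 3072
    x8 = self.layer08(x7)
    x9 = self.layer09(torch.cat([x8, x7, x5], dim=-1))  # 3 * 1024 = 3072
    x = self.layer10(x9)                                # -> 2048
    x = self.layer11(x)                                 # StatsPool -> 4096
    x = self.post_pooling_1(x)                          # -> 512
    return self.post_pooling_2(x)                       # -> (N, n_z) embedding
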
Example #10
	def __init__(self, n_z=256, proj_size=0, ncoef=23, sm_type='none', delta=False):
		super(transformer_enc, self).__init__()
		self.delta=delta
		self.pre_encoder = nn.Sequential( nn.Conv1d(3*ncoef if delta else ncoef, 512, 7),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 5),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True) )

		self.transformer_encoder = nn.TransformerEncoder(
			nn.TransformerEncoderLayer(d_model=512, nhead=8, dim_feedforward=768, dropout=0.1),
			num_layers=5, norm=nn.LayerNorm(512) )

		self.pooling = StatisticalPooling()

		self.post_pooling_1 = nn.Sequential(nn.Conv1d(1024, 512, 1),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True) )

		self.post_pooling_2 = nn.Sequential(nn.Conv1d(512, 512, 1),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, n_z, 1) )

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
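
The forward pass of transformer_enc is not part of this snippet; a hedged sketch of how the tensors might flow, inferred only from the layer widths above (nn.TransformerEncoder expects time-first input in the PyTorch versions contemporary with this code):

def forward(self, x):
    # hypothetical data flow inferred from the layer widths above
    x = self.pre_encoder(x)          # (N, 512, T') from (N, ncoef or 3*ncoef, T)
    x = x.permute(2, 0, 1)           # (T', N, 512): time-first for the transformer
    x = self.transformer_encoder(x)  # (T', N, 512)
    x = x.permute(1, 2, 0)           # back to (N, 512, T')
    x = self.pooling(x)              # (N, 1024, 1) assuming mean+std StatisticalPooling
    x = self.post_pooling_1(x)       # (N, 512, 1)
    return self.post_pooling_2(x).squeeze(-1)   # (N, n_z) embedding
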
Example #11
	def __init__(self, n_z=256, nh=1, n_h=512, layers=[3,4,23,3], block=PreActBottleneck, proj_size=100, ncoef=23, dropout_prob=0.25, sm_type='softmax'):
		self.in_planes = 32
		super(ResNet_large, self).__init__()

		self.conv1 = nn.Conv2d(1, 32, kernel_size=(ncoef,3), stride=(1,1), padding=(0,1), bias=False)
		self.bn1 = nn.BatchNorm2d(32)
		self.activation = nn.ReLU()
		
		self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
		self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
		self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
		self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

		self.fc = nn.Linear(block.expansion*512*2,512)
		self.lbn = nn.BatchNorm1d(512)

		self.fc_mu = nn.Linear(512, n_z)

		self.classifier = self.make_bin_layers(n_in=2*n_z, n_h_layers=nh, h_size=n_h, dropout_p=dropout_prob)

		self.initialize_params()

		self.attention = SelfAttention(block.expansion*512)

		if sm_type=='softmax':
			self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
		elif sm_type=='am_softmax':
			self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
		else:
			raise NotImplementedError
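
make_bin_layers builds the pairwise verification head used here and in Examples #20 and #21, but its definition is not shown. A hypothetical MLP over a pair of concatenated embeddings ending in a single sigmoid score (layer widths, activation, and the ModuleList return type are all assumptions):

import torch.nn as nn

def make_bin_layers(self, n_in, n_h_layers, h_size, dropout_p):
    # hypothetical binary-classification head over concatenated embedding pairs
    layers = [nn.Linear(n_in, h_size), nn.LeakyReLU(0.1)]
    for _ in range(n_h_layers - 1):
        layers.append(nn.Linear(h_size, h_size))
        layers.append(nn.LeakyReLU(0.1))
    layers.append(nn.Dropout(p=dropout_p))
    layers.append(nn.Linear(h_size, 1))
    layers.append(nn.Sigmoid())
    return nn.ModuleList(layers)
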
Example #12
    def __init__(self,
                 pase_cfg,
                 pase_cp=None,
                 n_z=256,
                 proj_size=0,
                 ncoef=100,
                 sm_type='none'):
        super(global_MLP, self).__init__()

        self.encoder = wf_builder(pase_cfg)
        if pase_cp:
            self.encoder.load_pretrained(pase_cp,
                                         load_last=True,
                                         verbose=False)

        self.model = nn.Sequential(nn.Linear(ncoef, 512), nn.BatchNorm1d(512),
                                   nn.ReLU(inplace=True), nn.Linear(512, 512),
                                   nn.BatchNorm1d(512), nn.ReLU(inplace=True),
                                   nn.Linear(512, n_z))

        if proj_size > 0 and sm_type != 'none':
            if sm_type == 'softmax':
                self.out_proj = Softmax(input_features=n_z,
                                        output_features=proj_size)
            elif sm_type == 'am_softmax':
                self.out_proj = AMSoftmax(input_features=n_z,
                                          output_features=proj_size)
            else:
                raise NotImplementedError
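
Examples #12 through #15 build their front end with PASE's wf_builder and load_pretrained; the forward pass is not shown, so the following is only a hedged sketch of how raw waveform might reach the MLP above. The shapes, the mean-over-time pooling, and the variable names are assumptions, not taken from the snippet:

import torch

wav = torch.randn(4, 1, 16000)          # (N, 1, samples): one second of 16 kHz audio, assumed layout
feats = model.encoder(wav)              # PASE frame-level features, roughly (N, ncoef, T)
emb = model.model(feats.mean(dim=-1))   # one plausible reduction: pool over time, then map to n_z
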
Example #13
    def __init__(self,
                 pase_cfg,
                 pase_cp=None,
                 n_z=256,
                 layers=[2, 2, 2, 2],
                 block=PreActBlock,
                 proj_size=0,
                 ncoef=23,
                 sm_type='none'):
        self.in_planes = 16
        super(ResNet_18, self).__init__()

        self.model = nn.ModuleList()

        self.model.append(
            nn.Sequential(
                nn.Conv2d(1,
                          16,
                          kernel_size=(2 * ncoef, 3),
                          stride=(1, 1),
                          padding=(0, 1),
                          bias=False), nn.BatchNorm2d(16), nn.ReLU()))

        self.model.append(self._make_layer(block, 64, layers[0], stride=1))
        self.model.append(self._make_layer(block, 128, layers[1], stride=2))
        self.model.append(self._make_layer(block, 256, layers[2], stride=2))
        self.model.append(self._make_layer(block, 512, layers[3], stride=2))

        self.initialize_params()

        self.pooling = SelfAttention(block.expansion * 512)

        self.post_pooling = nn.Sequential(
            nn.Conv1d(block.expansion * 512 * 2, 512, 1), nn.BatchNorm1d(512),
            nn.ReLU(inplace=True), nn.Conv1d(512, 512, 1), nn.BatchNorm1d(512),
            nn.ReLU(inplace=True), nn.Conv1d(512, n_z, 1))

        if proj_size > 0 and sm_type != 'none':
            if sm_type == 'softmax':
                self.out_proj = Softmax(input_features=n_z,
                                        output_features=proj_size)
            elif sm_type == 'am_softmax':
                self.out_proj = AMSoftmax(input_features=n_z,
                                          output_features=proj_size)
            else:
                raise NotImplementedError

        ## Load the PASE encoder after initializing the main model params
        self.encoder = wf_builder(pase_cfg)
        if pase_cp:
            self.encoder.load_pretrained(pase_cp,
                                         load_last=True,
                                         verbose=False)
Example #14
    def __init__(self,
                 pase_cfg,
                 pase_cp=None,
                 n_layers=4,
                 n_z=256,
                 proj_size=0,
                 ncoef=23,
                 sm_type='none'):
        super(pyr_rnn, self).__init__()

        self.model = nn.ModuleList(
            [nn.LSTM(2 * ncoef, 256, 1, bidirectional=True, batch_first=True)])

        for i in range(1, n_layers):
            self.model.append(
                nn.LSTM(256 * 2 * 2,
                        256,
                        1,
                        bidirectional=True,
                        batch_first=True))

        self.pooling = StatisticalPooling()

        self.post_pooling = nn.Sequential(nn.Conv1d(256 * 2 * 2 * 2, 512, 1),
                                          nn.BatchNorm1d(512),
                                          nn.ReLU(inplace=True),
                                          nn.Conv1d(512, 512, 1),
                                          nn.BatchNorm1d(512),
                                          nn.ReLU(inplace=True),
                                          nn.Conv1d(512, n_z, 1))

        self.initialize_params()

        self.attention = SelfAttention(512)

        if proj_size > 0 and sm_type != 'none':
            if sm_type == 'softmax':
                self.out_proj = Softmax(input_features=n_z,
                                        output_features=proj_size)
            elif sm_type == 'am_softmax':
                self.out_proj = AMSoftmax(input_features=n_z,
                                          output_features=proj_size)
            else:
                raise NotImplementedError

        self.encoder = wf_builder(pase_cfg)
        if pase_cp:
            self.encoder.load_pretrained(pase_cp,
                                         load_last=True,
                                         verbose=False)
Example #15
    def __init__(self,
                 pase_cfg,
                 pase_cp=None,
                 n_z=256,
                 proj_size=0,
                 ncoef=100,
                 sm_type='none'):
        super(TDNN, self).__init__()

        self.encoder = wf_builder(pase_cfg)
        if pase_cp:
            self.encoder.load_pretrained(pase_cp,
                                         load_last=True,
                                         verbose=False)

        self.model = nn.Sequential(
            nn.BatchNorm1d(2 * ncoef), nn.Conv1d(2 * ncoef, 512, 5, padding=2),
            nn.BatchNorm1d(512), nn.ReLU(inplace=True),
            nn.Conv1d(512, 512, 3, dilation=2, padding=2), nn.BatchNorm1d(512),
            nn.ReLU(inplace=True), nn.Conv1d(512,
                                             512,
                                             3,
                                             dilation=3,
                                             padding=3), nn.BatchNorm1d(512),
            nn.ReLU(inplace=True), nn.Conv1d(512, 512, 1), nn.BatchNorm1d(512),
            nn.ReLU(inplace=True), nn.Conv1d(512, 1500, 1),
            nn.BatchNorm1d(1500), nn.ReLU(inplace=True))

        self.pooling = StatisticalPooling()

        self.post_pooling = nn.Sequential(nn.Conv1d(3000, 512, 1),
                                          nn.BatchNorm1d(512),
                                          nn.ReLU(inplace=True),
                                          nn.Conv1d(512, 512, 1),
                                          nn.BatchNorm1d(512),
                                          nn.ReLU(inplace=True),
                                          nn.Conv1d(512, n_z, 1))

        if proj_size > 0 and sm_type != 'none':
            if sm_type == 'softmax':
                self.out_proj = Softmax(input_features=n_z,
                                        output_features=proj_size)
            elif sm_type == 'am_softmax':
                self.out_proj = AMSoftmax(input_features=n_z,
                                          output_features=proj_size)
            else:
                raise NotImplementedError
Example #16
	def __init__(self, n_z=256, proj_size=0, ncoef=23, sm_type='none', delta=False):
		super().__init__()

		self.delta = delta

		self.model = nn.Sequential( nn.Conv1d(3*ncoef if delta else ncoef, 512, 5, padding=2),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 5, padding=2),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 5, padding=3),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 7),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 1500, 1),
			nn.BatchNorm1d(1500),
			nn.ReLU(inplace=True) )

		self.ASPP_block = ASPP(1500, 1500)

		self.post_pooling_1 = nn.Sequential(nn.Conv1d(1500, 512, 1),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True) )

		self.post_pooling_2 = nn.Sequential(nn.Conv1d(512, 512, 1),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, n_z, 1) )

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
Example #17
	def __init__(self, n_z=256, proj_size=0, ncoef=23, n_heads=4, sm_type='none', delta=False):
		super(TDNN_multihead, self).__init__()
		self.delta=delta

		self.model = nn.Sequential( nn.Conv1d(3*ncoef if delta else ncoef, 512, 5, padding=2),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 5, padding=2),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 5, padding=3),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 7),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 1500, 1),
			nn.BatchNorm1d(1500),
			nn.ReLU(inplace=True) )

		self.attention = nn.TransformerEncoderLayer(d_model=1500, nhead=n_heads, dim_feedforward=512, dropout=0.1)
		self.pooling = StatisticalPooling()

		self.post_pooling_1 = nn.Sequential(nn.Conv1d(1500*2, 512, 1),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True) )

		self.post_pooling_2 = nn.Sequential(nn.Conv1d(512, 512, 1),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, n_z, 1) )

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
Example #18
	def __init__(self, n_z=256, proj_size=0, ncoef=23, sm_type='none', delta=False):
		super(TDNN_lstm, self).__init__()
		self.delta=delta

		self.model = nn.Sequential( nn.Conv1d(3*ncoef if delta else ncoef, 512, 5, padding=2),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 5, padding=2),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 5, padding=3),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 512, 7),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, 1500, 1),
			nn.BatchNorm1d(1500),
			nn.ReLU(inplace=True) )

		self.pooling = nn.LSTM(1500, 512, 2, bidirectional=True, batch_first=False)
		self.attention = SelfAttention(1024)

		self.post_pooling_1 = nn.Sequential(nn.Conv1d(2560, 512, 1),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True) )

		self.post_pooling_2 = nn.Sequential(nn.Conv1d(512, 512, 1),
			nn.BatchNorm1d(512),
			nn.ReLU(inplace=True),
			nn.Conv1d(512, n_z, 1) )

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
Example #19
	def __init__(self, n_z=256, ncoef=13, proj_size=0, sm_type='none', n_heads=16):
		super().__init__()

		self.model_1 = nn.Sequential( nn.Conv1d(ncoef, 512, 5, padding=2, bias=False),
			nn.ReLU(inplace=True),
			nn.BatchNorm1d(512) )
		self.model_2 = nn.Sequential( nn.Conv1d(512, 512, 5, padding=2, bias=False),
			nn.ReLU(inplace=True),
			nn.BatchNorm1d(512) )
		self.model_3 = nn.Sequential( nn.Conv1d(512, 512, 5, padding=3, bias=False),
			nn.ReLU(inplace=True),
			nn.BatchNorm1d(512) )
		self.model_4 = nn.Sequential( nn.Conv1d(512, 512, 7, bias=False),
			nn.ReLU(inplace=True),
			nn.BatchNorm1d(512) )
		self.model_5 = nn.Sequential( nn.Conv1d(512, 512, 1, bias=False),
			nn.ReLU(inplace=True),
			nn.BatchNorm1d(512) )

		self.stats_pooling = StatisticalPooling()

		self.multihead_pooling = nn.TransformerEncoderLayer(d_model=1024, nhead=n_heads, dim_feedforward=512, dropout=0.1)

		self.post_pooling_1 = nn.Sequential(nn.Linear(1024, 512, bias=False),
			nn.ReLU(inplace=True),
			nn.BatchNorm1d(512) )

		self.post_pooling_2 = nn.Sequential(nn.Linear(512, 512, bias=False),
			nn.ReLU(inplace=True),
			nn.BatchNorm1d(512),
			nn.Linear(512, n_z) )

		if proj_size>0 and sm_type!='none':
			if sm_type=='softmax':
				self.out_proj=Softmax(input_features=n_z, output_features=proj_size)
			elif sm_type=='am_softmax':
				self.out_proj=AMSoftmax(input_features=n_z, output_features=proj_size)
			else:
				raise NotImplementedError
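
Every example guards construction of an out_proj head on proj_size and sm_type, but the Softmax and AMSoftmax classes themselves are not shown. A hypothetical additive-margin softmax consistent with the call signature used in Example #22, amsm(batch, batch_labels); the margin m and scale s are assumed defaults:

import torch
import torch.nn as nn
import torch.nn.functional as F

class AMSoftmax(nn.Module):
    # hypothetical additive-margin softmax head: returns scaled, margin-adjusted
    # cosine logits meant to be fed to a cross-entropy loss
    def __init__(self, input_features, output_features, m=0.35, s=30.0):
        super().__init__()
        self.m, self.s = m, s
        self.w = nn.Parameter(torch.randn(input_features, output_features))

    def forward(self, x, labels):
        w = F.normalize(self.w, dim=0)
        x = F.normalize(x, dim=1)
        cos = x @ w                                          # (N, n_classes) cosine similarities
        margin = F.one_hot(labels, cos.size(1)).float() * self.m
        return self.s * (cos - margin)
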
Example #20
    def __init__(self,
                 n_z=256,
                 nh=1,
                 n_h=512,
                 layers=[3, 4, 23, 3],
                 block=PreActBottleneck,
                 proj_size=100,
                 ncoef=23,
                 dropout_prob=0.25,
                 sm_type='softmax',
                 ndiscriminators=1,
                 r_proj_size=0):
        self.in_planes = 32
        super(ResNet_large, self).__init__()

        self.ndiscriminators = ndiscriminators
        self.r_proj_size = r_proj_size
        self.classifier = nn.ModuleList()
        self.dropout_prob = dropout_prob
        self.n_hidden = nh
        self.hidden_size = n_h
        self.latent_size = n_z
        self.sm_type = sm_type
        self.ncoef = ncoef

        self.conv1 = nn.Conv2d(1,
                               32,
                               kernel_size=(ncoef, 3),
                               stride=(1, 1),
                               padding=(0, 1),
                               bias=False)

        self.layer1 = self._make_layer(block, 64, layers[0], stride=1)
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        self.fc = nn.Linear(block.expansion * 512 * 2, 512)
        self.lbn = nn.BatchNorm1d(512)

        self.fc_mu = nn.Linear(512, n_z)

        self.initialize_params()

        if ndiscriminators > 1:
            for i in range(self.ndiscriminators):
                self.classifier.append(
                    self.make_bin_layers(n_in=2 * 512,
                                         n_h_layers=nh,
                                         h_size=n_h,
                                         dropout_p=dropout_prob))
        else:
            self.classifier = self.make_bin_layers(n_in=2 * 512,
                                                   n_h_layers=nh,
                                                   h_size=n_h,
                                                   dropout_p=dropout_prob)

        self.attention = SelfAttention(block.expansion * 512)

        if sm_type == 'softmax':
            self.out_proj = Softmax(input_features=n_z,
                                    output_features=proj_size)
        elif sm_type == 'am_softmax':
            self.out_proj = AMSoftmax(input_features=n_z,
                                      output_features=proj_size)
        else:
            raise NotImplementedError
Example #21
    def __init__(self,
                 n_z=256,
                 nh=1,
                 n_h=512,
                 proj_size=0,
                 ncoef=23,
                 sm_type='none',
                 dropout_prob=0.25,
                 ndiscriminators=1,
                 r_proj_size=0):
        super(TDNN, self).__init__()

        self.ndiscriminators = ndiscriminators
        self.r_proj_size = r_proj_size
        self.classifier = nn.ModuleList()
        self.dropout_prob = dropout_prob
        self.n_hidden = nh
        self.hidden_size = n_h
        self.latent_size = n_z
        self.sm_type = sm_type
        self.ncoef = ncoef

        self.model = nn.Sequential(nn.Conv1d(ncoef, 512, 5, padding=2),
                                   nn.BatchNorm1d(512), nn.ReLU(inplace=True),
                                   nn.Conv1d(512, 512, 5, padding=2),
                                   nn.BatchNorm1d(512), nn.ReLU(inplace=True),
                                   nn.Conv1d(512, 512, 5, padding=3),
                                   nn.BatchNorm1d(512), nn.ReLU(inplace=True),
                                   nn.Conv1d(512, 512, 7), nn.BatchNorm1d(512),
                                   nn.ReLU(inplace=True),
                                   nn.Conv1d(512, 1500, 1),
                                   nn.BatchNorm1d(1500), nn.ReLU(inplace=True))

        self.pooling = StatisticalPooling()

        self.post_pooling_1 = nn.Sequential(nn.Conv1d(3000, 512, 1),
                                            nn.BatchNorm1d(512),
                                            nn.ReLU(inplace=True))

        self.post_pooling_2 = nn.Sequential(nn.Conv1d(512, 512, 1),
                                            nn.BatchNorm1d(512),
                                            nn.ReLU(inplace=True),
                                            nn.Conv1d(512, n_z, 1))

        if ndiscriminators > 1:
            for i in range(self.ndiscriminators):
                self.classifier.append(
                    self.make_bin_layers(n_in=2 * 512,
                                         n_h_layers=nh,
                                         h_size=n_h,
                                         dropout_p=dropout_prob))
        else:
            self.classifier = self.make_bin_layers(n_in=2 * 512,
                                                   n_h_layers=nh,
                                                   h_size=n_h,
                                                   dropout_p=dropout_prob)

        if proj_size > 0 and sm_type != 'none':
            if sm_type == 'softmax':
                self.out_proj = Softmax(input_features=n_z,
                                        output_features=proj_size)
            elif sm_type == 'am_softmax':
                self.out_proj = AMSoftmax(input_features=n_z,
                                          output_features=proj_size)
            else:
                raise NotImplementedError
Example #22
if args.model == 'lcnn29_mfcc' or args.model == 'all':
	batch = torch.rand(3, 1, args.ncoef, 400)
	model = model_.lcnn_29layers_v2(n_z=args.latent_size, ncoef=args.ncoef)
	mu = model.forward(batch)
	print('lcnn29_mfcc', mu.size())
if args.model == 'TDNN' or args.model == 'all':
	batch = torch.rand(3, 1, args.ncoef, 400)
	model = model_.TDNN(n_z=args.latent_size, ncoef=args.ncoef)
	mu = model.forward(batch)
	print('TDNN', mu.size())
if args.model == 'TDNN_multipool' or args.model == 'all':
	batch = torch.rand(3, 1, args.ncoef, 400)
	model = model_.TDNN_multipool(n_z=args.latent_size, ncoef=args.ncoef)
	mu = model.forward(batch)
	print('TDNN_multipool', mu.size())
if args.model == 'FTDNN' or args.model == 'all':
	batch = torch.rand(3, 1, args.ncoef, 400)
	model = model_.FTDNN(n_z=args.latent_size, ncoef=args.ncoef)
	mu = model.forward(batch)
	print('FTDNN', mu.size())

if args.softmax:
	batch = torch.rand(3, mu.size(0))
	batch_labels = torch.randint(low=0, high=10, size=(mu.size(0),))

	amsm = AMSoftmax(input_features=batch.size(1), output_features=10)
	sm = Softmax(input_features=batch.size(1), output_features=10)

	print('amsm', amsm(batch, batch_labels).size())
	print('sm', sm(batch).size())
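
The test script above reads its configuration from an args namespace that is not shown; a minimal argparse setup it appears to assume, with flag names inferred from the attributes used above (args.model, args.latent_size, args.ncoef, args.softmax):

import argparse

parser = argparse.ArgumentParser(description='shape-check the embedding models')
parser.add_argument('--model', type=str, default='all')
parser.add_argument('--latent-size', type=int, default=256)   # exposed as args.latent_size
parser.add_argument('--ncoef', type=int, default=23)
parser.add_argument('--softmax', action='store_true')
args = parser.parse_args()
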