def __init__(self, hparams):
    super(Spotify, self).__init__()
    self.conv0 = nn.Sequential(
        nn.Conv1d(hparams.num_mels, 32, kernel_size=4, stride=1, padding=0),
        nn.BatchNorm1d(32),
        nn.LeakyReLU(),
        nn.MaxPool1d(8, stride=8),
    )
    self.conv1 = nn.Sequential(
        nn.Conv1d(32, 32, kernel_size=4, stride=1, padding=0),
        nn.BatchNorm1d(32),
        nn.LeakyReLU(),
        nn.MaxPool1d(4, stride=4),
    )
    self.conv2 = nn.Sequential(
        nn.Conv1d(32, 64, kernel_size=4, stride=1, padding=0),
        nn.BatchNorm1d(64),
        nn.LeakyReLU(),
        nn.MaxPool1d(4, stride=4),
    )
    self.pool0 = nn.AvgPool1d(4, stride=4)
    self.pool1 = nn.MaxPool1d(4, stride=4)
    self.pool2 = nn.LPPool1d(2, kernel_size=4, stride=4)
    self.pool3 = nn.LPPool1d(1, kernel_size=4, stride=4)
    self.fc1 = nn.Sequential(
        nn.Dropout(0.3),
        nn.Linear(192, 64),
    )
    self.fc2 = nn.Linear(64, 64)
    self.fc3 = nn.Linear(64, len(hparams.genres))
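
# Hedged shape trace (standalone; num_mels=128 and a 2048-step input are
# assumptions, and the snippet above does not include forward()): each conv
# stage trims 3 steps (kernel 4, no padding) before its pooling, so the three
# stages reduce the time axis roughly 128-fold (8 * 4 * 4).
import torch
import torch.nn as nn

x = torch.randn(1, 128, 2048)
tower = nn.Sequential(
    nn.Conv1d(128, 32, 4), nn.BatchNorm1d(32), nn.LeakyReLU(), nn.MaxPool1d(8, 8),
    nn.Conv1d(32, 32, 4), nn.BatchNorm1d(32), nn.LeakyReLU(), nn.MaxPool1d(4, 4),
    nn.Conv1d(32, 64, 4), nn.BatchNorm1d(64), nn.LeakyReLU(), nn.MaxPool1d(4, 4),
)
print(tower(x).shape)  # torch.Size([1, 64, 15])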
def __init__(self):
    super(NNPoolingModule, self).__init__()
    self.input1d = torch.randn(1, 16, 50)
    self.module1d = nn.ModuleList([
        nn.MaxPool1d(3, stride=2),
        nn.AvgPool1d(3, stride=2),
        nn.LPPool1d(2, 3, stride=2),
        nn.AdaptiveMaxPool1d(3),
        nn.AdaptiveAvgPool1d(3),
    ])
    self.input2d = torch.randn(1, 16, 30, 10)
    self.module2d = nn.ModuleList([
        nn.MaxPool2d((3, 2), stride=(2, 1)),
        nn.AvgPool2d((3, 2), stride=(2, 1)),
        nn.FractionalMaxPool2d(3, output_ratio=(0.5, 0.5)),
        nn.LPPool2d(2, 3, stride=(2, 1)),
        nn.AdaptiveMaxPool2d((5, 7)),
        nn.AdaptiveAvgPool2d(7),
    ])
    self.input3d = torch.randn(1, 16, 20, 4, 4)
    self.module3d = nn.ModuleList([
        nn.MaxPool3d(2),
        nn.AvgPool3d(2),
        nn.FractionalMaxPool3d(2, output_ratio=(0.5, 0.5, 0.5)),
        nn.AdaptiveMaxPool3d((5, 7, 9)),
        nn.AdaptiveAvgPool3d((5, 7, 9)),
    ])
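
# Hedged usage sketch (not part of the snippet above): apply each stored
# pooling layer to the input of matching dimensionality, the way a forward()
# for this module presumably would, and print the resulting shapes.
def check_pooling(module):
    # `module` is an NNPoolingModule instance as defined above.
    for layers, inp in ((module.module1d, module.input1d),
                        (module.module2d, module.input2d),
                        (module.module3d, module.input3d)):
        for layer in layers:
            print(type(layer).__name__, tuple(layer(inp).shape))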
def forward(self, src_tokens, src_lengths, prev_output_tokens):
    encoder_out = self.encoder(src_tokens, src_lengths)
    decoder_out = self.decoder(prev_output_tokens, encoder_out)
    if self.prop_predict:
        if 'pool' in self.fp_type:
            fp = encoder_out['encoder_out']
            seqz, bsz, csz = fp.shape
            # kernel_size == seqz pools the whole sequence down to one step.
            if self.fp_type == 'avgpool':
                pool = nn.AvgPool1d(seqz)
            elif self.fp_type == 'maxpool':
                pool = nn.MaxPool1d(seqz)
            else:
                pool = nn.LPPool1d(2, seqz)
            # (seq, batch, dim) -> (batch, dim, seq) so that the 1d pooling
            # runs over the sequence axis.
            fp = torch.transpose(torch.transpose(fp, 0, 1), 1, 2)
            fp = pool(fp)
            fp = fp.contiguous().view(bsz, csz)
        else:  # 'first'
            # Use the encoder output of the first token <GO> as FP.
            fp = encoder_out['encoder_out'][0, :, :]
        if self.get_fp:
            return fp
        pred_out = self.prednet(fp)
        return decoder_out, pred_out
    else:
        return decoder_out
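
# Standalone shape check (an illustration; the sizes are made up): pooling
# with kernel_size equal to the sequence length collapses the time axis to 1,
# leaving one fingerprint vector per batch element.
import torch
import torch.nn as nn

seqz, bsz, csz = 12, 4, 8
fp = torch.randn(seqz, bsz, csz)             # (seq, batch, channels)
fp = fp.transpose(0, 1).transpose(1, 2)      # -> (batch, channels, seq)
fp = nn.LPPool1d(2, seqz)(fp)                # -> (batch, channels, 1)
print(fp.contiguous().view(bsz, csz).shape)  # torch.Size([4, 8])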
def __init__(self):
    super(Model, self).__init__()
    self.pool_0 = nn.LPPool1d(norm_type=2, kernel_size=3)
    self.pool_1 = nn.LPPool1d(norm_type=2, kernel_size=4, stride=2)
    self.pool_2 = nn.LPPool1d(norm_type=1, kernel_size=3, stride=1, ceil_mode=False)
    self.pool_3 = nn.LPPool1d(norm_type=1, kernel_size=5, stride=1, ceil_mode=True)
    self.pool_4 = nn.LPPool1d(norm_type=1.2, kernel_size=3, stride=2, ceil_mode=False)
    self.pool_5 = nn.LPPool1d(norm_type=0.5, kernel_size=2, stride=1, ceil_mode=True)
    self.pool_6 = nn.LPPool1d(norm_type=0.1, kernel_size=4, stride=1, ceil_mode=False)
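
# Hedged shape check (an illustration, not part of the snippet above; the
# input size is assumed). torch.rand keeps the values positive, since a
# fractional norm_type on negative inputs would produce NaNs. Note that
# pool_0 leaves stride at its default, the kernel size, so a length-16
# input yields floor(16 / 3) = 5 output steps.
import torch

m = Model()  # the class defined above
x = torch.rand(1, 8, 16)
for name in ['pool_0', 'pool_1', 'pool_2', 'pool_3', 'pool_4', 'pool_5', 'pool_6']:
    print(name, tuple(getattr(m, name)(x).shape))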
def __init__(self, mode, nfilters, samplerate=16000, wlen=25, wstride=10,
             compression='log', preemp=False, mvn=False):
    super(TDFbanks, self).__init__()
    window_size = samplerate * wlen // 1000 + 1
    window_stride = samplerate * wstride // 1000
    padding_size = (window_size - 1) // 2
    self.preemp = None
    if preemp:
        self.preemp = nn.Conv1d(1, 1, 2, 1, padding=1, groups=1, bias=False)
    self.complex_conv = nn.Conv1d(1, 2 * nfilters, window_size, 1,
                                  padding=padding_size, groups=1, bias=False)
    # L2 pooling with kernel 2 over interleaved (real, imaginary) channels
    # computes the complex modulus sqrt(re^2 + im^2); the forward pass
    # presumably transposes so the channel pairs lie along the pooled axis.
    self.modulus = nn.LPPool1d(2, 2, stride=2)
    self.lowpass = nn.Conv1d(nfilters, nfilters, window_size, window_stride,
                             padding=0, groups=nfilters, bias=False)
    if mode == 'Fixed':
        for param in self.parameters():
            param.requires_grad = False
    elif mode == 'learnfbanks':
        if preemp:
            self.preemp.weight.requires_grad = False
        self.lowpass.weight.requires_grad = False
    if mvn:
        self.instancenorm = nn.InstanceNorm1d(nfilters, momentum=1)
    self.nfilters = nfilters
    self.fs = samplerate
    self.wlen = wlen
    self.wstride = wstride
    self.compression = compression
    self.mvn = mvn
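
# Illustrative check (not from the original): with real and imaginary parts
# interleaved along the pooled axis, LPPool1d(2, 2, stride=2) computes the
# complex modulus sqrt(re^2 + im^2) for each pair, e.g. (3, 4) -> 5.
import torch
import torch.nn as nn

pairs = torch.tensor([[[3., 4., 0., 2.]]])  # (re0, im0, re1, im1)
print(nn.LPPool1d(2, 2, stride=2)(pairs))   # tensor([[[5., 2.]]])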
def __init__(self, args):
    super().__init__()
    self.args = args
    self.original_len = args.window_size
    self.latent_len = int(self.original_len / 2)
    self.dropout_rate = args.drop_out
    self.hidden = 256
    self.heads = 2
    self.n_layers = 2
    self.output_size = args.output_size
    self.conv = nn.Conv1d(in_channels=1, out_channels=self.hidden,
                          kernel_size=5, stride=1, padding=2,
                          padding_mode='replicate')
    # kernel_size=2, stride=2: the L2 pool halves the sequence length,
    # matching latent_len = original_len / 2.
    self.pool = nn.LPPool1d(norm_type=2, kernel_size=2, stride=2)
    self.position = PositionalEmbedding(max_len=self.latent_len, d_model=self.hidden)
    self.layer_norm = LayerNorm(self.hidden)
    self.dropout = nn.Dropout(p=self.dropout_rate)
    self.transformer_blocks = nn.ModuleList([
        TransformerBlock(self.hidden, self.heads, self.hidden * 4, self.dropout_rate)
        for _ in range(self.n_layers)
    ])
    # ConvTranspose1d(kernel_size=4, stride=2, padding=1) doubles the length
    # back: latent_len -> original_len.
    self.deconv = nn.ConvTranspose1d(in_channels=self.hidden,
                                     out_channels=self.hidden,
                                     kernel_size=4, stride=2, padding=1)
    self.linear1 = nn.Linear(self.hidden, 128)
    self.linear2 = nn.Linear(128, self.output_size)
    self.truncated_normal_init()
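
# Shape round-trip sketch (window_size=32 is an assumption): the convolution
# preserves the length, LPPool1d halves it, and ConvTranspose1d(kernel_size=4,
# stride=2, padding=1) doubles it back to the original.
import torch
import torch.nn as nn

x = torch.randn(1, 1, 32)
x = nn.Conv1d(1, 256, 5, 1, 2, padding_mode='replicate')(x)  # (1, 256, 32)
x = nn.LPPool1d(2, 2, stride=2)(x)                           # (1, 256, 16)
x = nn.ConvTranspose1d(256, 256, 4, 2, 1)(x)                 # (1, 256, 32)
print(x.shape)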
#%%
unpool(out, indices)  ## restores the original shape; every element except the max is 0

#%%
#>3. Average pooling
##>3.1 2D average pooling
m = nn.AvgPool2d((3, 2), stride=(2, 1))
input = torch.randn(20, 16, 50, 32)
out = m(input)
out.shape

#%%
#>4. LP-norm pooling
input = torch.arange(1, 5, dtype=torch.float).reshape(1, 1, 4)
pool = nn.LPPool1d(2, 2)  ## 2-norm, kernel size 2, remaining arguments left at their defaults
out = pool(input)
out

#%%
#>5. Adaptive max pooling

#%% [markdown]
# - Adaptive max pooling only needs the target output size; the remaining parameters are computed automatically

#%%
##>5.1 1D adaptive max pooling
input = torch.randn(1, 64, 8)
m = nn.AdaptiveMaxPool1d(5)  # target output size is 5
out = m(input)
out.shape
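
#%%
## Added check (an addition, not in the original notebook): LPPool1d computes
## (sum(x ** p)) ** (1 / p) over each window, so the LP-norm cell above pools
## [1, 2, 3, 4] to [sqrt(1 + 4), sqrt(9 + 16)] = [2.2361, 5.0].
x = torch.arange(1, 5, dtype=torch.float).reshape(1, 1, 4)
torch.allclose(nn.LPPool1d(2, 2)(x), torch.tensor([[[5.0 ** 0.5, 5.0]]]))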
def __append_layer(self, net_style, args_dict):
    # Each branch forwards the dict's values positionally to the matching
    # torch.nn constructor; the slice length matches the number of positional
    # arguments that constructor expects here.
    args_values_list = list(args_dict.values())
    if net_style == "Conv2d":
        self.layers.append(nn.Conv2d(*args_values_list[:8]))
    elif net_style == "MaxPool2d":
        self.layers.append(nn.MaxPool2d(*args_values_list[:6]))
    elif net_style == "Linear":
        self.layers.append(nn.Linear(*args_values_list[:3]))
    elif net_style == "reshape":
        # Special case: for "reshape", append the target tensor shape directly.
        # print(type(args_values_list[0]))
        self.layers.append(args_values_list[0])
    elif net_style == "Conv1d":
        self.layers.append(nn.Conv1d(*args_values_list[:8]))
    elif net_style == "Conv3d":
        self.layers.append(nn.Conv3d(*args_values_list[:8]))
    elif net_style == "ConvTranspose1d":
        self.layers.append(nn.ConvTranspose1d(*args_values_list[:9]))
    elif net_style == "ConvTranspose2d":
        self.layers.append(nn.ConvTranspose2d(*args_values_list[:9]))
    elif net_style == "ConvTranspose3d":
        self.layers.append(nn.ConvTranspose3d(*args_values_list[:9]))
    elif net_style == "Unfold":
        self.layers.append(nn.Unfold(*args_values_list[:4]))
    elif net_style == "Fold":
        # nn.Fold takes (output_size, kernel_size, dilation, padding, stride).
        self.layers.append(nn.Fold(*args_values_list[:5]))
    elif net_style == "MaxPool1d":
        self.layers.append(nn.MaxPool1d(*args_values_list[:6]))
    elif net_style == "MaxPool3d":
        self.layers.append(nn.MaxPool3d(*args_values_list[:6]))
    elif net_style == "MaxUnpool1d":
        self.layers.append(nn.MaxUnpool1d(*args_values_list[:3]))
    elif net_style == "MaxUnpool2d":
        self.layers.append(nn.MaxUnpool2d(*args_values_list[:3]))
    elif net_style == "MaxUnpool3d":
        self.layers.append(nn.MaxUnpool3d(*args_values_list[:3]))
    elif net_style == "AvgPool1d":
        self.layers.append(nn.AvgPool1d(*args_values_list[:5]))
    elif net_style == "AvgPool2d":
        self.layers.append(nn.AvgPool2d(*args_values_list[:5]))
    elif net_style == "AvgPool3d":
        self.layers.append(nn.AvgPool3d(*args_values_list[:5]))
    elif net_style == "FractionalMaxPool2d":
        self.layers.append(nn.FractionalMaxPool2d(*args_values_list[:5]))
    elif net_style == "LPPool1d":
        self.layers.append(nn.LPPool1d(*args_values_list[:4]))
    elif net_style == "LPPool2d":
        self.layers.append(nn.LPPool2d(*args_values_list[:4]))
    elif net_style == "AdaptiveMaxPool1d":
        self.layers.append(nn.AdaptiveMaxPool1d(*args_values_list[:2]))
    elif net_style == "AdaptiveMaxPool2d":
        self.layers.append(nn.AdaptiveMaxPool2d(*args_values_list[:2]))
    elif net_style == "AdaptiveMaxPool3d":
        self.layers.append(nn.AdaptiveMaxPool3d(*args_values_list[:2]))
    elif net_style == "AdaptiveAvgPool1d":
        self.layers.append(nn.AdaptiveAvgPool1d(args_values_list[0]))
    elif net_style == "AdaptiveAvgPool2d":
        self.layers.append(nn.AdaptiveAvgPool2d(args_values_list[0]))
    elif net_style == "AdaptiveAvgPool3d":
        self.layers.append(nn.AdaptiveAvgPool3d(args_values_list[0]))
    elif net_style == "ReflectionPad1d":
        self.layers.append(nn.ReflectionPad1d(args_values_list[0]))
    elif net_style == "ReflectionPad2d":
        self.layers.append(nn.ReflectionPad2d(args_values_list[0]))
    elif net_style == "ReplicationPad1d":
        self.layers.append(nn.ReplicationPad1d(args_values_list[0]))
    elif net_style == "ReplicationPad2d":
        self.layers.append(nn.ReplicationPad2d(args_values_list[0]))
    elif net_style == "ReplicationPad3d":
        self.layers.append(nn.ReplicationPad3d(args_values_list[0]))
    elif net_style == "ZeroPad2d":
        self.layers.append(nn.ZeroPad2d(args_values_list[0]))
    elif net_style == "ConstantPad1d":
        self.layers.append(nn.ConstantPad1d(*args_values_list[:2]))
    elif net_style == "ConstantPad2d":
        self.layers.append(nn.ConstantPad2d(*args_values_list[:2]))
    elif net_style == "ConstantPad3d":
        self.layers.append(nn.ConstantPad3d(*args_values_list[:2]))
    elif net_style == "ELU":
        self.layers.append(nn.ELU(*args_values_list[:2]))
    elif net_style == "Hardshrink":
        self.layers.append(nn.Hardshrink(args_values_list[0]))
    elif net_style == "Hardtanh":
        self.layers.append(nn.Hardtanh(*args_values_list[:5]))
    elif net_style == "LeakyReLU":
        self.layers.append(nn.LeakyReLU(*args_values_list[:2]))
    elif net_style == "LogSigmoid":
        self.layers.append(nn.LogSigmoid())
    elif net_style == "PReLU":
        self.layers.append(nn.PReLU(*args_values_list[:2]))
    elif net_style == "ReLU":
        self.layers.append(nn.ReLU(args_values_list[0]))
    elif net_style == "ReLU6":
        self.layers.append(nn.ReLU6(args_values_list[0]))
    elif net_style == "RReLU":
        self.layers.append(nn.RReLU(*args_values_list[:3]))
    elif net_style == "SELU":
        self.layers.append(nn.SELU(args_values_list[0]))
    elif net_style == "CELU":
        self.layers.append(nn.CELU(*args_values_list[:2]))
    elif net_style == "Sigmoid":
        self.layers.append(nn.Sigmoid())
    elif net_style == "Softplus":
        self.layers.append(nn.Softplus(*args_values_list[:2]))
    elif net_style == "Softshrink":
        self.layers.append(nn.Softshrink(args_values_list[0]))
    elif net_style == "Softsign":
        self.layers.append(nn.Softsign())
    elif net_style == "Tanh":
        self.layers.append(nn.Tanh())
    elif net_style == "Tanhshrink":
        self.layers.append(nn.Tanhshrink())
    elif net_style == "Threshold":
        self.layers.append(nn.Threshold(*args_values_list[:3]))
    elif net_style == "Softmin":
        self.layers.append(nn.Softmin(args_values_list[0]))
    elif net_style == "Softmax":
        self.layers.append(nn.Softmax(args_values_list[0]))
    elif net_style == "Softmax2d":
        self.layers.append(nn.Softmax2d())
    elif net_style == "LogSoftmax":
        self.layers.append(nn.LogSoftmax(args_values_list[0]))
    elif net_style == "AdaptiveLogSoftmaxWithLoss":
        self.layers.append(nn.AdaptiveLogSoftmaxWithLoss(*args_values_list[:5]))
    elif net_style == "BatchNorm1d":
        self.layers.append(nn.BatchNorm1d(*args_values_list[:5]))
    elif net_style == "BatchNorm2d":
        self.layers.append(nn.BatchNorm2d(*args_values_list[:5]))
    elif net_style == "BatchNorm3d":
        self.layers.append(nn.BatchNorm3d(*args_values_list[:5]))
    elif net_style == "GroupNorm":
        self.layers.append(nn.GroupNorm(*args_values_list[:4]))
    elif net_style == "InstanceNorm1d":
        self.layers.append(nn.InstanceNorm1d(*args_values_list[:5]))
    elif net_style == "InstanceNorm2d":
        self.layers.append(nn.InstanceNorm2d(*args_values_list[:5]))
    elif net_style == "InstanceNorm3d":
        self.layers.append(nn.InstanceNorm3d(*args_values_list[:5]))
    elif net_style == "LayerNorm":
        self.layers.append(nn.LayerNorm(*args_values_list[:3]))
    elif net_style == "LocalResponseNorm":
        self.layers.append(nn.LocalResponseNorm(*args_values_list[:4]))
    elif net_style == "Dropout":
        self.layers.append(nn.Dropout(*args_values_list[:2]))
    elif net_style == "Dropout2d":
        self.layers.append(nn.Dropout2d(*args_values_list[:2]))
    elif net_style == "Dropout3d":
        self.layers.append(nn.Dropout3d(*args_values_list[:2]))
    elif net_style == "AlphaDropout":
        self.layers.append(nn.AlphaDropout(*args_values_list[:2]))
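
# Design note (a sketch under the assumption that every args_dict holds
# exactly the positional arguments of the matching torch.nn class): since
# each branch above just forwards values to the constructor named by
# net_style, the whole chain could collapse into a getattr dispatch, with
# "reshape" as the only special case. The helper name is hypothetical.
import torch.nn as nn

def append_layer_compact(layers, net_style, args_dict):
    args = list(args_dict.values())
    if net_style == "reshape":
        layers.append(args[0])  # store the target shape itself
    else:
        layers.append(getattr(nn, net_style)(*args))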