def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=4,
    stream_sizes=None,
    ar_orders=None,
    init_type="none",
    **kwargs,
):
    super().__init__(
        in_dim=in_dim, hidden_dim=hidden_dim, out_dim=out_dim, num_layers=num_layers
    )
    if "dropout" in kwargs:
        warn(
            "dropout argument in Conv1dResnetSAR is deprecated"
            " and will be removed in future versions"
        )
    if stream_sizes is None:
        stream_sizes = [180, 3, 1, 15]
    if ar_orders is None:
        ar_orders = [20, 200, 20, 20]
    self.stream_sizes = stream_sizes

    init_weights(self, init_type)

    self.analysis_filts = nn.ModuleList()
    for s, K in zip(stream_sizes, ar_orders):
        self.analysis_filts += [TrTimeInvFIRFilter(s, K + 1)]
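# Hedged usage sketch (illustrative, not from the original source): the deprecation
# warning above indicates this __init__ belongs to `Conv1dResnetSAR`; it assumes the
# parent class and TrTimeInvFIRFilter are importable. Dimension values are arbitrary,
# chosen so that out_dim matches the default stream_sizes [180, 3, 1, 15] (sum 199).
#
#   >>> model = Conv1dResnetSAR(in_dim=331, hidden_dim=64, out_dim=199)
#   >>> len(model.analysis_filts)  # one FIR filter of order K + 1 per output stream
#   4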
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=2,
    dropout=0.0,
    init_type="normal",
    cin_dim=-1,
    last_sigmoid=False,
):
    super(FFN, self).__init__()
    self.first_linear = nn.Linear(in_dim, hidden_dim)
    self.hidden_layers = nn.ModuleList(
        [nn.Linear(hidden_dim, hidden_dim) for _ in range(num_layers)]
    )
    self.last_linear = nn.Linear(hidden_dim, out_dim)
    self.relu = nn.LeakyReLU()
    self.dropout = nn.Dropout(dropout)
    self.last_sigmoid = last_sigmoid
    if cin_dim > 0:
        self.cond = nn.Linear(cin_dim, hidden_dim)
    else:
        self.cond = None
    init_weights(self, init_type)
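# Hedged usage sketch (illustrative): the super() call indicates the enclosing class
# is `FFN`. The conditioning projection is only built when cin_dim > 0; all dimension
# values below are assumptions for the example.
#
#   >>> ffn = FFN(in_dim=80, hidden_dim=256, out_dim=1, num_layers=2, cin_dim=128)
#   >>> ffn.cond  # nn.Linear(128, 256); with the default cin_dim=-1 this is None
#   Linear(in_features=128, out_features=256, bias=True)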
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=1,
    num_gaussians=8,
    dim_wise=False,
    init_type="none",
    **kwargs,
):
    super(MDN, self).__init__()
    if "dropout" in kwargs:
        warn(
            "dropout argument in MDN is deprecated"
            " and will be removed in future versions"
        )
    model = [nn.Linear(in_dim, hidden_dim), nn.ReLU()]
    if num_layers > 1:
        for _ in range(num_layers - 1):
            model += [nn.Linear(hidden_dim, hidden_dim), nn.ReLU()]
    model += [
        MDNLayer(
            in_dim=hidden_dim,
            out_dim=out_dim,
            num_gaussians=num_gaussians,
            dim_wise=dim_wise,
        )
    ]
    self.model = nn.Sequential(*model)
    init_weights(self, init_type)
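# Hedged usage sketch (illustrative): the super() call indicates the enclosing class
# is `MDN`. With num_layers=1 the stack is Linear -> ReLU -> MDNLayer; each extra
# layer appends another Linear/ReLU pair before the MDNLayer. Values are assumptions.
#
#   >>> mdn = MDN(in_dim=331, hidden_dim=256, out_dim=199, num_layers=2,
#   ...           num_gaussians=4)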
def __init__(
    self,
    in_dim=None,
    hidden_dim=64,
    padding=None,
    init_type="normal",
    last_sigmoid=False,
):
    super().__init__()
    C = hidden_dim
    self.conv_in = Conv2dGLU(
        1, C, (3, 3), stride=(1, 1), padding=padding, norm_layer=None
    )
    self.downsample = nn.ModuleList(
        [
            Conv2dGLU(C, 2 * C, (3, 3), stride=(2, 2), padding=padding),
            Conv2dGLU(2 * C, 4 * C, (3, 3), stride=(2, 2), padding=padding),
            Conv2dGLU(4 * C, 8 * C, (3, 3), stride=(2, 2), padding=padding),
            Conv2dGLU(8 * C, 8 * C, (1, 5), stride=(1, 1), padding=padding),
        ]
    )
    # NOTE: 8x smaller time lengths for the output
    # depends on the stride
    self.downsample_scale = 8
    if padding is None:
        padding_ = (1, 1, 0, 0)
        self.conv_out = nn.Sequential(
            # NOTE: with a 4-element pad tuple on a 4D input, F.pad pads the last
            # two dimensions, so this effectively performs 2D reflection padding
            nn.ReflectionPad1d(padding_),
            nn.Conv2d(8 * C, 1, (1, 3), padding=0),
        )
    else:
        self.conv_out = nn.Conv2d(8 * C, 1, (1, 3), padding=padding)
    self.last_sigmoid = last_sigmoid
    init_weights(self, init_type)
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=1,
    dropout=0.5,
    num_gaussians=8,
    dim_wise=False,
    init_type="none",
):
    super(MDNv2, self).__init__()
    model = [nn.Linear(in_dim, hidden_dim), nn.ReLU(), nn.Dropout(dropout)]
    if num_layers > 1:
        for _ in range(num_layers - 1):
            model += [
                nn.Linear(hidden_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(dropout),
            ]
    model += [
        MDNLayer(
            in_dim=hidden_dim,
            out_dim=out_dim,
            num_gaussians=num_gaussians,
            dim_wise=dim_wise,
        )
    ]
    self.model = nn.Sequential(*model)
    init_weights(self, init_type)
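# Hedged usage sketch (illustrative): the super() call indicates the enclosing class
# is `MDNv2`, which differs from MDN above mainly by adding Dropout after every ReLU.
# All dimension values are assumptions for the example.
#
#   >>> mdn = MDNv2(in_dim=331, hidden_dim=256, out_dim=199, num_layers=2,
#   ...             dropout=0.5)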
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=2,
    dropout=0.0,
    init_type="normal",
    cin_dim=-1,
    last_sigmoid=False,
):
    super().__init__()
    model = [
        nn.ReflectionPad1d(3),
        WNConv1d(in_dim, hidden_dim, kernel_size=7, padding=0),
    ]
    for n in range(num_layers):
        model.append(ResnetBlock(hidden_dim, dilation=2**n))
    model += [
        nn.LeakyReLU(0.2),
    ]
    self.model = nn.ModuleList(model)
    self.last_conv = WNConv1d(hidden_dim, out_dim, kernel_size=1, padding=0)
    self.dropout = nn.Dropout(dropout)
    self.last_sigmoid = last_sigmoid
    if cin_dim > 0:
        self.cond = WNConv1d(cin_dim, hidden_dim, kernel_size=1, padding=0)
    else:
        self.cond = None
    init_weights(self, init_type)
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=1,
    bidirectional=True,
    dropout=0.0,
    num_gaussians=8,
    dim_wise=False,
    init_type="none",
):
    super(RMDN, self).__init__()
    self.linear = nn.Linear(in_dim, hidden_dim)
    self.relu = nn.ReLU()
    self.num_direction = 2 if bidirectional else 1
    self.lstm = nn.LSTM(
        hidden_dim,
        hidden_dim,
        num_layers,
        bidirectional=bidirectional,
        batch_first=True,
        dropout=dropout,
    )
    self.mdn = MDNLayer(
        in_dim=self.num_direction * hidden_dim,
        out_dim=out_dim,
        num_gaussians=num_gaussians,
        dim_wise=dim_wise,
    )
    init_weights(self, init_type)
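# Hedged usage sketch (illustrative): the super() call indicates the enclosing class
# is `RMDN` (recurrent MDN). The MDN layer consumes num_direction * hidden_dim
# features, so a bidirectional LSTM doubles its input size. Values are assumptions.
#
#   >>> rmdn = RMDN(in_dim=331, hidden_dim=128, out_dim=199, num_layers=2,
#   ...             bidirectional=True, num_gaussians=8)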
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=4,
    # NOTE: you must carefully set the following parameters
    in_lf0_idx=300,
    in_lf0_min=5.3936276,
    in_lf0_max=6.491111,
    out_lf0_idx=180,
    out_lf0_mean=5.953093881972361,
    out_lf0_scale=0.23435173188961034,
    init_type="none",
    use_mdn=False,
    num_gaussians=8,
    dim_wise=False,
):
    super().__init__()
    self.in_lf0_idx = in_lf0_idx
    self.in_lf0_min = in_lf0_min
    self.in_lf0_max = in_lf0_max
    self.out_lf0_idx = out_lf0_idx
    self.out_lf0_mean = out_lf0_mean
    self.out_lf0_scale = out_lf0_scale
    self.use_mdn = use_mdn

    model = [
        nn.ReflectionPad1d(3),
        WNConv1d(in_dim, hidden_dim, kernel_size=7, padding=0),
    ]
    for n in range(num_layers):
        model.append(ResnetBlock(hidden_dim, dilation=2**n))

    last_conv_out_dim = hidden_dim if use_mdn else out_dim
    model += [
        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),
        WNConv1d(hidden_dim, last_conv_out_dim, kernel_size=7, padding=0),
    ]
    self.model = nn.Sequential(*model)

    if self.use_mdn:
        self.mdn_layer = MDNLayer(
            in_dim=hidden_dim,
            out_dim=out_dim,
            num_gaussians=num_gaussians,
            dim_wise=dim_wise,
        )
    else:
        self.mdn_layer = None

    init_weights(self, init_type)
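# Hedged note (interpretation, not from the original source): the in_lf0_* and
# out_lf0_* parameters appear to give the index of the (log-)F0 feature inside the
# input/output feature vectors and its normalization statistics (min/max scaling for
# the input, mean/scale standardization for the output). They must stay consistent
# with the feature extraction configuration, which is why the NOTE above warns to
# set them carefully; the defaults are only meaningful for one particular feature
# layout.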
def __init__(
    self,
    in_dim,
    ff_hidden_dim=2048,
    conv_hidden_dim=1024,
    lstm_hidden_dim=256,
    out_dim=199,
    dropout=0.0,
    num_lstm_layers=2,
    bidirectional=True,
    init_type="none",
):
    super().__init__()
    self.ff = nn.Sequential(
        nn.Linear(in_dim, ff_hidden_dim),
        nn.ReLU(),
        nn.Linear(ff_hidden_dim, ff_hidden_dim),
        nn.ReLU(),
        nn.Linear(ff_hidden_dim, ff_hidden_dim),
        nn.ReLU(),
    )

    self.conv = nn.Sequential(
        nn.ReflectionPad1d(3),
        nn.Conv1d(ff_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
        nn.BatchNorm1d(conv_hidden_dim),
        nn.ReLU(),
        nn.ReflectionPad1d(3),
        nn.Conv1d(conv_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
        nn.BatchNorm1d(conv_hidden_dim),
        nn.ReLU(),
        nn.ReflectionPad1d(3),
        nn.Conv1d(conv_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
        nn.BatchNorm1d(conv_hidden_dim),
        nn.ReLU(),
    )

    num_direction = 2 if bidirectional else 1
    self.lstm = nn.LSTM(
        conv_hidden_dim,
        lstm_hidden_dim,
        num_lstm_layers,
        # honor the bidirectional flag so num_direction matches the LSTM output size
        bidirectional=bidirectional,
        batch_first=True,
        dropout=dropout,
    )

    last_in_dim = num_direction * lstm_hidden_dim
    self.fc = nn.Linear(last_in_dim, out_dim)

    init_weights(self, init_type)
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=4,
    init_type="none",
    use_mdn=False,
    num_gaussians=8,
    dim_wise=False,
    **kwargs,
):
    super().__init__()
    self.use_mdn = use_mdn

    if "dropout" in kwargs:
        warn(
            "dropout argument in Conv1dResnet is deprecated"
            " and will be removed in future versions"
        )

    model = [
        nn.ReflectionPad1d(3),
        WNConv1d(in_dim, hidden_dim, kernel_size=7, padding=0),
    ]
    for n in range(num_layers):
        model.append(ResnetBlock(hidden_dim, dilation=2**n))

    last_conv_out_dim = hidden_dim if use_mdn else out_dim
    model += [
        nn.LeakyReLU(0.2),
        nn.ReflectionPad1d(3),
        WNConv1d(hidden_dim, last_conv_out_dim, kernel_size=7, padding=0),
    ]
    self.model = nn.Sequential(*model)

    if self.use_mdn:
        self.mdn_layer = MDNLayer(
            in_dim=hidden_dim,
            out_dim=out_dim,
            num_gaussians=num_gaussians,
            dim_wise=dim_wise,
        )
    else:
        self.mdn_layer = None

    init_weights(self, init_type)
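# Hedged usage sketch (illustrative): the deprecation warning above indicates this
# __init__ belongs to `Conv1dResnet`. With use_mdn=True the final WNConv1d keeps
# hidden_dim channels and an MDNLayer produces the distribution parameters; with
# use_mdn=False the final conv maps directly to out_dim and mdn_layer is None.
# Dimension values below are assumptions.
#
#   >>> net = Conv1dResnet(in_dim=331, hidden_dim=64, out_dim=199, num_layers=4,
#   ...                    use_mdn=True, num_gaussians=8)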
def __init__(
    self,
    in_dim,
    out_dim,
    num_layers=5,
    hidden_dim=256,
    kernel_size=5,
    dropout=0.5,
    init_type="none",
    use_mdn=False,
    num_gaussians=1,
    dim_wise=False,
):
    super().__init__()
    self.use_mdn = use_mdn

    conv = nn.ModuleList()
    for idx in range(num_layers):
        in_channels = in_dim if idx == 0 else hidden_dim
        conv += [
            nn.Sequential(
                nn.Conv1d(
                    in_channels,
                    hidden_dim,
                    kernel_size,
                    stride=1,
                    padding=(kernel_size - 1) // 2,
                ),
                nn.ReLU(),
                LayerNorm(hidden_dim, dim=1),
                nn.Dropout(dropout),
            )
        ]
    self.conv = nn.Sequential(*conv)

    if self.use_mdn:
        self.mdn_layer = MDNLayer(
            hidden_dim, out_dim, num_gaussians=num_gaussians, dim_wise=dim_wise
        )
    else:
        self.fc = nn.Linear(hidden_dim, out_dim)

    init_weights(self, init_type)
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=2,
    dropout=0.0,
    init_type="none",
    last_sigmoid=False,
):
    super(FFN, self).__init__()
    self.first_linear = nn.Linear(in_dim, hidden_dim)
    self.hidden_layers = nn.ModuleList(
        [nn.Linear(hidden_dim, hidden_dim) for _ in range(num_layers)]
    )
    self.last_linear = nn.Linear(hidden_dim, out_dim)
    self.relu = nn.ReLU()
    self.dropout = nn.Dropout(dropout)
    self.last_sigmoid = last_sigmoid
    init_weights(self, init_type)
def __init__(
    self,
    in_dim,
    groups,
    n_layers=3,
    kernel_size=3,
    stride=2,
    init_type="normal",
    last_sigmoid=False,
):
    super().__init__()
    model = nn.ModuleDict()
    for n in range(0, n_layers):
        model["layer_%d" % n] = nn.Sequential(
            WNConv1d(
                in_dim,
                in_dim,
                kernel_size=kernel_size,
                stride=stride,
                groups=groups,
            ),
            nn.LeakyReLU(0.2),
        )

    model["layer_%d" % n_layers] = nn.Sequential(
        WNConv1d(in_dim, groups, kernel_size=kernel_size, stride=1),
        nn.LeakyReLU(0.2),
    )

    model["layer_%d" % (n_layers + 2)] = WNConv1d(
        groups, 1, kernel_size=kernel_size, stride=1
    )

    self.last_sigmoid = last_sigmoid
    self.model = model
    init_weights(self, init_type)
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=4,
    num_gaussians=8,
    dim_wise=False,
    init_type="none",
    **kwargs,
):
    super().__init__()

    if "dropout" in kwargs:
        warn(
            "dropout argument in Conv1dResnet is deprecated"
            " and will be removed in future versions"
        )

    model = [
        Conv1dResnet(
            in_dim=in_dim,
            hidden_dim=hidden_dim,
            out_dim=hidden_dim,
            num_layers=num_layers,
        ),
        nn.ReLU(),
        MDNLayer(
            in_dim=hidden_dim,
            out_dim=out_dim,
            num_gaussians=num_gaussians,
            dim_wise=dim_wise,
        ),
    ]
    self.model = nn.Sequential(*model)
    init_weights(self, init_type)
def __init__(
    self,
    in_dim,
    hidden_dim,
    out_dim,
    num_layers=1,
    bidirectional=True,
    dropout=0.0,
    init_type="none",
):
    super(LSTMRNN, self).__init__()
    self.hidden_dim = hidden_dim
    self.num_layers = num_layers
    self.num_direction = 2 if bidirectional else 1
    self.lstm = nn.LSTM(
        in_dim,
        hidden_dim,
        num_layers,
        bidirectional=bidirectional,
        batch_first=True,
        dropout=dropout,
    )
    self.hidden2out = nn.Linear(self.num_direction * self.hidden_dim, out_dim)
    init_weights(self, init_type)
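# Hedged usage sketch (illustrative): the super() call indicates the enclosing class
# is `LSTMRNN`. hidden2out expects num_direction * hidden_dim features, matching the
# LSTM output size. Dimension values below are assumptions.
#
#   >>> rnn = LSTMRNN(in_dim=331, hidden_dim=256, out_dim=199, num_layers=2,
#   ...               bidirectional=True, dropout=0.1)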
def __init__(
    self,
    in_dim=None,
    channels=128,
    kernel_size=5,
    init_type="kaiming_normal",
    padding_side="left",
):
    super().__init__()
    assert not isinstance(kernel_size, list)
    C = channels
    ks = kernel_size
    padding = (ks - 1) // 2
    self.padding = padding

    # Treat padding for the feature-axis carefully
    # use normal padding for the time-axis (i.e., (padding, padding))
    self.padding_side = padding_side
    if padding_side == "left":
        self.pad = nn.ReflectionPad2d((padding, 0, padding, padding))
    elif padding_side == "none":
        self.pad = nn.ReflectionPad2d((0, 0, padding, padding))
    elif padding_side == "right":
        self.pad = nn.ReflectionPad2d((0, padding, padding, padding))
    else:
        raise ValueError("Invalid padding side")

    self.conv1 = nn.Sequential(
        nn.Conv2d(2, C, kernel_size=(ks, ks)),
        nn.ReLU(),
    )
    # NOTE: for the subsequent layers, use fixed kernel_size 3 for feature-axis
    self.conv2 = nn.Sequential(
        nn.Conv2d(
            C + 1,
            C * 2,
            kernel_size=(ks, 3),
            padding=(padding, 1),
            padding_mode="reflect",
        ),
        nn.ReLU(),
    )
    self.conv3 = nn.Sequential(
        nn.Conv2d(
            C * 2 + 1,
            C,
            kernel_size=(ks, 3),
            padding=(padding, 1),
            padding_mode="reflect",
        ),
        nn.ReLU(),
    )
    self.conv4 = nn.Conv2d(
        C + 1, 1, kernel_size=(ks, 1), padding=(padding, 0), padding_mode="reflect"
    )
    self.fc = nn.Linear(1, in_dim)
    init_weights(self, init_type)
def __init__(
    self,
    in_dim=None,
    channels=128,
    kernel_size=(5, 5),
    init_type="kaiming_normal",
    noise_scale=1.0,
    noise_type="bin_wise",
    padding_mode="zeros",
    smoothing_width=-1,
):
    super().__init__()
    self.in_dim = in_dim
    self.noise_type = noise_type
    self.noise_scale = noise_scale
    C = channels
    self.smoothing_width = smoothing_width

    assert len(kernel_size) == 2
    ks = np.asarray(list(kernel_size))
    padding = (ks - 1) // 2

    self.conv1 = nn.Sequential(
        nn.Conv2d(
            2,
            C,
            kernel_size=ks,
            padding=padding,
            padding_mode=padding_mode,
        ),
        nn.ReLU(),
    )
    self.conv2 = nn.Sequential(
        nn.Conv2d(
            C + 1, C * 2, kernel_size=ks, padding=padding, padding_mode=padding_mode
        ),
        nn.ReLU(),
    )
    self.conv3 = nn.Sequential(
        nn.Conv2d(
            C * 2 + 1, C, kernel_size=ks, padding=padding, padding_mode=padding_mode
        ),
        nn.ReLU(),
    )
    self.conv4 = nn.Conv2d(
        C + 1, 1, kernel_size=ks, padding=padding, padding_mode=padding_mode
    )

    if self.noise_type == "frame_wise":
        # noise: (B, T, 1)
        self.fc = nn.Linear(1, in_dim)
    elif self.noise_type == "bin_wise":
        # noise: (B, T, C)
        self.fc = None
    else:
        raise ValueError("Unknown noise type: {}".format(self.noise_type))

    init_weights(self, init_type)
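# Hedged note (interpretation, not from the original source): with
# noise_type="bin_wise", the comments above suggest the injected noise already has
# one value per feature bin, so no projection is needed and self.fc is None; with
# noise_type="frame_wise", the noise is a single value per frame, presumably
# projected up to in_dim by the nn.Linear(1, in_dim) layer. The class name is not
# shown in this excerpt, so no instantiation example is given.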
def __init__(
    self,
    in_dim,
    ff_hidden_dim=2048,
    conv_hidden_dim=1024,
    lstm_hidden_dim=256,
    out_dim=199,
    dropout=0.0,
    num_lstm_layers=2,
    bidirectional=True,
    # NOTE: you must carefully set the following parameters
    in_lf0_idx=300,
    in_lf0_min=5.3936276,
    in_lf0_max=6.491111,
    out_lf0_idx=180,
    out_lf0_mean=5.953093881972361,
    out_lf0_scale=0.23435173188961034,
    skip_inputs=False,
    init_type="none",
    use_mdn=False,
    num_gaussians=8,
    dim_wise=False,
):
    super().__init__()
    self.in_lf0_idx = in_lf0_idx
    self.in_lf0_min = in_lf0_min
    self.in_lf0_max = in_lf0_max
    self.out_lf0_idx = out_lf0_idx
    self.out_lf0_mean = out_lf0_mean
    self.out_lf0_scale = out_lf0_scale
    self.skip_inputs = skip_inputs
    self.use_mdn = use_mdn

    self.ff = nn.Sequential(
        nn.Linear(in_dim, ff_hidden_dim),
        nn.ReLU(),
        nn.Linear(ff_hidden_dim, ff_hidden_dim),
        nn.ReLU(),
        nn.Linear(ff_hidden_dim, ff_hidden_dim),
        nn.ReLU(),
    )

    self.conv = nn.Sequential(
        nn.ReflectionPad1d(3),
        nn.Conv1d(ff_hidden_dim + 1, conv_hidden_dim, kernel_size=7, padding=0),
        nn.BatchNorm1d(conv_hidden_dim),
        nn.ReLU(),
        nn.ReflectionPad1d(3),
        nn.Conv1d(conv_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
        nn.BatchNorm1d(conv_hidden_dim),
        nn.ReLU(),
        nn.ReflectionPad1d(3),
        nn.Conv1d(conv_hidden_dim, conv_hidden_dim, kernel_size=7, padding=0),
        nn.BatchNorm1d(conv_hidden_dim),
        nn.ReLU(),
    )

    num_direction = 2 if bidirectional else 1
    self.lstm = nn.LSTM(
        conv_hidden_dim,
        lstm_hidden_dim,
        num_lstm_layers,
        # honor the bidirectional flag so num_direction matches the LSTM output size
        bidirectional=bidirectional,
        batch_first=True,
        dropout=dropout,
    )

    if self.skip_inputs:
        last_in_dim = num_direction * lstm_hidden_dim + in_dim
    else:
        last_in_dim = num_direction * lstm_hidden_dim

    if self.use_mdn:
        self.mdn_layer = MDNLayer(
            last_in_dim, out_dim, num_gaussians=num_gaussians, dim_wise=dim_wise
        )
    else:
        self.fc = nn.Linear(last_in_dim, out_dim)

    init_weights(self, init_type)
def __init__(
    self,
    in_dim=512,
    out_dim=80,
    encoder_lstm_hidden_dim=256,
    encoder_num_lstm_layers=3,
    encoder_dropout=0.0,
    decoder_layers=2,
    decoder_hidden_dim=1024,
    decoder_prenet_layers=2,
    decoder_prenet_hidden_dim=1024,
    decoder_prenet_dropout=0.5,
    decoder_zoneout=0.1,
    postnet_layers=5,
    postnet_channels=512,
    postnet_kernel_size=5,
    postnet_dropout=0.5,
    reduction_factor=1,
    init_type="none",
    # NOTE: you must carefully set the following parameters
    in_lf0_idx=300,
    in_lf0_min=5.3936276,
    in_lf0_max=6.491111,
    out_lf0_idx=180,
    out_lf0_mean=5.953093881972361,
    out_lf0_scale=0.23435173188961034,
):
    super().__init__()
    self.in_lf0_idx = in_lf0_idx
    self.in_lf0_min = in_lf0_min
    self.in_lf0_max = in_lf0_max
    self.out_lf0_idx = out_lf0_idx
    self.out_lf0_mean = out_lf0_mean
    self.out_lf0_scale = out_lf0_scale
    self.reduction_factor = reduction_factor

    # Encoder
    self.lstm = nn.LSTM(
        in_dim,
        encoder_lstm_hidden_dim,
        encoder_num_lstm_layers,
        bidirectional=True,
        batch_first=True,
        dropout=encoder_dropout,
    )

    # Decoder
    # NOTE: the decoder_hidden_dim argument is overridden here by the
    # bidirectional encoder output size plus one extra dimension
    decoder_hidden_dim = 2 * encoder_lstm_hidden_dim + 1
    self.decoder = NonAttentiveTacotronDecoder(
        decoder_hidden_dim,
        out_dim,
        decoder_layers,
        decoder_hidden_dim,
        decoder_prenet_layers,
        decoder_prenet_hidden_dim,
        decoder_prenet_dropout,
        decoder_zoneout,
        reduction_factor,
    )

    # Post-Net
    self.postnet = TacotronPostnet(
        out_dim,
        postnet_layers,
        postnet_channels,
        postnet_kernel_size,
        postnet_dropout,
    )
    init_weights(self, init_type)
def __init__(
    self,
    in_dim=None,
    channels=64,
    kernel_size=(5, 3),
    padding=(0, 0),
    last_sigmoid=False,
    init_type="kaiming_normal",
    padding_mode="zeros",
):
    super().__init__()
    self.last_sigmoid = last_sigmoid
    C = channels
    ks = np.asarray(list(kernel_size))
    if padding is None:
        padding = (ks - 1) // 2

    self.convs = nn.ModuleList()
    self.convs.append(
        nn.Sequential(
            nn.Conv2d(
                1,
                C,
                kernel_size=ks,
                padding=padding,
                stride=(1, 1),
                padding_mode=padding_mode,
            ),
            nn.LeakyReLU(0.2),
        )
    )
    self.convs.append(
        nn.Sequential(
            nn.Conv2d(
                C,
                2 * C,
                kernel_size=ks,
                padding=padding,
                stride=(2, 1),
                padding_mode=padding_mode,
            ),
            nn.LeakyReLU(0.2),
        )
    )
    self.convs.append(
        nn.Sequential(
            nn.Conv2d(
                2 * C,
                4 * C,
                kernel_size=ks,
                padding=padding,
                stride=(2, 1),
                padding_mode=padding_mode,
            ),
            nn.LeakyReLU(0.2),
        )
    )
    self.convs.append(
        nn.Sequential(
            nn.Conv2d(
                4 * C,
                2 * C,
                kernel_size=ks,
                padding=padding,
                stride=(2, 1),
                padding_mode=padding_mode,
            ),
            nn.LeakyReLU(0.2),
        )
    )
    self.last_conv = nn.Conv2d(
        2 * C,
        1,
        kernel_size=ks,
        padding=padding,
        stride=(1, 1),
        padding_mode=padding_mode,
    )
    init_weights(self, init_type)