def __init__(self, n_chan, filter_sz, stride, parent_vc, bias=True, name=None):
    super(Upsampling, self).__init__()
    # See upsampling_notes.txt: padding = filter_sz - stride
    # and: left_offset = left_wing_sz - end_padding
    end_padding = stride - 1
    self.vc = vconv.VirtualConv(filter_info=filter_sz, stride=stride,
            padding=(end_padding, end_padding), is_downsample=False,
            parent=parent_vc, name=name)
    self.tconv = nn.ConvTranspose1d(n_chan, n_chan, filter_sz, stride,
            padding=filter_sz - stride, bias=bias)
    self.apply(netmisc.xavier_init)
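# A minimal standalone sketch (not part of the module above) checking the
# padding arithmetic in Upsampling: with padding = filter_sz - stride,
# PyTorch's transposed-conv length formula gives
#   L_out = (L_in - 1)*stride - 2*padding + filter_sz
#         = (L_in + 1)*stride - filter_sz.
# The values below are illustrative only.
import torch
import torch.nn as nn

n_chan, filter_sz, stride, L_in = 8, 5, 4, 10
tconv = nn.ConvTranspose1d(n_chan, n_chan, filter_sz, stride,
        padding=filter_sz - stride)
x = torch.randn(1, n_chan, L_in)
assert tconv(x).shape[-1] == (L_in + 1) * stride - filter_sz  # 39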
def __init__(self, name, parent_vc, **kwargs):
    super(Conv1dWrap, self).__init__(**kwargs)
    self.apply(netmisc.xavier_init)
    self.vc = vconv.VirtualConv(filter_info=kwargs['kernel_size'],
            stride=kwargs['stride'], name=name, parent=parent_vc)
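# Hypothetical usage of the Conv1dWrap above, assuming it subclasses
# nn.Conv1d (its super().__init__ consumes standard Conv1d keyword
# arguments): the positional name and parent_vc feed the VirtualConv
# bookkeeping, and the remaining kwargs pass through to the convolution.
# Channel and kernel values here are illustrative only.
conv = Conv1dWrap('LC_Conv', parent_vc=None, in_channels=39,
        out_channels=128, kernel_size=3, stride=1)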
def __init__(self, wavenet_vc, n_cond, n_res, n_dil, n_skp, stride, dil,
        filter_sz=2, bias=True, parent_vc=None, name=None):
    """
    filter_sz: # elements in the dilated kernels
    n_cond: # channels of local condition vectors
    n_res : # residual channels
    n_dil : # output channels for dilated kernel
    n_skp : # channels output to skip connections
    """
    super(GatedResidualCondConv, self).__init__()
    self.wavenet_vc = wavenet_vc
    self.conv_signal = nn.Conv1d(n_res, n_dil, filter_sz, dilation=dil, bias=bias)
    self.conv_gate = nn.Conv1d(n_res, n_dil, filter_sz, dilation=dil, bias=bias)
    self.proj_signal = nn.Conv1d(n_cond, n_dil, kernel_size=1, bias=False)
    self.proj_gate = nn.Conv1d(n_cond, n_dil, kernel_size=1, bias=False)
    self.dil_res = nn.Conv1d(n_dil, n_res, kernel_size=1, bias=False)
    self.dil_skp = nn.Conv1d(n_dil, n_skp, kernel_size=1, bias=False)
    # The dilated autoregressive convolution produces an output at the
    # right-most position of the receptive field.  (At the very end of a
    # stack of these, the output corresponds to the position just after it,
    # but within the stack of convolutions, outputs are right-aligned.)
    dil_filter_sz = (filter_sz - 1) * dil + 1
    self.vc = vconv.VirtualConv(filter_info=(dil_filter_sz - 1, 0),
            parent=parent_vc, name=name)
    self.apply(netmisc.xavier_init)
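# A worked check of the wing-size geometry above (standalone, illustrative
# values): a kernel of filter_sz taps spaced dil apart spans
#   dil_filter_sz = (filter_sz - 1) * dil + 1
# input positions, so a right-aligned causal output consumes
# (dil_filter_sz - 1, 0) as its (left, right) wings.
import torch
import torch.nn as nn

filter_sz, dil = 2, 8
conv = nn.Conv1d(1, 1, filter_sz, dilation=dil)
x = torch.randn(1, 1, 20)
span = (filter_sz - 1) * dil + 1             # 9 input positions
assert conv(x).shape[-1] == 20 - (span - 1)  # output shrinks by the wings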
def make_vcs():
    # Chain one VirtualConv per entry of the module-level `model` spec,
    # returning them keyed by name.
    vc = None
    vcs = {}
    for m in model:
        vc = vconv.VirtualConv(*m, parent=vc)
        vcs[vc.name] = vc
    return vcs
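# A hypothetical `model` spec consistent with the positional VirtualConv
# signature used in these snippets, i.e.
# ((left_wing, right_wing), (left_pad, right_pad), stride, is_downsample, name).
# The entries and values are illustrative only.
model = [
    ((199, 200), (0, 0), 160, True, 'MFCC'),
    ((1, 1), (0, 0), 1, True, 'Conv3'),
]
vcs = make_vcs()  # e.g. {'MFCC': <VirtualConv>, 'Conv3': <VirtualConv>}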
def __init__(self, wavenet_vc, hps, n_cond, stride, dil, final_layer=False,
        parent_vc=None, name=None):
    """
    hps.filter_sz: # elements in the dilated kernels
    """
    super(GatedResidualCondConv, self).__init__()
    self.wavenet_vc = wavenet_vc
    self.final_layer = final_layer
    self.conv_signal = nn.Conv1d(hps.n_res, hps.n_dil, hps.filter_sz,
            dilation=dil, bias=hps.bias)
    self.conv_gate = nn.Conv1d(hps.n_res, hps.n_dil, hps.filter_sz,
            dilation=dil, bias=hps.bias)
    self.proj_signal = nn.Conv1d(n_cond, hps.n_dil, kernel_size=1, bias=False)
    self.proj_gate = nn.Conv1d(n_cond, hps.n_dil, kernel_size=1, bias=False)
    self.dil_skp = nn.Conv1d(hps.n_dil, hps.n_skp, kernel_size=1, bias=False)
    if not final_layer:
        # The last layer feeds only the skip connections, so its residual
        # projection is never used and is omitted.
        self.dil_res = nn.Conv1d(hps.n_dil, hps.n_res, kernel_size=1, bias=False)
    # The dilated autoregressive convolution produces an output at the
    # right-most position of the receptive field.  (At the very end of a
    # stack of these, the output corresponds to the position just after it,
    # but within the stack of convolutions, outputs are right-aligned.)
    dil_filter_sz = (hps.filter_sz - 1) * dil + 1
    self.vc = vconv.VirtualConv(filter_info=(dil_filter_sz - 1, 0),
            parent=parent_vc, name=name)
    self.apply(netmisc.xavier_init)
def input_gen(t):
    for lw, rw, lp, rp in itertools.product(t.lw, t.rw, t.lp, t.rp):
        for st in t.strides:
            try:
                vc = vconv.VirtualConv((lw, rw), (lp, rp), st, True, 'Conv', None)
            except RuntimeError:
                continue
            print('lw: {}, rw: {}, lp: {}, rp: {}, st: {}'.format(lw, rw, lp, rp, st))
            for spec in itertools.product(t.start, t.l1, t.l2, t.l3, t.gs):
                yield vc, grid_range(*spec, 1)
        for ist in t.inv_strides:
            try:
                vc = vconv.VirtualConv((lw, rw), (lp, rp), ist, False, 'Conv', None)
            except RuntimeError:
                continue
            print('lw: {}, rw: {}, lp: {}, rp: {}, ist: {}'.format(lw, rw, lp, rp, ist))
            for spec in itertools.product(t.start, t.l1, t.l2, t.l3, t.gs):
                yield vc, grid_range(*spec, vc.stride_ratio.denominator)
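# A guess at what the two grid steps above encode (inferred from this
# snippet, not verified against vconv internals): a downsampling conv has
# an integral input-to-output stride ratio, so any sample-aligned range is
# valid (step 1), while an inverse conv's stride_ratio is fractional and
# input ranges must land on multiples of its denominator.
inv_vc = vconv.VirtualConv((1, 1), (0, 0), 4, False, 'Inverse', None)
step = inv_vc.stride_ratio.denominator  # expected: 4 for a stride-4 inverse conv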
def __init__(self, hps):
    super(MfccInverter, self).__init__()
    self.bn_type = 'none'
    self.mfcc = mfcc.ProcessWav(sample_rate=hps.sample_rate,
            win_sz=hps.mfcc_win_sz, hop_sz=hps.mfcc_hop_sz,
            n_mels=hps.n_mels, n_mfcc=hps.n_mfcc)
    mfcc_vc = vconv.VirtualConv(filter_info=hps.mfcc_win_sz,
            stride=hps.mfcc_hop_sz, parent=None, name='MFCC')
    self.wavenet = wn.WaveNet(hps, parent_vc=mfcc_vc)
    self.objective = wn.RecLoss()
    self._init_geometry(hps.n_win_batch)
def _initialize(self):
    super(Slice, self).__init__()
    self.target_device = None
    self.__dict__.update(self.init_args)
    self.jitter = jitter.Jitter(self.jitter_prob)
    self.mfcc_proc = mfcc.ProcessWav(sample_rate=self.sample_rate,
            win_sz=self.mfcc_win_sz, hop_sz=self.mfcc_hop_sz,
            n_mels=self.n_mels, n_mfcc=self.n_mfcc)
    self.mfcc_vc = vconv.VirtualConv(filter_info=self.mfcc_win_sz,
            stride=self.mfcc_hop_sz, parent=None, name='MFCC')
def __init__(self, sample_rate=16000, win_sz=400, hop_sz=160, n_mels=80,
        n_mfcc=13, name=None):
    self.sample_rate = sample_rate
    self.window_sz = win_sz
    self.hop_sz = hop_sz
    self.n_mels = n_mels
    self.n_mfcc = n_mfcc
    # n_mfcc coefficients plus their deltas and double-deltas
    self.n_out = n_mfcc * 3
    self.vc = vconv.VirtualConv(filter_info=self.window_sz,
            stride=self.hop_sz, parent=None, name=name)
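# A quick check of the analysis geometry these defaults encode (standard
# frame-count arithmetic, not a call into this codebase): a 400-sample
# window advancing by 160 samples yields
#   n_frames = (n_samples - win_sz) // hop_sz + 1
# fully-valid frames, i.e. 98 frames for one second of 16 kHz audio.
sample_rate, win_sz, hop_sz = 16000, 400, 160
n_samples = sample_rate  # one second
assert (n_samples - win_sz) // hop_sz + 1 == 98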
def __init__(self, n_in_chan, n_out_chan, filter_sz, stride=1, do_res=True,
        parent_vc=None, name=None):
    super(ConvReLURes, self).__init__()
    self.n_in = n_in_chan
    self.n_out = n_out_chan
    self.conv = nn.Conv1d(n_in_chan, n_out_chan, filter_sz, stride,
            padding=0, bias=True)
    self.relu = nn.ReLU()
    self.name = name
    # self.bn = nn.BatchNorm1d(n_out_chan)
    self.vc = vconv.VirtualConv(filter_info=filter_sz, stride=stride,
            parent=parent_vc, name=name)
    self.do_res = do_res
    if self.do_res:
        if stride != 1:
            raise ValueError('Stride must be 1 for residually connected '
                    'convolution')
        l_off, r_off = vconv.output_offsets(self.vc, self.vc)
        self.register_buffer('residual_offsets', torch.tensor([l_off, r_off]))
    netmisc.xavier_init(self.conv)
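# A sketch of the residual trimming that residual_offsets supports, using
# plain tensor arithmetic instead of vconv.output_offsets (the hand-computed
# offsets below assume an unpadded width-k conv trims (k-1)//2 samples on
# the left and k//2 on the right, and that n_in == n_out so the sum is
# well-formed):
import torch
import torch.nn as nn

k = 3
conv = nn.Conv1d(8, 8, k)
x = torch.randn(1, 8, 100)
l_off, r_off = (k - 1) // 2, -(k // 2)        # trim x to the conv output
y = conv(x) + x[:, :, l_off:x.shape[-1] + r_off]
assert y.shape[-1] == 100 - (k - 1)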
def __init__(self, filter_sz, n_lc_in, n_lc_out, lc_upsample_filt_sizes,
        lc_upsample_strides, n_res, n_dil, n_skp, n_post, n_quant, n_blocks,
        n_block_layers, n_speakers, n_global_embed, bias=True, parent_vc=None):
    super(WaveNet, self).__init__()
    self.n_blocks = n_blocks
    self.n_block_layers = n_block_layers
    self.n_quant = n_quant
    self.quant_onehot = None
    self.bias = bias
    post_jitter_filt_sz = 3
    lc_input_stepsize = np_prod(lc_upsample_strides)
    lc_conv_name = 'LC_Conv(filter_size={})'.format(post_jitter_filt_sz)
    self.lc_conv = Conv1dWrap(n_lc_in, n_lc_out,
            kernel_size=post_jitter_filt_sz, stride=1, bias=self.bias)
    cur_vc = vconv.VirtualConv(filter_info=post_jitter_filt_sz, stride=1,
            parent=parent_vc, name=lc_conv_name)
    self.vc = dict()
    self.vc['beg'] = cur_vc
    # This VC is the first processing of the local conditioning after the
    # Jitter.  It is the starting point for the commitment loss aggregation.
    self.vc['pre_upsample'] = cur_vc
    self.lc_upsample = nn.Sequential()
    # When WaveNet is used as a stand-alone model, parent_vc is None;
    # the Autoencoder model in model.py links parent_vcs together.
    for i, (filt_sz, stride) in enumerate(zip(lc_upsample_filt_sizes,
            lc_upsample_strides)):
        name = 'Upsampling_{}(filter_sz={}, stride={})'.format(i, filt_sz, stride)
        mod = Upsampling(n_lc_out, filt_sz, stride, cur_vc, name=name)
        self.lc_upsample.add_module(str(i), mod)
        cur_vc = mod.vc
    # This VC describes the bounds of the input wav corresponding to the
    # local conditioning vectors.
    self.vc['last_upsample'] = cur_vc
    self.cond = Conditioning(n_speakers, n_global_embed)
    self.base_layer = Conv1dWrap(n_quant, n_res, kernel_size=1, stride=1,
            dilation=1, bias=self.bias)
    self.conv_layers = nn.ModuleList()
    n_cond = n_lc_out + n_global_embed
    for b in range(self.n_blocks):
        for bl in range(self.n_block_layers):
            dil = 2**bl
            name = 'GRCC_{},{}(dil={})'.format(b, bl, dil)
            grc = GatedResidualCondConv(self.vc, n_cond, n_res, n_dil,
                    n_skp, 1, dil, filter_sz, bias, cur_vc, name)
            self.conv_layers.append(grc)
            cur_vc = grc.vc
    # Each module in the stack needs to know the dimensions of the input
    # and output of the overall stack, in order to trim residual
    # connections.
    self.vc['beg_grcc'] = self.conv_layers[0].vc
    self.vc['end_grcc'] = self.conv_layers[-1].vc
    self.vc['beg_grcc'].do_trim_input = True
    self.relu = nn.ReLU()
    self.post1 = Conv1dWrap(n_skp, n_post, 1, bias=bias)
    self.post2 = Conv1dWrap(n_post, n_quant, 1, bias=bias)
    self.logsoftmax = nn.LogSoftmax(1)  # (B, Q, N)
    self.vc['main'] = cur_vc
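# Receptive-field arithmetic for the GRCC stack built above (plain math,
# no calls into the model; parameter values are illustrative): each layer
# widens the left wing by (filter_sz - 1) * 2**bl, so the full stack spans
#   1 + n_blocks * (filter_sz - 1) * (2**n_block_layers - 1)
# input samples.
filter_sz, n_blocks, n_block_layers = 2, 2, 10
rf = 1 + n_blocks * (filter_sz - 1) * sum(2**bl for bl in range(n_block_layers))
assert rf == 1 + n_blocks * (filter_sz - 1) * (2**n_block_layers - 1) == 2047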