Example 1
    def __init__(self,
                 n_chan,
                 filter_sz,
                 stride,
                 parent_vc,
                 bias=True,
                 name=None):
        super(Upsampling, self).__init__()
        # See upsampling_notes.txt: padding = filter_sz - stride
        # and: left_offset = left_wing_sz - end_padding
        end_padding = stride - 1
        self.vc = vconv.VirtualConv(filter_info=filter_sz,
                                    stride=stride,
                                    padding=(end_padding, end_padding),
                                    is_downsample=False,
                                    parent=parent_vc,
                                    name=name)

        self.tconv = nn.ConvTranspose1d(n_chan,
                                        n_chan,
                                        filter_sz,
                                        stride,
                                        padding=filter_sz - stride,
                                        bias=bias)
        self.apply(netmisc.xavier_init)
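
A quick way to check the padding rule in the comment above (a minimal sketch; the channel counts and lengths are illustrative, not from the source): with padding = filter_sz - stride, nn.ConvTranspose1d maps L_in input steps to (L_in + 1) * stride - filter_sz output steps, i.e. roughly stride-fold upsampling.

import torch
import torch.nn as nn

# Sanity check of the padding rule (illustrative values):
# L_out = (L_in - 1)*stride - 2*padding + (filter_sz - 1) + 1
#       = (L_in + 1)*stride - filter_sz   when padding = filter_sz - stride
n_chan, filter_sz, stride, L_in = 8, 5, 4, 10
tconv = nn.ConvTranspose1d(n_chan, n_chan, filter_sz, stride,
                           padding=filter_sz - stride)
y = tconv(torch.randn(1, n_chan, L_in))
assert y.shape[-1] == (L_in + 1) * stride - filter_sz   # 39
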
Example 2
    def __init__(self, name, parent_vc, **kwargs):
        super(Conv1dWrap, self).__init__(**kwargs)
        self.apply(netmisc.xavier_init)
        self.vc = vconv.VirtualConv(filter_info=kwargs['kernel_size'],
                                    stride=kwargs['stride'],
                                    name=name,
                                    parent=parent_vc)
Example 3
    def __init__(self, wavenet_vc, n_cond, n_res, n_dil, n_skp, stride, dil,
            filter_sz=2, bias=True, parent_vc=None, name=None):
        """
        filter_sz: # elements in the dilated kernels
        n_cond: # channels of local condition vectors
        n_res : # residual channels
        n_dil : # output channels for dilated kernel
        n_skp : # channels output to skip connections
        """
        super(GatedResidualCondConv, self).__init__()
        self.wavenet_vc = wavenet_vc 
        self.conv_signal = nn.Conv1d(n_res, n_dil, filter_sz, dilation=dil, bias=bias)
        self.conv_gate = nn.Conv1d(n_res, n_dil, filter_sz, dilation=dil, bias=bias)
        self.proj_signal = nn.Conv1d(n_cond, n_dil, kernel_size=1, bias=False)
        self.proj_gate = nn.Conv1d(n_cond, n_dil, kernel_size=1, bias=False)
        self.dil_res = nn.Conv1d(n_dil, n_res, kernel_size=1, bias=False)
        self.dil_skp = nn.Conv1d(n_dil, n_skp, kernel_size=1, bias=False)

        # The dilated autoregressive convolution produces an output at the
        # right-most position of its receptive field.  (At the very end of a
        # stack of these, the output corresponds to the position just after
        # the field; within the stack, outputs are right-aligned.)
        dil_filter_sz = (filter_sz - 1) * dil + 1
        self.vc = vconv.VirtualConv(filter_info=(dil_filter_sz - 1, 0),
                parent=parent_vc, name=name)
        self.apply(netmisc.xavier_init)
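
The dil_filter_sz arithmetic above can be checked numerically (a minimal sketch with illustrative values): a kernel of size filter_sz at dilation dil spans (filter_sz - 1) * dil + 1 input steps, so an unpadded Conv1d shortens its input by dil_filter_sz - 1.

import torch
import torch.nn as nn

# A size-2 kernel at dilation 8 has a footprint of (2 - 1)*8 + 1 = 9
# input steps; unpadded, it trims footprint - 1 = 8 steps off the input.
filter_sz, dil, L_in = 2, 8, 100
dil_filter_sz = (filter_sz - 1) * dil + 1
conv = nn.Conv1d(4, 4, filter_sz, dilation=dil)
y = conv(torch.randn(1, 4, L_in))
assert y.shape[-1] == L_in - (dil_filter_sz - 1)   # 92
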
Example 4
def make_vcs():
    vc = None
    vcs = {}
    for m in model:
        vc = vconv.VirtualConv(*m, parent=vc)
        vcs[vc.name] = vc
    return vcs
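
make_vcs expects a module-level model: an iterable of positional-argument tuples for VirtualConv, each chained to the previous one through parent. A hypothetical spec (the names and numbers here are invented for illustration, assuming the positional order (filter_info, padding, stride, is_downsample, name) used in Example 6):

# Hypothetical three-stage chain: an MFCC-like front end, a 1x1 stage,
# and a stride-5 upsampling stage. Each entry feeds the next as parent.
model = [
    ((199, 200), (0, 0), 160, True, 'MFCC'),
    ((1, 1), (0, 0), 1, True, 'Conv'),
    ((0, 0), (0, 0), 5, False, 'Upsample'),
]
vcs = make_vcs()
print(list(vcs))   # ['MFCC', 'Conv', 'Upsample']
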
Example 5
    def __init__(self,
                 wavenet_vc,
                 hps,
                 n_cond,
                 stride,
                 dil,
                 final_layer=False,
                 parent_vc=None,
                 name=None):
        """
        filter_sz: # elements in the dilated kernels
        """
        super(GatedResidualCondConv, self).__init__()
        self.wavenet_vc = wavenet_vc
        self.final_layer = final_layer

        self.conv_signal = nn.Conv1d(hps.n_res,
                                     hps.n_dil,
                                     hps.filter_sz,
                                     dilation=dil,
                                     bias=hps.bias)
        self.conv_gate = nn.Conv1d(hps.n_res,
                                   hps.n_dil,
                                   hps.filter_sz,
                                   dilation=dil,
                                   bias=hps.bias)
        self.proj_signal = nn.Conv1d(n_cond,
                                     hps.n_dil,
                                     kernel_size=1,
                                     bias=False)
        self.proj_gate = nn.Conv1d(n_cond,
                                   hps.n_dil,
                                   kernel_size=1,
                                   bias=False)
        self.dil_skp = nn.Conv1d(hps.n_dil,
                                 hps.n_skp,
                                 kernel_size=1,
                                 bias=False)

        if not final_layer:
            self.dil_res = nn.Conv1d(hps.n_dil,
                                     hps.n_res,
                                     kernel_size=1,
                                     bias=False)

        # The dilated autoregressive convolution produces an output at the
        # right-most position of its receptive field.  (At the very end of a
        # stack of these, the output corresponds to the position just after
        # the field; within the stack, outputs are right-aligned.)
        dil_filter_sz = (hps.filter_sz - 1) * dil + 1
        self.vc = vconv.VirtualConv(filter_info=(dil_filter_sz - 1, 0),
                                    parent=parent_vc,
                                    name=name)
        self.apply(netmisc.xavier_init)
Example 6
def input_gen(t):
    for lw, rw, lp, rp in itertools.product(t.lw, t.rw, t.lp, t.rp):
        for st in t.strides:
            try:
                vc = vconv.VirtualConv((lw, rw), (lp, rp), st, True, 'Conv', None)
            except RuntimeError:
                continue
            print('lw: {}, rw: {}, lp: {}, rp: {}, st: {}'
                  .format(lw, rw, lp, rp, st))
            for spec in itertools.product(t.start, t.l1, t.l2, t.l3, t.gs): 
                yield vc, grid_range(*spec, 1)
        for ist in t.inv_strides:
            try:
                vc = vconv.VirtualConv((lw, rw), (lp, rp), ist, False, 'Conv', None)
            except RuntimeError:
                continue
            print('lw: {}, rw: {}, lp: {}, rp: {}, ist: {}'
                  .format(lw, rw, lp, rp, ist))
            for spec in itertools.product(t.start, t.l1, t.l2, t.l3, t.gs): 
                yield vc, grid_range(*spec, vc.stride_ratio.denominator)
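
The ist branch spaces its test grid by vc.stride_ratio.denominator, which suggests stride_ratio is a fractions.Fraction: a downsampling stride s would be Fraction(s, 1) and its inverse Fraction(1, s). A sketch of that assumption (the representation is inferred, not confirmed by the source):

from fractions import Fraction

# Assumed representation, inferred from vc.stride_ratio.denominator:
# inverting a stride-4 downsampling gives a 1/4 stride ratio, so valid
# grid positions land on every 4th sub-sample position.
down = Fraction(4, 1)
up = 1 / down
print(up.denominator)   # 4
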
Example 7
    def __init__(self, hps):
        super(MfccInverter, self).__init__()
        self.bn_type = 'none' 
        self.mfcc = mfcc.ProcessWav(
                sample_rate=hps.sample_rate, win_sz=hps.mfcc_win_sz,
                hop_sz=hps.mfcc_hop_sz, n_mels=hps.n_mels, n_mfcc=hps.n_mfcc)

        mfcc_vc = vconv.VirtualConv(filter_info=hps.mfcc_win_sz,
                stride=hps.mfcc_hop_sz, parent=None, name='MFCC')

        self.wavenet = wn.WaveNet(hps, parent_vc=mfcc_vc)
        self.objective = wn.RecLoss()
        self._init_geometry(hps.n_win_batch)
Example 8
    def _initialize(self):
        super(Slice, self).__init__()
        self.target_device = None
        self.__dict__.update(self.init_args)
        self.jitter = jitter.Jitter(self.jitter_prob)
        self.mfcc_proc = mfcc.ProcessWav(
                sample_rate=self.sample_rate,
                win_sz=self.mfcc_win_sz,
                hop_sz=self.mfcc_hop_sz,
                n_mels=self.n_mels,
                n_mfcc=self.n_mfcc)
        self.mfcc_vc = vconv.VirtualConv(filter_info=self.mfcc_win_sz,
                stride=self.mfcc_hop_sz, parent=None, name='MFCC')
Example 9
    def __init__(self,
                 sample_rate=16000,
                 win_sz=400,
                 hop_sz=160,
                 n_mels=80,
                 n_mfcc=13,
                 name=None):
        self.sample_rate = sample_rate
        self.window_sz = win_sz
        self.hop_sz = hop_sz
        self.n_mels = n_mels
        self.n_mfcc = n_mfcc
        self.n_out = n_mfcc * 3
        self.vc = vconv.VirtualConv(filter_info=self.window_sz,
                                    stride=self.hop_sz,
                                    parent=None,
                                    name=name)
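
Modeling the MFCC front end as a VirtualConv with filter_info=win_sz and stride=hop_sz matches the usual frame count for an un-centered STFT. A quick arithmetic check with the defaults above (assuming no center padding):

# One second of 16 kHz audio, 400-sample windows hopped by 160 samples:
# behaves like an unpadded stride-160 convolution with a size-400 filter.
sample_rate, win_sz, hop_sz = 16000, 400, 160
n_frames = (sample_rate - win_sz) // hop_sz + 1
print(n_frames)   # 98
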
Example 10
    def __init__(self,
                 n_in_chan,
                 n_out_chan,
                 filter_sz,
                 stride=1,
                 do_res=True,
                 parent_vc=None,
                 name=None):
        super(ConvReLURes, self).__init__()
        self.n_in = n_in_chan
        self.n_out = n_out_chan
        self.conv = nn.Conv1d(n_in_chan,
                              n_out_chan,
                              filter_sz,
                              stride,
                              padding=0,
                              bias=True)
        self.relu = nn.ReLU()
        self.name = name
        # self.bn = nn.BatchNorm1d(n_out_chan)

        self.vc = vconv.VirtualConv(filter_info=filter_sz,
                                    stride=stride,
                                    parent=parent_vc,
                                    name=name)

        self.do_res = do_res
        if self.do_res:
            if stride != 1:
                raise ValueError(
                        'Stride must be 1 for a residually connected convolution')
            l_off, r_off = vconv.output_offsets(self.vc, self.vc)
            self.register_buffer('residual_offsets',
                                 torch.tensor([l_off, r_off]))

        netmisc.xavier_init(self.conv)
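
The residual_offsets buffer records how much of the input to trim so it lines up with the convolution output. A minimal sketch of the idea (the wing split here is illustrative, not the library's output_offsets): with stride 1 and no padding, a size-F filter shortens the signal by F - 1, so the residual path must drop the left and right wings.

import torch

filter_sz = 9
l_wing, r_wing = (filter_sz - 1) // 2, filter_sz // 2   # 4 and 4
x = torch.randn(1, 16, 100)
residual = x[..., l_wing : x.shape[-1] - r_wing]
print(residual.shape[-1])   # 92 == 100 - (filter_sz - 1)
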
Example 11
    def __init__(self, filter_sz, n_lc_in, n_lc_out, lc_upsample_filt_sizes,
            lc_upsample_strides, n_res, n_dil, n_skp, n_post, n_quant,
            n_blocks, n_block_layers, n_speakers, n_global_embed,
            bias=True, parent_vc=None):
        super(WaveNet, self).__init__()

        self.n_blocks = n_blocks
        self.n_block_layers = n_block_layers
        self.n_quant = n_quant
        self.quant_onehot = None 
        self.bias = bias
        post_jitter_filt_sz = 3
        lc_input_stepsize = np_prod(lc_upsample_strides) 

        lc_conv_name = 'LC_Conv(filter_size={})'.format(post_jitter_filt_sz) 
        self.lc_conv = Conv1dWrap(n_lc_in, n_lc_out,
                kernel_size=post_jitter_filt_sz, stride=1, bias=self.bias)

        cur_vc = vconv.VirtualConv(filter_info=post_jitter_filt_sz,
                stride=1, parent=parent_vc, name=lc_conv_name)

        self.vc = dict()
        self.vc['beg'] = cur_vc
        
        # This VC is the first processing of the local conditioning after the
        # Jitter. It is the starting point for the commitment loss aggregation
        self.vc['pre_upsample'] = cur_vc
        self.lc_upsample = nn.Sequential()

        # When WaveNet is used as a stand-alone model, parent_vc is None;
        # the Autoencoder model in model.py links parent_vcs together.
        for i, (filt_sz, stride) in enumerate(zip(lc_upsample_filt_sizes,
                                                  lc_upsample_strides)):
            name = 'Upsampling_{}(filter_sz={}, stride={})'.format(
                    i, filt_sz, stride)
            mod = Upsampling(n_lc_out, filt_sz, stride, cur_vc, name=name)
            self.lc_upsample.add_module(str(i), mod)
            cur_vc = mod.vc

        # This vc describes the bounds of the input wav corresponding to the
        # local conditioning vectors
        self.vc['last_upsample'] = cur_vc
        self.cond = Conditioning(n_speakers, n_global_embed)
        self.base_layer = Conv1dWrap(n_quant, n_res, kernel_size=1, stride=1,
                dilation=1, bias=self.bias)

        self.conv_layers = nn.ModuleList() 
        n_cond = n_lc_out + n_global_embed

        for b in range(self.n_blocks):
            for bl in range(self.n_block_layers):
                dil = 2**bl
                name = 'GRCC_{},{}(dil={})'.format(b, bl, dil)
                grc = GatedResidualCondConv(self.vc, n_cond, n_res, n_dil,
                        n_skp, 1, dil, filter_sz, bias, cur_vc, name)
                self.conv_layers.append(grc)
                cur_vc = grc.vc

        # Each module in the stack needs to know the dimensions of
        # the input and output of the overall stack, in order to trim
        # residual connections
        self.vc['beg_grcc'] = self.conv_layers[0].vc
        self.vc['end_grcc'] = self.conv_layers[-1].vc 

        self.vc['beg_grcc'].do_trim_input = True

        self.relu = nn.ReLU()
        self.post1 = Conv1dWrap(n_skp, n_post, 1, bias=bias)
        self.post2 = Conv1dWrap(n_post, n_quant, 1, bias=bias)
        self.logsoftmax = nn.LogSoftmax(1) # (B, Q, N)
        self.vc['main'] = cur_vc
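
Since each GRCC layer contributes (filter_sz - 1) * 2**bl steps of left context, the receptive field of the whole stack follows directly from n_blocks and n_block_layers. A quick arithmetic sketch with illustrative values:

# Two blocks of dilations 1, 2, ..., 512 with a size-2 kernel:
filter_sz, n_blocks, n_block_layers = 2, 2, 10
rfield = 1 + sum((filter_sz - 1) * 2**bl
                 for _ in range(n_blocks)
                 for bl in range(n_block_layers))
print(rfield)   # 2047 timesteps
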