Exemple #1
0
    def __init__(self, pre_params, n_quant):
        """Build the MFCC front end and register the helper buffers.

        Args:
            pre_params: mapping of keyword arguments forwarded to
                ``mfcc.ProcessWav``; ``name='mfcc'`` is added on top of them.
            n_quant: size of the square identity matrix stored in the
                ``quant_onehot`` buffer.
        """
        super().__init__()

        wav_kwargs = dict(**pre_params, name='mfcc')
        self.mfcc = mfcc.ProcessWav(**wav_kwargs)
        # Mirror the `rf` attribute of the MFCC processor on this object.
        self.rf = self.mfcc.rf
        self.n_quant = n_quant

        # Row i of the identity matrix is the one-hot vector for index i.
        identity = torch.eye(n_quant)
        self.register_buffer('quant_onehot', identity)

        # Zero-element buffer whose only purpose is to let callers ask which
        # device the model currently lives on.
        self.register_buffer('dummy_buf', torch.empty(0))
Exemple #2
0
    def __init__(self, hps):
        """Assemble the MFCC processor, the WaveNet and the loss objective.

        Args:
            hps: hyperparameter object; this method reads ``sample_rate``,
                ``mfcc_win_sz``, ``mfcc_hop_sz``, ``n_mels``, ``n_mfcc`` and
                ``n_win_batch`` from it and hands the whole object to
                ``wn.WaveNet``.
        """
        super(MfccInverter, self).__init__()
        self.bn_type = 'none'

        wav_kwargs = dict(
            sample_rate=hps.sample_rate,
            win_sz=hps.mfcc_win_sz,
            hop_sz=hps.mfcc_hop_sz,
            n_mels=hps.n_mels,
            n_mfcc=hps.n_mfcc,
        )
        self.mfcc = mfcc.ProcessWav(**wav_kwargs)

        # Parent-less VirtualConv built from the MFCC window/hop sizes; it is
        # only used as the `parent_vc` of the WaveNet below.
        root_vc = vconv.VirtualConv(
            filter_info=hps.mfcc_win_sz,
            stride=hps.mfcc_hop_sz,
            parent=None,
            name='MFCC',
        )

        self.wavenet = wn.WaveNet(hps, parent_vc=root_vc)
        self.objective = wn.RecLoss()
        self._init_geometry(hps.n_win_batch)
Exemple #3
0
 def _initialize(self):
     """Run base-class initialisation and build the jitter/MFCC helpers.

     Every entry of ``self.init_args`` is copied onto the instance as an
     attribute before the helpers are created.
     NOTE(review): ``jitter_prob``, ``sample_rate``, ``mfcc_win_sz``,
     ``mfcc_hop_sz``, ``n_mels`` and ``n_mfcc`` are presumably supplied
     through ``init_args`` -- confirm against the caller.
     """
     # The base-class initializer is invoked from here.
     super(Slice, self).__init__()
     self.target_device = None

     # Promote the stored constructor arguments to instance attributes.
     self.__dict__.update(self.init_args)

     self.jitter = jitter.Jitter(self.jitter_prob)

     wav_kwargs = dict(
         sample_rate=self.sample_rate,
         win_sz=self.mfcc_win_sz,
         hop_sz=self.mfcc_hop_sz,
         n_mels=self.n_mels,
         n_mfcc=self.n_mfcc,
     )
     self.mfcc_proc = mfcc.ProcessWav(**wav_kwargs)

     # Parent-less VirtualConv built from the MFCC window/hop sizes.
     self.mfcc_vc = vconv.VirtualConv(
         filter_info=self.mfcc_win_sz,
         stride=self.mfcc_hop_sz,
         parent=None,
         name='MFCC',
     )
Exemple #4
0
    def __init__(self, n_mid, sample_rate_ms, win_length_ms, hop_length_ms,
                 n_mels, n_mfcc):
        """Create the MFCC pre-processor and the nine-layer conv stack.

        Args:
            n_mid: channel count used as input/output width of every layer
                after the first one.
            sample_rate_ms, win_length_ms, hop_length_ms, n_mels, n_mfcc:
                forwarded positionally, in this order, to
                ``mfcc.ProcessWav``.
        """
        super(Encoder, self).__init__()

        self.pre = mfcc.ProcessWav(
            sample_rate_ms, win_length_ms, hop_length_ms, n_mels, n_mfcc)

        # The first layer consumes whatever width the pre-processor reports.
        first_width = self.pre.n_out

        # Layers are created strictly in network order.
        layers = [
            ConvReLURes(first_width, n_mid, 3, do_res=False),
            ConvReLURes(n_mid, n_mid, 3),
            ConvReLURes(n_mid, n_mid, 4, stride=2, do_res=False),
        ]
        layers += [ConvReLURes(n_mid, n_mid, 3) for _ in range(2)]
        layers += [ConvReLURes(n_mid, n_mid, 1) for _ in range(4)]

        self.net = nn.Sequential(*layers)
Exemple #5
0
def convert(catalog,
            pfx,
            n_quant,
            sample_rate=16000,
            win_sz=400,
            hop_sz=160,
            n_mels=80,
            n_mfcc=13):
    """Convert a catalog of sound files into flat data files plus an index.

    Three files are written:

    * ``pfx + '.dat'`` -- the mu-encoded samples of every file, concatenated.
    * ``pfx + '.mel'`` -- the output of ``mfcc_proc.func`` for every file,
      transposed to (T, C), flattened and concatenated.
    * ``pfx + '.ind'`` -- a pickled dict describing the two files above.

    Args:
        catalog: sized iterable of ``(voice_id, snd_path)`` pairs
            (``len(catalog)`` is used for progress reporting).
        pfx: path prefix of the three output files.
        n_quant: number of quantization levels passed to
            ``util.mu_encode_np``; it also selects the on-disk sample dtype.
        sample_rate: rate requested from ``librosa.load`` and passed to
            ``mfcc.ProcessWav``.
        win_sz: window size passed to ``mfcc.ProcessWav`` and stored in the
            index as ``'window_size'``.
        hop_sz: hop size passed to ``mfcc.ProcessWav`` and stored in the
            index as ``'hop_size'``.
        n_mels: forwarded to ``mfcc.ProcessWav``.
        n_mfcc: forwarded to ``mfcc.ProcessWav``.

    Returns:
        None. All results are written to disk.
    """
    mfcc_proc = mfcc.ProcessWav(sample_rate, win_sz, hop_sz, n_mels, n_mfcc)

    # Pick the narrowest integer dtype for the given number of levels.
    if n_quant <= 2**8:
        snd_dtype = np.uint8
    elif n_quant <= 2**15:
        snd_dtype = np.int16
    else:
        snd_dtype = np.int32

    snd_file = pfx + '.dat'
    ind_file = pfx + '.ind'
    mel_file = pfx + '.mel'
    ind = {'voice_id': [], 'n_snd_elem': [], 'n_mel_elem': [], 'snd_path': []}
    n_snd_elem = 0
    n_mel_elem = 0
    n_mel_chan = None

    with open(snd_file, 'wb') as snd_fh, open(mel_file, 'wb') as mel_fh:
        for (voice_id, snd_path) in catalog:
            # `sr` is passed by keyword: recent librosa releases no longer
            # accept it positionally, and older releases accept the keyword.
            snd, _ = librosa.load(snd_path, sr=sample_rate)
            snd_mu = util.mu_encode_np(snd, n_quant).astype(snd_dtype)
            # mel: C, T  (n_mels, n_timesteps)
            # reshape to T, C and flatten
            mel = mfcc_proc.func(snd)
            if n_mel_chan is None:
                # Channel count is taken from the first file processed.
                n_mel_chan = mel.shape[0]

            mel = mel.transpose((1, 0)).flatten()
            snd_fh.write(snd_mu.data)
            mel_fh.write(mel.data)
            ind['voice_id'].append(voice_id)
            ind['n_snd_elem'].append(snd.size)
            ind['n_mel_elem'].append(mel.size)
            ind['snd_path'].append(snd_path)

            n_done = len(ind['voice_id'])
            if n_done % 100 == 0:
                # `file=stderr` belongs to print(), not to str.format();
                # otherwise the progress line ends up on stdout.
                print('Converted {} files of {}.'.format(n_done,
                                                         len(catalog)),
                      file=stderr)
                stderr.flush()
            n_snd_elem += snd.size
            n_mel_elem += mel.size

    with open(ind_file, 'wb') as ind_fh:
        index = {
            'window_size': win_sz,
            'hop_size': hop_sz,
            'n_snd_elem': n_snd_elem,
            'n_mel_elem': n_mel_elem,
            'n_mel_chan': n_mel_chan,
            'snd_dtype': snd_dtype,
            'n_quant': n_quant
        }
        # NOTE(review): `ind` also has 'n_snd_elem' and 'n_mel_elem' keys, so
        # this update replaces the scalar totals above with the per-file
        # lists. Left as is because it defines the on-disk index format --
        # confirm with the readers of the .ind file before changing it.
        index.update(ind)
        pickle.dump(index, ind_fh)