Code example #1
File: test_smoke.py Project: entn-at/percivaltts
    def test_data(self):
        import data
        import numpy as np  # used below; presumably imported at module level in the original test file

        fids = readids(cptest + 'file_id_list.scp')

        path, shape = data.getpathandshape('dummy.fwlspec')
        self.assertEqual(path, 'dummy.fwlspec')
        self.assertIsNone(shape)
        path, shape = data.getpathandshape('dummy.fwlspec:(-1,129)')
        self.assertEqual(path, 'dummy.fwlspec')
        self.assertEqual(shape, (-1, 129))
        path, shape = data.getpathandshape('dummy.fwlspec:(-1,129)', (-1, 12))
        self.assertEqual(path, 'dummy.fwlspec')
        self.assertEqual(shape, (-1, 12))
        path, shape = data.getpathandshape('dummy.fwlspec', (-1, 12))
        self.assertEqual(path, 'dummy.fwlspec')
        self.assertEqual(shape, (-1, 12))
        dim = data.getlastdim('dummy.fwlspec')
        self.assertEqual(dim, 1)
        dim = data.getlastdim('dummy.fwlspec:(-1,129)')
        self.assertEqual(dim, 129)

        indir = cptest + 'binary_label_' + str(
            lab_size) + '_norm_minmaxm11/*.lab:(-1,' + str(lab_size) + ')'
        Xs = data.load(indir,
                       fids,
                       shape=None,
                       frameshift=0.005,
                       verbose=1,
                       label='Xs: ')
        self.assertEqual(len(Xs), 10)
        print(Xs[0].shape)
        self.assertEqual(Xs[0].shape, (667, lab_size))

        print(data.gettotallen(Xs))
        self.assertEqual(data.gettotallen(Xs), 5694)

        outdir = cptest + 'wav_cmp_lf0_fwlspec65_fwnm17_bndnmnoscale/*.cmp:(-1,83)'
        Ys = data.load(outdir,
                       fids,
                       shape=None,
                       frameshift=0.005,
                       verbose=1,
                       label='Ys: ')
        print('len(Ys)={}'.format(len(Ys)))
        self.assertEqual(len(Ys), 10)
        print('Ys[0].shape={}'.format(Ys[0].shape))
        self.assertEqual(Ys[0].shape, (666, 83))

        wdir = cptest + 'wav_fwlspec65_weights/*.w:(-1,1)'
        Ws = data.load(wdir,
                       fids,
                       shape=None,
                       frameshift=0.005,
                       verbose=1,
                       label='Ws: ')
        self.assertEqual(len(Ws), 10)

        Xs, Ys, Ws = data.croplen([Xs, Ys, Ws])

        [Xs, Ys], Ws = data.croplen_weight([Xs, Ys], Ws, thresh=0.5)

        Xs_w_stop = data.addstop(Xs)

        X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
            indir,
            outdir,
            wdir,
            fids,
            length=None,
            lengthmax=100,
            maskpadtype='randshift',
            inouttimesync=False)
        X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
            indir,
            outdir,
            wdir,
            fids,
            length=None,
            lengthmax=100,
            maskpadtype='randshift')
        X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
            indir,
            outdir,
            wdir,
            fids,
            length=None,
            lengthmax=100,
            maskpadtype='randshift',
            cropmode='begendbigger')
        X_train, MX_train, Y_train, MY_train, W_train = data.load_inoutset(
            indir,
            outdir,
            wdir,
            fids,
            length=None,
            lengthmax=100,
            maskpadtype='randshift',
            cropmode='all')

        worst_val = data.cost_0pred_rmse(Ys)
        print('worst_val={}'.format(worst_val))

        worst_val = data.cost_0pred_rmse(Ys[0])
        print('worst_val={}'.format(worst_val))

        def data_cost_model_mfn(Xs, Ys):
            return np.std(Ys)  # TODO Make this more useful

        X_vals = data.load(indir, fids)
        Y_vals = data.load(outdir, fids)
        X_vals, Y_vals = data.croplen([X_vals, Y_vals])
        cost = data.cost_model_mfn(data_cost_model_mfn, [X_vals, Y_vals])
        print(cost)

        class SmokyModel:
            def predict(self, Xs):
                return np.zeros([1, Xs.shape[1], 83])

        mod = SmokyModel()
        cost = data.cost_model_prediction_rmse(mod, [Xs], Ys)
        print(cost)

        std = data.prediction_mstd(mod, [Xs])
        print(std)

        rms = data.prediction_rms(mod, [Xs])
        print(rms)
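The assertions above fully pin down the small path:(shape) string convention that data.getpathandshape parses. For reference, a minimal sketch with the same observable behaviour (a hypothetical stand-in, not percivaltts' actual implementation):

import ast

def parse_path_and_shape(spec, shape=None):
    # 'file.ext:(-1,129)' -> ('file.ext', (-1, 129)); a bare 'file.ext' -> ('file.ext', None)
    path, sep, shapestr = spec.partition(':')
    parsed = ast.literal_eval(shapestr) if sep else None
    # An explicit shape argument overrides the one encoded in the string, as asserted above.
    return path, (shape if shape is not None else parsed)

assert parse_path_and_shape('dummy.fwlspec') == ('dummy.fwlspec', None)
assert parse_path_and_shape('dummy.fwlspec:(-1,129)') == ('dummy.fwlspec', (-1, 129))
assert parse_path_and_shape('dummy.fwlspec:(-1,129)', (-1, 12)) == ('dummy.fwlspec', (-1, 12))
assert parse_path_and_shape('dummy.fwlspec', (-1, 12)) == ('dummy.fwlspec', (-1, 12))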
Code example #2
    def train_oneparamset(self, indir, outdir, wdir, fid_lst_tra, fid_lst_val, params_savefile, trialstr='', cont=None):

        print('Loading all validation data at once ...')
        # X_val, Y_val = data.load_inoutset(indir, outdir, wdir, fid_lst_val, verbose=1)
        X_vals = data.load(indir, fid_lst_val, verbose=1, label='Context labels: ')
        Y_vals = data.load(outdir, fid_lst_val, verbose=1, label='Output features: ')
        X_vals, Y_vals = data.croplen([X_vals, Y_vals])
        print('    {} validation files'.format(len(fid_lst_val)))
        print('    number of validation files / train files: {:.2f}%'.format(100.0*float(len(fid_lst_val))/len(fid_lst_tra)))

        print('Model initial status before training')
        worst_val = data.cost_0pred_rmse(Y_vals)
        print("    0-pred validation RMSE = {} (100%)".format(worst_val))
        init_pred_rms = data.prediction_rms(self._model, [X_vals])
        print('    initial RMS of prediction = {}'.format(init_pred_rms))
        init_val = data.cost_model_prediction_rmse(self._model, [X_vals], Y_vals)
        best_val = None
        print("    initial validation RMSE = {} ({:.4f}%)".format(init_val, 100.0*init_val/worst_val))

        nbbatches = int(len(fid_lst_tra)/self.cfg.train_batch_size)
        print('    using {} batches of {} sentences each'.format(nbbatches, self.cfg.train_batch_size))
        print('    model #parameters={}'.format(self._model.count_params()))

        nbtrainframes = 0
        for fid in fid_lst_tra:
            X = data.loadfile(outdir, fid)
            nbtrainframes += X.shape[0]
        print('    Training set: {} sentences, #frames={} ({})'.format(len(fid_lst_tra), nbtrainframes, time.strftime('%H:%M:%S', time.gmtime((nbtrainframes*self._model.vocoder.shift)))))
        print('    #parameters/#frames={:.2f}'.format(float(self._model.count_params())/nbtrainframes))
        if self.cfg.train_nbepochs_scalewdata and self.cfg.train_batch_lengthmax is not None:
            # During an epoch, the training does _not_ see the whole data, since cfg.train_batch_lengthmax is limited and smaller than the sentence size.
            # To compensate for this, and to make the config below less dependent on the data, the min and max nbepochs are scaled according to the number of missing frames.
            # TODO Should consider only non-silent frames; many recordings have long pre- and post-silences.
            epochcoef = nbtrainframes/float(self.cfg.train_batch_lengthmax*len(fid_lst_tra))
            print('    scale number of epochs wrt number of frames')
            self.cfg.train_min_nbepochs = int(self.cfg.train_min_nbepochs*epochcoef)
            self.cfg.train_max_nbepochs = int(self.cfg.train_max_nbepochs*epochcoef)
            print('        train_min_nbepochs={}'.format(self.cfg.train_min_nbepochs))
            print('        train_max_nbepochs={}'.format(self.cfg.train_max_nbepochs))

        self.prepare()  # This has to be overridden by sub-classes

        costs = defaultdict(list)
        epochs_modelssaved = []
        epochs_durs = []
        nbnodecepochs = 0
        generator_updates = 0
        epochstart = 1
        if cont and len(glob.glob(os.path.splitext(params_savefile)[0]+'-trainingstate-last.h5*'))>0:
            print('    reloading previous training state ...')
            savedcfg, extras, rngstate = self.loadTrainingState(os.path.splitext(params_savefile)[0]+'-trainingstate-last.h5')
            np.random.set_state(rngstate)
            cost_val = extras['cost_val']
            # Restoring some local variables
            costs = extras['costs']
            epochs_modelssaved = extras['epochs_modelssaved']
            epochs_durs = extras['epochs_durs']
            generator_updates = extras['generator_updates']
            epochstart = extras['epoch']+1
            # Restore the saving criteria only if none of these 3 cfg values changed:
            if (savedcfg.train_min_nbepochs==self.cfg.train_min_nbepochs) and (savedcfg.train_max_nbepochs==self.cfg.train_max_nbepochs) and (savedcfg.train_cancel_nodecepochs==self.cfg.train_cancel_nodecepochs):
                best_val = extras['best_val']
                nbnodecepochs = extras['nbnodecepochs']

        print_log("    start training ...")
        epoch = -1
        for epoch in range(epochstart,1+self.cfg.train_max_nbepochs):
            timeepochstart = time.time()
            rndidx = np.arange(int(nbbatches*self.cfg.train_batch_size))    # Restart from the ordered state to make the shuffling repeatable after reloading a training state (the shuffling will be different anyway)
            np.random.shuffle(rndidx)
            rndidxb = np.split(rndidx, nbbatches)
            cost_tra = None
            costs_tra_batches = []
            costs_tra_gen_wgan_lse_ratios = []
            load_times = []
            train_times = []
            for batchid in xrange(nbbatches):

                timeloadstart = time.time()
                print_tty('\r    Training batch {}/{}'.format(1+batchid, nbbatches))

                # Load training data online, because data is often too heavy to hold in memory
                fid_lst_trab = [fid_lst_tra[bidx] for bidx in rndidxb[batchid]]
                X_trab, Y_trab, W_trab = data.load_inoutset(indir, outdir, wdir, fid_lst_trab, length=self.cfg.train_batch_length, lengthmax=self.cfg.train_batch_lengthmax, maskpadtype=self.cfg.train_batch_padtype, cropmode=self.cfg.train_batch_cropmode)

                if 0:  # Debugging code: plot the batch (disabled)
                    import matplotlib.pyplot as plt
                    plt.ion()
                    plt.imshow(Y_trab[0,].T, origin='lower', aspect='auto', interpolation='none', cmap='jet')
                    from IPython.core.debugger import Pdb; Pdb().set_trace()

                load_times.append(time.time()-timeloadstart)
                print_tty(' (iter load: {:.6f}s); training '.format(load_times[-1]))

                timetrainstart = time.time()

                cost_tra = self.train_on_batch(batchid, X_trab, Y_trab)  # This has to be overridden by sub-classes

                train_times.append(time.time()-timetrainstart)

                if cost_tra is not None:
                    print_tty('err={:.4f} (iter train: {:.4f}s)                  '.format(cost_tra, train_times[-1]))
                    if np.isnan(cost_tra):                      # pragma: no cover
                        print_log('    previous costs: {}'.format(costs_tra_batches))
                        print_log('    E{} Batch {}/{} train cost = {}'.format(epoch, 1+batchid, nbbatches, cost_tra))
                        raise ValueError('ERROR: Training cost is nan!')
                    costs_tra_batches.append(cost_tra)
            print_tty('\r                                                           \r')
            costs['model_training'].append(np.mean(costs_tra_batches))

            cost_val = self.update_validation_cost(costs, X_vals, Y_vals)  # This has to be overridden by sub-classes

            print_log("    E{}/{} {}  cost_tra={:.6f} (load:{}s train:{}s)  cost_val={:.6f} ({:.4f}% RMSE)  {} MiB GPU {} MiB RAM".format(epoch, self.cfg.train_max_nbepochs, trialstr, costs['model_training'][-1], time2str(np.sum(load_times)), time2str(np.sum(train_times)), cost_val, 100*costs['model_rmse_validation'][-1]/worst_val, tf_gpu_memused(), proc_memresident()))
            sys.stdout.flush()

            if np.isnan(cost_val): raise ValueError('ERROR: Validation cost is nan!')
            # if (self._errtype=='LSE') and (cost_val>=self.cfg.train_cancel_validthresh*worst_val): raise ValueError('ERROR: Validation cost blew up! It is higher than {} times the worst possible values'.format(self.cfg.train_cancel_validthresh)) # TODO

            self._model.save(os.path.splitext(params_savefile)[0]+'-last.h5', printfn=print_log, extras={'cost_val':cost_val})

            # Save model parameters
            if epoch>=self.cfg.train_min_nbepochs: # Assume no model is good enough before self.cfg.train_min_nbepochs
                if ((best_val is None) or (cost_val<best_val)): # Among all trials of hyper-parameter optimisation
                    best_val = cost_val
                    self._model.save(params_savefile, printfn=print_log, extras={'cost_val':cost_val}, infostr='(E{} C{:.4f})'.format(epoch, best_val))
                    epochs_modelssaved.append(epoch)
                    nbnodecepochs = 0
                else:
                    nbnodecepochs += 1

            if self.cfg.train_log_plot:
                print_log('    saving plots')
                log_plot_costs(costs, worst_val, fname=os.path.splitext(params_savefile)[0]+'-fig_costs_'+trialstr+'.svg', epochs_modelssaved=epochs_modelssaved)

                nbsamples = 2
                nbsamples = min(nbsamples, len(X_vals))
                Y_preds = []
                for sampli in xrange(nbsamples):
                    Y_preds.append(self._model.predict(np.reshape(X_vals[sampli], [1]+[s for s in X_vals[sampli].shape]))[0,])

                if len(epochs_modelssaved)>0 and epochs_modelssaved[-1]==epoch:
                    plotsuffix = '_best'
                else:
                    plotsuffix = '_last'
                log_plot_samples(Y_vals, Y_preds, nbsamples=nbsamples, fname=os.path.splitext(params_savefile)[0]+'-fig_samples_'+trialstr+plotsuffix+'.png', vocoder=self._model.vocoder, title='E{}'.format(epoch))

            epochs_durs.append(time.time()-timeepochstart)
            print_log('    ET: {}   max TT: {}s   train ~time left: {}'.format(time2str(epochs_durs[-1]), time2str(np.median(epochs_durs[-10:])*self.cfg.train_max_nbepochs), time2str(np.median(epochs_durs[-10:])*(self.cfg.train_max_nbepochs-epoch))))

            self.saveTrainingState(os.path.splitext(params_savefile)[0]+'-trainingstate-last.h5', printfn=print_log, extras={'cost_val':cost_val, 'best_val':best_val, 'costs':costs, 'epochs_modelssaved':epochs_modelssaved, 'epochs_durs':epochs_durs, 'nbnodecepochs':nbnodecepochs, 'generator_updates':generator_updates, 'epoch':epoch})

            if nbnodecepochs>=self.cfg.train_cancel_nodecepochs: # pragma: no cover
                print_log('WARNING: validation error did not decrease for {} epochs. Early stop!'.format(self.cfg.train_cancel_nodecepochs))
                break

        if best_val is None: raise ValueError('No model has been saved during training!')
        return {'epoch_stopped':epoch, 'worst_val':worst_val, 'best_epoch':epochs_modelssaved[-1] if len(epochs_modelssaved)>0 else -1, 'best_val':best_val}
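When train_nbepochs_scalewdata is enabled, the epoch-scaling branch above reduces to a single ratio: total frames in the data over frames actually seen per epoch. A worked sketch reusing the counts from code example #1 (illustrative numbers only; lengthmax=100 comes from the smoke test, not from a recommended configuration):

# Illustrative numbers: 10 training files, 5694 frames in total (see the
# asserts in code example #1), each sentence truncated to at most 100 frames.
nbtrainframes = 5694
train_batch_lengthmax = 100
nbfiles = 10

# Ratio of total frames to frames seen per epoch once sentences are truncated.
epochcoef = nbtrainframes / float(train_batch_lengthmax * nbfiles)  # 5.694

# The min/max epoch counts are scaled up accordingly.
train_min_nbepochs = int(200 * epochcoef)  # 1138
train_max_nbepochs = int(300 * epochcoef)  # 1708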
Code example #3
File: optimizer.py Project: entn-at/percivaltts
    def train_multipletrials(self,
                             indir,
                             outdir,
                             wdir,
                             fid_lst_tra,
                             fid_lst_val,
                             params,
                             params_savefile,
                             cfgtomerge=None,
                             cont=None,
                             **kwargs):
        # Hypothesis: training always uses batches

        # All kwargs arguments are specific configuration values
        # First, fill a struct with the default configuration values ...
        cfg = configuration()  # Init structure

        # LSE
        cfg.train_learningrate_log10 = -3.39794  # [potential hyper-parameter] (10**-3.39794=0.0004)
        cfg.train_adam_beta1 = 0.9  # [potential hyper-parameter]
        cfg.train_adam_beta2 = 0.999  # [potential hyper-parameter]
        cfg.train_adam_epsilon_log10 = -8  # [potential hyper-parameter]
        # WGAN
        cfg.train_D_learningrate = 0.0001  # [potential hyper-parameter]
        cfg.train_D_adam_beta1 = 0.5  # [potential hyper-parameter]
        cfg.train_D_adam_beta2 = 0.9  # [potential hyper-parameter]
        cfg.train_G_learningrate = 0.001  # [potential hyper-parameter]
        cfg.train_G_adam_beta1 = 0.5  # [potential hyper-parameter]
        cfg.train_G_adam_beta2 = 0.9  # [potential hyper-parameter]
        cfg.train_pg_lambda = 10  # [potential hyper-parameter]
        cfg.train_LScoef = 0.25  # If >0, mix LSE and WGAN losses (def. 0.25)
        cfg.train_validation_ltm_winlen = 20  # TODO Now that min and max epochs are used, the actual D cost could be used instead of ltm(D cost)

        cfg.train_min_nbepochs = 200
        cfg.train_max_nbepochs = 300
        cfg.train_nbepochs_scalewdata = True
        cfg.train_cancel_nodecepochs = 50
        cfg.train_cancel_validthresh = 10.0  # Cancel train if valid err is more than N times higher than the initial worst valid err
        cfg.train_batch_size = 5  # [potential hyper-parameter]
        cfg.train_batch_padtype = 'randshift'  # See load_inoutset(..., maskpadtype)
        cfg.train_batch_cropmode = 'begendbigger'  # 'begend', 'begendbigger', 'all'
        cfg.train_batch_length = None  # Duration [frames] of each batch (def. None, i.e. the shortest duration of the batch if using maskpadtype = 'randshift') # TODO Remove for lengthmax
        cfg.train_batch_lengthmax = None  # Maximum duration [frames] of each batch
        cfg.train_nbtrials = 1  # Just run one training only
        cfg.train_hypers = []
        #cfg.train_hypers = [('learningrate_log10', -6.0, -2.0), ('adam_beta1', 0.8, 1.0)] # For ADAM
        #cfg.train_hypers = [('train_D_learningrate', 0.0001, 0.1), ('train_D_adam_beta1', 0.8, 1.0), ('train_D_adam_beta2', 0.995, 1.0), ('train_batch_size', 1, 200)] # For ADAM
        cfg.train_log_plot = True
        # ... add/overwrite configuration from cfgtomerge ...
        if cfgtomerge is not None: cfg.merge(cfgtomerge)
        # ... and add/overwrite specific configuration from the generic arguments
        for kwarg, value in kwargs.items():
            setattr(cfg, kwarg, value)

        print('Training configuration')
        cfg.print_content()

        print('Loading all validation data at once ...')
        # X_val, Y_val = data.load_inoutset(indir, outdir, wdir, fid_lst_val, verbose=1)
        X_vals = data.load(indir,
                           fid_lst_val,
                           verbose=1,
                           label='Context labels: ')
        Y_vals = data.load(outdir,
                           fid_lst_val,
                           verbose=1,
                           label='Output features: ')
        X_vals, Y_vals = data.croplen([X_vals, Y_vals])
        print('    {} validation files'.format(len(fid_lst_val)))
        print('    number of validation files / train files: {:.2f}%'.format(
            100.0 * float(len(fid_lst_val)) / len(fid_lst_tra)))

        if cfg.train_nbtrials > 1:
            self._model.saveAllParams(os.path.splitext(params_savefile)[0] +
                                      '-init.pkl',
                                      cfg=cfg,
                                      printfn=print_log)

        try:
            trials = []
            for triali in xrange(
                    1, 1 + cfg.train_nbtrials
            ):  # Run multiple trials with different hyper-parameters
                print('\nStart trial {} ...'.format(triali))

                try:
                    train_rets = None
                    trialstr = 'trial' + str(triali)
                    if len(cfg.train_hypers) > 0:
                        cfg, hyperstr = self.randomize_hyper(cfg)
                        trialstr += ',' + hyperstr
                        print('    randomized hyper-parameters: ' + trialstr)
                    if cfg.train_nbtrials > 1:
                        self._model.loadAllParams(
                            os.path.splitext(params_savefile)[0] + '-init.pkl')

                    timewholetrainstart = time.time()
                    train_rets = self.train(params,
                                            indir,
                                            outdir,
                                            wdir,
                                            fid_lst_tra,
                                            fid_lst_val,
                                            X_vals,
                                            Y_vals,
                                            cfg,
                                            params_savefile,
                                            trialstr=trialstr,
                                            cont=cont)
                    cont = None
                    print_log('Total trial run time: {}s'.format(
                        time2str(time.time() - timewholetrainstart)))

                except KeyboardInterrupt:  # pragma: no cover
                    raise KeyboardInterrupt
                except (ValueError, GpuArrayException):  # pragma: no cover
                    if len(cfg.train_hypers) > 0:
                        print_log('WARNING: Training crashed!')
                        import traceback
                        traceback.print_exc()
                    else:
                        print_log('ERROR: Training crashed!')
                        raise  # Crash the whole training if there is only one trial

                if cfg.train_nbtrials > 1:
                    # Save the results of each trial, but only the non-crashed trials
                    if train_rets is not None:
                        ntrialline = [triali] + [
                            getattr(cfg, field[0])
                            for field in cfg.train_hypers
                        ]
                        ntrialline = ntrialline + [
                            train_rets[key]
                            for key in sorted(train_rets.keys())
                        ]
                        header = 'trials ' + ' '.join([
                            field[0] for field in cfg.train_hypers
                        ]) + ' ' + ' '.join(sorted(train_rets.keys()))
                        trials.append(ntrialline)
                        np.savetxt(os.path.splitext(params_savefile)[0] +
                                   '-trials.txt',
                                   np.vstack(trials),
                                   header=header)

        except KeyboardInterrupt:  # pragma: no cover
            print_log('WARNING: Training interrupted by user!')

        print_log('Finished')
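randomize_hyper itself is not shown on this page. Given that cfg.train_hypers holds (name, min, max) triples (see the commented examples above), a plausible sketch of what such a step does is the following (an assumption about its behaviour, not the actual percivaltts code):

import numpy as np

def randomize_hyper_sketch(cfg):
    # Draw each hyper-parameter uniformly within its (name, min, max) bounds
    # and build the trial description string appended to trialstr above.
    descs = []
    for name, vmin, vmax in cfg.train_hypers:
        value = np.random.uniform(vmin, vmax)
        # Integer-valued hypers such as train_batch_size would need rounding here.
        setattr(cfg, name, value)
        descs.append('{}={:.4f}'.format(name, value))
    return cfg, ','.join(descs)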
Code example #4
    def generate_wav(
            self,
            inpath,
            outpath,
            fid_lst,
            syndir,
            vocoder,
            wins=[[-0.5, 0.0, 0.5], [1.0, -2.0, 1.0]],
            do_objmeas=True,
            do_resynth=True,
            pp_mcep=False,
            pp_spec_pf_coef=-1,  # Common value is 1.2
            pp_spec_extrapfreq=-1):
        from external.pulsemodel import sigproc as sp

        print('Reloading output stats')
        # Assume mean/std normalisation of the output
        Ymean = np.fromfile(os.path.dirname(outpath) + '/mean4norm.dat',
                            dtype='float32')
        Ystd = np.fromfile(os.path.dirname(outpath) + '/std4norm.dat',
                           dtype='float32')

        print('\nLoading generation data at once ...')
        X_test = data.load(inpath, fid_lst, verbose=1)
        if do_objmeas:
            y_test = data.load(outpath, fid_lst, verbose=1)
            X_test, y_test = data.croplen((X_test, y_test))

        def denormalise(CMP, wins=[[-0.5, 0.0, 0.5], [1.0, -2.0, 1.0]]):

            CMP = CMP * np.tile(Ystd, (CMP.shape[0], 1)) + np.tile(
                Ymean, (CMP.shape[0], 1))  # De-normalise

            if len(wins) > 0:
                # Apply MLPG
                from external.merlin.mlpg_fast import MLParameterGenerationFast as MLParameterGeneration
                mlpg_algo = MLParameterGeneration(delta_win=wins[0],
                                                  acc_win=wins[1])
                var = np.tile(Ystd**2, (CMP.shape[0], 1))  # Simplification!
                CMP = mlpg_algo.generation(CMP, var, len(Ymean)//3)  # // keeps the static feature size an int ([static, delta, acc] stacking)
            else:
                CMP = CMP[:, :vocoder.featuressize()]

            return CMP

        if not os.path.isdir(syndir): os.makedirs(syndir)
        if do_resynth and (not os.path.isdir(syndir + '-resynth')):
            os.makedirs(syndir + '-resynth')

        for vi in xrange(len(X_test)):

            print('Generating {}/{} ...'.format(1 + vi, len(X_test)))
            print('    Predict ...')

            if do_resynth:
                CMP = denormalise(y_test[vi], wins=[])
                resyn = vocoder.synthesis(vocoder.fs, CMP, pp_mcep=False)
                sp.wavwrite(syndir + '-resynth/' + fid_lst[vi] + '.wav',
                            resyn,
                            vocoder.fs,
                            norm_abs=True,
                            force_norm_abs=True,
                            verbose=1)

            CMP = self.predict(
                np.reshape(X_test[vi], [1] + [s for s in X_test[vi].shape]))
            CMP = CMP[0, :, :]

            CMP = denormalise(CMP, wins=wins)
            syn = vocoder.synthesis(vocoder.fs, CMP, pp_mcep=pp_mcep)
            sp.wavwrite(syndir + '/' + fid_lst[vi] + '.wav',
                        syn,
                        vocoder.fs,
                        norm_abs=True,
                        force_norm_abs=True,
                        verbose=1)

            if do_objmeas: vocoder.objmeasures_add(CMP, y_test[vi])

        if do_objmeas: vocoder.objmeasures_stats()

        print_log('Generation finished')
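The denormalise helper above first undoes the assumed mean/std normalisation and then, if delta windows are given, runs MLPG. The np.tile calls can be replaced by plain NumPy broadcasting; a minimal equivalent sketch of the de-normalisation step alone:

import numpy as np

def denormalise_sketch(CMP, Ymean, Ystd):
    # CMP: (nbframes, featuredim); Ymean, Ystd: (featuredim,)
    # Broadcasting applies the per-dimension statistics to every frame,
    # which is exactly what the np.tile version above computes.
    return CMP * Ystd + Ymean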
Code example #5
    def generate_wav(
            self,
            inpath,
            outpath,
            fid_lst,
            syndir,
            do_objmeas=True,
            do_resynth=True,
            pp_mcep=False,
            pp_spec_pf_coef=-1,  # Common value is 1.2
            pp_spec_extrapfreq=-1,
            pp_f0_smooth=None):
        from external.pulsemodel import sigproc as sp

        print('Reloading output stats')
        # Assume mean/std normalisation of the output
        Ymean = np.fromfile(os.path.dirname(outpath) + '/mean4norm.dat',
                            dtype='float32')
        Ystd = np.fromfile(os.path.dirname(outpath) + '/std4norm.dat',
                           dtype='float32')

        print('\nLoading generation data at once ...')
        X_test = data.load(inpath, fid_lst, verbose=1)
        if do_objmeas or do_resynth:
            y_test = data.load(outpath, fid_lst, verbose=1)
            X_test, y_test = data.croplen((X_test, y_test))

        def denormalise(CMP, mlpg_ignore=False):

            CMP = CMP * np.tile(Ystd, (CMP.shape[0], 1)) + np.tile(
                Ymean, (CMP.shape[0], 1))  # De-normalise

            # TODO This should go into the vocoder, but Ystd would have to be passed as an argument ...
            #      Then again, the vocoder does not take care of the deltas composition during data composition either.
            if (self.vocoder.mlpg_wins is not None) and len(
                    self.vocoder.mlpg_wins) > 0:  # If MLPG is used
                if mlpg_ignore:
                    CMP = CMP[:, :self.vocoder.featuressizeraw()]
                else:
                    # Apply MLPG
                    from external.merlin.mlpg_fast import MLParameterGenerationFast as MLParameterGeneration
                    mlpg_algo = MLParameterGeneration(
                        delta_win=self.vocoder.mlpg_wins[0],
                        acc_win=self.vocoder.mlpg_wins[1])
                    var = np.tile(Ystd**2,
                                  (CMP.shape[0], 1))  # Simplification!
                    CMP = mlpg_algo.generation(CMP, var,
                                               self.vocoder.featuressizeraw())

            return CMP

        if not os.path.isdir(syndir): os.makedirs(syndir)
        if do_resynth and (not os.path.isdir(syndir + '-resynth')):
            os.makedirs(syndir + '-resynth')

        for vi in xrange(len(X_test)):

            print('Generating {}/{} fid={} ...'.format(1 + vi, len(X_test),
                                                       fid_lst[vi]))
            print('    Predict ...')

            if do_resynth:
                CMP = denormalise(y_test[vi], mlpg_ignore=True)
                resyn = self.vocoder.synthesis(CMP, pp_mcep=False)
                sp.wavwrite(syndir + '-resynth/' + fid_lst[vi] + '.wav',
                            resyn,
                            self.vocoder.fs,
                            norm_max=True,
                            verbose=1)

            CMP = self.predict(
                np.reshape(X_test[vi], [1] + [s for s in X_test[vi].shape]))
            CMP = CMP[0, :, :]

            CMP = denormalise(CMP)
            syn = self.vocoder.synthesis(CMP,
                                         pp_mcep=pp_mcep,
                                         pp_f0_smooth=pp_f0_smooth)
            sp.wavwrite(syndir + '/' + fid_lst[vi] + '.wav',
                        syn,
                        self.vocoder.fs,
                        norm_max=True,
                        verbose=1)

            if do_objmeas: self.vocoder.objmeasures_add(CMP, y_test[vi])

        if do_objmeas: self.vocoder.objmeasures_stats()

        print_log('Generation finished')
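Finally, a hedged usage sketch for this last generate_wav variant. Everything below is hypothetical (paths, the input feature size and the utterance IDs are made up; only the 83-dimensional output and the call signature are taken from the examples above):

# 'mod' is assumed to be a trained model instance exposing generate_wav().
fid_lst = ['AB_001', 'AB_002']  # made-up utterance IDs
mod.generate_wav(
    'labels/*.lab:(-1,601)',   # input contexts as a path:(shape) spec (601 is made up)
    'cmp/*.cmp:(-1,83)',       # output features; mean4norm.dat/std4norm.dat expected alongside
    fid_lst,
    'synthesized_wavs',        # output directory; a '-resynth' sibling is created too
    do_objmeas=True,           # accumulate objective measures against the reference features
    do_resynth=True,           # also write vocoder resynthesis as a reference
    pp_mcep=False)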