Example #1
0
    def set_weights_per_feature(self, weights):
        """Apply one weight per feature dimension to the training unit features.

        `weights` holds a weight for every column of the unweighted training
        features.  Everything after the first `config['wave_context_length']`
        entries is stored as `self.target_weight_vector`; the complete vector
        is what gets applied to the training (and, if present, held-out) data.
        """
        n_context = self.config['wave_context_length']
        self.target_weight_vector = np.array(weights[n_context:])

        full_weights = np.array(weights)
        self.train_unit_features = weight(
            self.train_unit_features_unweighted, full_weights)

        ## nothing held out -> there is no dev set to reweight
        if not (self.holdout_percent > 0.0):
            return

        self.train_unit_features_dev = weight(
            self.train_unit_features_unweighted_dev, full_weights)
Example #2
0
    def set_target_weights(self, weights):
        """Expand per-stream target weights and reweight the training features.

        `weights` must contain exactly one value per entry of
        `self.stream_list_target`.  Each value is repeated once per
        coefficient of its stream (`self.datadims_target[stream]`), and the
        resulting list is tiled `const.target_rep_widths[...]` times for the
        current target representation.
        """
        assert len(weights) == len(self.stream_list_target), (weights, self.stream_list_target)

        ## per-stream -> per-coefficient
        per_coeff = []
        for idx, stream in enumerate(self.stream_list_target):
            per_coeff += [weights[idx]] * self.datadims_target[stream]

        ## list repetition (not elementwise scaling): tile the per-coeff block
        reps = const.target_rep_widths[self.target_representation]
        weight_vec = np.array(per_coeff * reps)

        self.train_unit_features = weight(
            self.train_unit_features_unweighted, weight_vec)
        if self.holdout_samples > 0:
            self.train_unit_features_dev = weight(
                self.train_unit_features_unweighted_dev, weight_vec)

        ## kept so that incoming predicted acoustics can be weighted the same way
        self.target_weight_vector = weight_vec
Example #3
0
    def set_weights(self, weights):
        """Set per-stream target weights and reweight the training features.

        `weights` must contain exactly one value per entry of
        `self.stream_list_target`.  The per-stream values are expanded to
        per-coefficient weights, tiled for the target representation and
        stored in `self.target_weight_vector`.  The vector applied to the
        training data is that target vector preceded by
        `config['wave_context_length']` ones.
        """
        assert len(weights) == len(
            self.stream_list_target), (weights, self.stream_list_target)

        ## per-stream -> per-coefficient
        per_coeff = []
        for idx, stream in enumerate(self.stream_list_target):
            per_coeff += [weights[idx]] * self.datadims_target[stream]

        ## list repetition (not elementwise scaling): tile the per-coeff block
        reps = const.target_rep_widths[self.target_representation]
        weight_vec = np.array(per_coeff * reps)

        ## kept so that incoming predicted acoustics can be weighted the same way
        self.target_weight_vector = weight_vec

        ## waveform-history columns get an arbitrary weight of 1
        history_ones = np.ones(self.config['wave_context_length'])
        full_weights = np.concatenate([history_ones, weight_vec])

        self.train_unit_features = weight(
            self.train_unit_features_unweighted, full_weights)

        ## nothing held out -> there is no dev set to reweight
        if not (self.holdout_percent > 0.0):
            return

        self.train_unit_features_dev = weight(
            self.train_unit_features_unweighted_dev, full_weights)
Example #4
0
    def set_join_weights(self, weights):
        """Expand per-stream join weights and rebuild the weighted join data.

        `weights` must contain exactly one value per entry of
        `self.stream_list_join`.  Each value is repeated once per coefficient
        of its stream (`self.datadims_join[stream]`) and the result is applied
        to `self.join_contexts_unweighted`.  `self.unit_end_data` is set to
        the weighted rows from the second onwards and `self.unit_start_data`
        to the weighted rows up to the last-but-one; both are then shortened
        by `self.holdout_samples` trailing rows when that count is positive.
        """
        assert len(weights) == len(self.stream_list_join)

        ## per-stream -> per-coefficient
        per_coeff = []
        for idx, stream in enumerate(self.stream_list_join):
            per_coeff += [weights[idx]] * self.datadims_join[stream]
        weight_vec = np.array(per_coeff)

        ## TODO: be more explicit about how this copies and does NOT weight
        ## the original self.join_contexts_unweighted
        weighted = weight(self.join_contexts_unweighted, weight_vec)

        ## Basic slicing -- this should not copy:
        self.unit_end_data = weighted[1:, :]
        self.unit_start_data = weighted[:-1, :]   ## <-- only this one is used!

        n_held_out = self.holdout_samples
        if n_held_out > 0:
            ## drop the held-out rows from the tail of both views
            self.unit_end_data = self.unit_end_data[:-n_held_out, :]
            self.unit_start_data = self.unit_start_data[:-n_held_out, :]
Example #5
0
    def synth_utt(self, base, synth_type='tune', outstem=''):
        """Synthesise utterance `base` with sample-based greedy unit selection.

        The target streams for `base` are composed, upsampled to one row per
        output sample, optionally standardised, weighted with
        `self.target_weight_vector` and handed to `self.greedy_joint_search`.
        The generated waveform is denormalised when the module-level
        `NORMWAVE` flag is set, converted with `mu2lin` when
        `config['nonlin_wave']` is set, and finally written to
        `<outstem>.wav` -- unless `self.mode_of_operation` is
        'stream_weight_balancing', in which case nothing is written.

        Args:
            base: utterance name passed to `compose_speech`; also used as the
                output file stem when `outstem` is empty.
            synth_type: 'test' or 'tune'; chooses which target data
                directories are read.  Any other value terminates the
                process through `sys.exit`.
            outstem: output path without the '.wav' extension.  When empty,
                the file goes to
                <workdir>/synthesis_<synth_type>/<train cond>/<synth cond>/<base>
                and that directory is created if needed.

        Returns:
            None.
        """
        if synth_type == 'test':
            data_dirs = self.test_data_target_dirs
        elif synth_type == 'tune':
            data_dirs = self.tune_data_target_dirs
        else:
            sys.exit('Unknown synth_type  943957011')

        if not outstem:
            train_condition = make_train_condition_name(self.config)
            synth_condition = self.make_synthesis_condition_name()
            synth_dir = os.path.join(self.config['workdir'],
                                     'synthesis_%s' % (synth_type),
                                     train_condition, synth_condition)
            safe_makedir(synth_dir)

            self.report('               ==== SYNTHESISE %s ====' % (base))
            outstem = os.path.join(synth_dir, base)
        else:
            self.report('               ==== SYNTHESISE %s ====' % (outstem))

        start_time = self.start_clock('Get speech ')
        speech = compose_speech(data_dirs, base, self.stream_list_target,
                                self.config['datadims_target'])

        ### upsample before standardisation (inefficient, but standardisation
        ### rewrites uv values?? TODO: check this)
        nframes, _ = speech.shape
        len_wave = int(self.rate * self.fshift_seconds * nframes)
        speech = resample.upsample(len_wave,
                                   self.rate,
                                   self.fshift_seconds,
                                   speech,
                                   f0_dim=-1,
                                   convention='world')

        ## This used to read `if (self.config[...], True):`, i.e. it tested a
        ## non-empty tuple, which is always true, so the setting was ignored.
        ## Standardisation stays the default when the key is absent.
        if self.config.get('standardise_target_data', True):
            speech = standardise(speech, self.mean_vec_target,
                                 self.std_vec_target)

        unit_features = weight(speech, self.target_weight_vector)
        self.stop_clock(start_time)

        ### always do greedy search for sample-based selection
        ### (the selected path itself is not needed here, only the waveform)
        _, gen_wave = self.greedy_joint_search(unit_features)

        if NORMWAVE:
            print('predenorm stats:')
            print(gen_wave.mean(), gen_wave.std())
            ### denormalise:-
            gen_wave = (gen_wave * self.wave_std) + self.wave_mean
            print('denorm stats:')
            print(gen_wave.mean(), gen_wave.std())

        if self.config['nonlin_wave']:
            gen_wave = mu2lin(gen_wave)

        if self.mode_of_operation == 'stream_weight_balancing':
            self.report('')
            self.report('balancing stream weights -- skip making waveform')
            self.report('')
        else:
            start_time = self.start_clock('Wrtie wave')
            write_wave(gen_wave, outstem + '.wav', self.rate)
            self.stop_clock(start_time)
            self.report('Output wave: %s.wav' % (outstem))
            self.report('')
            self.report('')
Example #6
0
    def synth_utt(self, base, synth_type='tune', outstem='', outdir=''): 
        """Select a unit sequence for utterance `base` and, usually, write its waveform.

        The target streams for `base` are composed (and optionally pitch
        synchronised), standardised, weighted with `self.target_weight_vector`
        and searched with `self.greedy_joint_search` -- or, in the
        `debug_with_adjacent_frames` configuration, replaced by a fixed run of
        unit indices.  What happens next depends on `self.mode_of_operation`:

        * 'stream_weight_balancing': no waveform is made; per-stream target
          and join scores for the selected path are returned.
        * anything else: the selected units are concatenated into
          `<outstem>.wav`, and a `<outstem>.trace.txt` file is written as well
          when `config['get_selection_info']` is set.

        Args:
            base: utterance name passed to `compose_speech`; also used as the
                output file stem when `outstem` is empty.
            synth_type: 'test' or 'tune'; chooses which target data
                directories are read.  Any other value terminates the
                process through `sys.exit`.
            outstem: output path without extension.  Must be empty when
                `outdir` is given.
            outdir: directory to write into when `outstem` is empty.  When
                both are empty the directory is
                <workdir>/synthesis_<synth_type>/<train cond>/<synth cond>.

        Returns:
            `(tscores, jscores)` in 'stream_weight_balancing' mode, otherwise
            None.
        """

        ## NOTE: lab_dir is assigned in both branches but never read again in this method.
        if synth_type == 'test':
            data_dirs = self.test_data_target_dirs
            lab_dir = self.config.get('test_lab_dir', '') ## default added for pure acoustic epoch case
        elif synth_type == 'tune':
            data_dirs = self.tune_data_target_dirs
            lab_dir = self.config.get('tune_lab_dir', '') ## default added for pure acoustic epoch case
        else:
            sys.exit('Unknown synth_type  9489384')

        ## outdir and outstem are mutually exclusive ways of choosing the output location:
        if outdir:
            assert not outstem

        if not outstem:
            train_condition = make_train_condition_name(self.config)
            synth_condition = make_synthesis_condition_name(self.config)
            if outdir:
                synth_dir = outdir
            else:
                synth_dir = os.path.join(self.config['workdir'], 'synthesis_%s'%(synth_type), train_condition, synth_condition)
            safe_makedir(synth_dir)
                
            self.report('               ==== SYNTHESISE %s ===='%(base))
            outstem = os.path.join(synth_dir, base)       
        else:
            self.report('               ==== SYNTHESISE %s ===='%(outstem))

        start_time = self.start_clock('Get speech ')
        ## The unstandardised features are kept: their last column is used further down as target f0.
        unnorm_speech = compose_speech(data_dirs, base, self.stream_list_target, \
                                self.config['datadims_target']) 

        if self.config.get('pitch_synchronise_test_data', False):
            unnorm_speech = pitch_synchronise(unnorm_speech, self.stream_list_target, \
                                self.config['datadims_target'])
            #unnorm_speech = unnorm_speech_b

        ## m and dim are not used afterwards; the unpacking only succeeds for a 2-dimensional shape.
        m,dim = unnorm_speech.shape

        speech = standardise(unnorm_speech, self.mean_vec_target, self.std_vec_target)         
            

        ## With REPLICATE_IS2018_EXP set, the first and last rows are dropped:
        if self.config.get('REPLICATE_IS2018_EXP', False):
            unit_features = speech[1:-1, :]  
        else:
            unit_features = speech

        unit_features = weight(unit_features, self.target_weight_vector)       
        ## n_units is not used afterwards.
        n_units, _ = unit_features.shape
        self.stop_clock(start_time)

        ## Optional column selection, applied only if the attribute has been set on this object:
        if hasattr(self, 'target_truncation_vector'):
            print 'truncate target streams...'
            print unit_features.shape
            unit_features = unit_features[:, self.target_truncation_vector]
            #print unit_features.shape
            #sys.exit('wewevws000')


        if self.config.get('debug_with_adjacent_frames', False):
            ## Debug mode: skip the search and use a fixed run of unit indices in 0..499
            ## (every multiepoch-th index when multiepoch > 1).
            print 'Concatenate naturally contiguous units to debug concatenation!'
            assert not self.config.get('magphase_use_target_f0', True), 'set magphase_use_target_f0 to False for using debug_with_adjacent_frames'
            multiepoch = self.config.get('multiepoch', 1)
            if multiepoch > 1:
                best_path = np.arange(0,500, multiepoch)
            else:
                best_path = np.arange(500)

        else:
            ## Normal mode: only greedy search over the 'epoch' target representation is accepted here.
            assert self.config['greedy_search']
            assert self.config.get('target_representation') == 'epoch'
            best_path = self.greedy_joint_search(unit_features)


        if self.mode_of_operation == 'stream_weight_balancing':
            self.report( '\n\n balancing stream weights -- skip making waveform \n\n')
        else:
            ## Preloading is hard-wired off, so the branch below never runs as written.
            PRELOAD_UTTS = False  ### !TODO?
            if PRELOAD_UTTS:
                start_time = self.start_clock('Preload magphase utts for sentence')
                self.preload_magphase_utts(best_path)
                self.stop_clock(start_time) 

            start_time = self.start_clock('Extract and join units')
            
            if self.config.get('store_full_magphase_sep_files', False):
                assert self.config['target_representation'] == 'epoch'
                ## Last column of the unstandardised features is exponentiated and used as f0
                ## (the TODO implies it holds log-f0).
                target_fz = unnorm_speech[:,-1]  ## TODO: unhardcode position and lf0!
                target_fz = np.exp(target_fz)
                magphase_overlap = self.config.get('magphase_overlap', 0)


                ## fzero is only passed when magphase_use_target_f0 is enabled (the default):
                if self.config.get('magphase_use_target_f0', True):
                    self.concatenateMagPhaseEpoch_sep_files(best_path, outstem + '.wav', fzero=target_fz, overlap=magphase_overlap)                
                else:
                    self.concatenateMagPhaseEpoch_sep_files(best_path, outstem + '.wav', overlap=magphase_overlap)                

            elif self.config.get('store_full_magphase', False):
                ## Same f0 extraction as above; this variant always passes fzero.
                target_fz = unnorm_speech[:,-1]
                target_fz = np.exp(target_fz)
                self.concatenateMagPhaseEpoch(best_path, outstem + '.wav', fzero=target_fz)
            else:
                sys.exit('only support store_full_magphase_sep_files / store_full_magphase')
            self.stop_clock(start_time)          
            self.report( 'Output wave: %s.wav\n\n'%(outstem ))

        ## In balancing mode, return the per-stream scores and stop (no trace file is written):
        if self.mode_of_operation == 'stream_weight_balancing':
            tscores = self.get_target_scores_per_stream(unit_features, best_path)
            jscores = self.get_join_scores_per_stream(best_path)
            return (tscores, jscores)

        ## Indexed without a default: 'get_selection_info' must be present in the config on this path.
        if self.config['get_selection_info']:
            trace_lines = self.get_path_information_epoch(unit_features, best_path)
            writelist(trace_lines, outstem + '.trace.txt')
            print 'Wrote trace file %s'%(outstem + '.trace.txt')