def set_weights_per_feature(self, weights):
    """Weight the training unit features with one weight per feature column.

    `weights` is used in full to weight the unweighted training features
    (and the held-out dev features when `self.holdout_percent` is
    positive).  The entries from index `config['wave_context_length']`
    onwards are stored separately as `self.target_weight_vector`.
    """
    n_context = self.config['wave_context_length']

    ## the part after the waveform-context columns is kept on its own:
    self.target_weight_vector = np.array(weights[n_context:])

    full_weights = np.array(weights)
    self.train_unit_features = weight(self.train_unit_features_unweighted,
                                      full_weights)

    if not self.holdout_percent > 0.0:
        return
    self.train_unit_features_dev = weight(
        self.train_unit_features_unweighted_dev, full_weights)
def set_target_weights(self, weights):
    """Expand per-stream target weights and apply them to the unit features.

    `weights` must hold exactly one value per entry of
    `self.stream_list_target`.  Each value is repeated
    `self.datadims_target[stream]` times, and the resulting per-coefficient
    list is repeated `const.target_rep_widths[self.target_representation]`
    times.  The vector is applied to the training features (and to the dev
    features when `self.holdout_samples` is positive) and finally stored
    as `self.target_weight_vector`.
    """
    streams = self.stream_list_target
    assert len(weights) == len(streams), (weights, streams)

    ## per-stream weights -> per-coefficient weights:
    per_coeff = []
    for idx, stream in enumerate(streams):
        per_coeff += [weights[idx]] * self.datadims_target[stream]

    n_repeats = const.target_rep_widths[self.target_representation]
    weight_vector = np.array(per_coeff * n_repeats)

    self.train_unit_features = weight(self.train_unit_features_unweighted,
                                      weight_vector)
    if self.holdout_samples > 0:
        self.train_unit_features_dev = weight(
            self.train_unit_features_unweighted_dev, weight_vector)

    ## kept so that incoming predicted acoustics can be weighted the same way:
    self.target_weight_vector = weight_vector
def set_weights(self, weights):
    """Expand per-stream weights and apply them, leaving wave history at 1.

    `weights` must hold exactly one value per entry of
    `self.stream_list_target`.  The per-coefficient vector (each weight
    repeated `self.datadims_target[stream]` times, the whole list repeated
    `const.target_rep_widths[self.target_representation]` times) is stored
    as `self.target_weight_vector`.  It is then prefixed with
    `config['wave_context_length']` ones before being applied to the
    training features (and to the dev features when `self.holdout_percent`
    is positive).
    """
    streams = self.stream_list_target
    assert len(weights) == len(streams), (weights, streams)

    ## per-stream weights -> per-coefficient weights:
    per_coeff = []
    for idx, stream in enumerate(streams):
        per_coeff += [weights[idx]] * self.datadims_target[stream]

    n_repeats = const.target_rep_widths[self.target_representation]
    target_weights = np.array(per_coeff * n_repeats)

    ## kept so that incoming predicted acoustics can be weighted the same way:
    self.target_weight_vector = target_weights

    ## waveform-history columns get an arbitrary weight of 1:
    history_weights = np.ones(self.config['wave_context_length'])
    full_weights = np.concatenate([history_weights, target_weights])

    self.train_unit_features = weight(self.train_unit_features_unweighted,
                                      full_weights)
    if self.holdout_percent > 0.0:
        self.train_unit_features_dev = weight(
            self.train_unit_features_unweighted_dev, full_weights)
def set_join_weights(self, weights):
    """Expand per-stream join weights and rebuild the weighted join data.

    `weights` must hold exactly one value per entry of
    `self.stream_list_join`; each value is repeated
    `self.datadims_join[stream]` times.  The weighted join contexts are
    split into `self.unit_end_data` (rows 1 onwards) and
    `self.unit_start_data` (all rows but the last).  When
    `self.holdout_samples` is positive, that many trailing rows are
    dropped from both.
    """
    streams = self.stream_list_join
    assert len(weights) == len(streams)

    ## per-stream weights -> per-coefficient weights:
    per_coeff = []
    for idx, stream in enumerate(streams):
        per_coeff += [weights[idx]] * self.datadims_join[stream]
    join_weights = np.array(per_coeff)

    ## TODO: be more explicit about how weight() copies and does NOT modify
    ## the original self.join_contexts_unweighted
    weighted_contexts = weight(self.join_contexts_unweighted, join_weights)

    ## plain slices -- these are not expected to copy:
    end_data = weighted_contexts[1:, :]
    start_data = weighted_contexts[:-1, :]   ## <-- only this one is used!

    n_holdout = self.holdout_samples
    if n_holdout > 0:
        end_data = end_data[:-n_holdout, :]
        start_data = start_data[:-n_holdout, :]

    self.unit_end_data = end_data
    self.unit_start_data = start_data
def synth_utt(self, base, synth_type='tune', outstem=''):
    """Synthesise one utterance with the sample-based greedy search.

    Target features for `base` are composed from the directories for
    `synth_type`, upsampled, optionally standardised, weighted with
    `self.target_weight_vector` and passed to `self.greedy_joint_search`.
    Unless `self.mode_of_operation` is 'stream_weight_balancing', the
    generated waveform is written to `<outstem>.wav`.

    Parameters:
        base: utterance name, used to locate input features and (when
            `outstem` is empty) to name the output file.
        synth_type: 'test' or 'tune'; selects which target data
            directories are read.  Any other value terminates the program
            via sys.exit.
        outstem: output path without extension.  When empty, a directory
            under config['workdir'] is created and `base` is used as the
            file stem.

    Returns:
        None.
    """
    if synth_type == 'test':
        data_dirs = self.test_data_target_dirs
    elif synth_type == 'tune':
        data_dirs = self.tune_data_target_dirs
    else:
        sys.exit('Unknown synth_type 943957011')

    if not outstem:
        train_condition = make_train_condition_name(self.config)
        synth_condition = self.make_synthesis_condition_name()
        synth_dir = os.path.join(self.config['workdir'],
                                 'synthesis_%s' % (synth_type),
                                 train_condition, synth_condition)
        safe_makedir(synth_dir)
        self.report(' ==== SYNTHESISE %s ====' % (base))
        outstem = os.path.join(synth_dir, base)
    else:
        self.report(' ==== SYNTHESISE %s ====' % (outstem))

    start_time = self.start_clock('Get speech ')
    speech = compose_speech(data_dirs, base, self.stream_list_target,
                            self.config['datadims_target'])

    ### upsample before standardisation (inefficient, but standardisation
    ### rewrites uv values?? TODO: check this)
    nframes, _ = speech.shape
    len_wave = int(self.rate * self.fshift_seconds * nframes)
    speech = resample.upsample(len_wave, self.rate, self.fshift_seconds,
                               speech, f0_dim=-1, convention='world')

    ## Standardisation is on by default and can be disabled from the config.
    ## (The condition must be a real lookup: a (value, True) tuple is always
    ## truthy and would make this switch a no-op.)
    if self.config.get('standardise_target_data', True):
        speech = standardise(speech, self.mean_vec_target,
                             self.std_vec_target)

    unit_features = weight(speech, self.target_weight_vector)
    self.stop_clock(start_time)

    ### always do greedy search for sample-based selection
    best_path, gen_wave = self.greedy_joint_search(unit_features)

    if NORMWAVE:
        print('predenorm stats:')
        print(gen_wave.mean(), gen_wave.std())
        ### denormalise:-
        gen_wave = (gen_wave * self.wave_std) + self.wave_mean
        print('denorm stats:')
        print(gen_wave.mean(), gen_wave.std())

    if self.config['nonlin_wave']:
        gen_wave = mu2lin(gen_wave)

    if self.mode_of_operation == 'stream_weight_balancing':
        self.report('')
        self.report('balancing stream weights -- skip making waveform')
        self.report('')
    else:
        start_time = self.start_clock('Wrtie wave')
        write_wave(gen_wave, outstem + '.wav', self.rate)
        self.stop_clock(start_time)
        self.report('Output wave: %s.wav' % (outstem))
        self.report('')
        self.report('')
def synth_utt(self, base, synth_type='tune', outstem='', outdir=''):
    """Synthesise one utterance by epoch-based selection and magphase joining.

    Target features for `base` are composed (optionally pitch
    synchronised), standardised and weighted, a unit path is chosen, and
    the units are concatenated into `<outstem>.wav`.

    Parameters:
        base: utterance name, used to locate input features and (when
            `outstem` is empty) to name the output file.
        synth_type: 'test' or 'tune'; any other value terminates the
            program via sys.exit.
        outstem: output path without extension; must be empty when
            `outdir` is given.
        outdir: directory for the output when `outstem` is empty;
            otherwise a directory under config['workdir'] is used.

    Returns:
        A (target_scores, join_scores) tuple when `self.mode_of_operation`
        is 'stream_weight_balancing' (no waveform is made in that mode);
        otherwise None.
    """
    if synth_type not in ('test', 'tune'):
        sys.exit('Unknown synth_type 9489384')

    ## The '' defaults were added for the pure acoustic epoch case; lab_dir
    ## is looked up here but not used further in this method.
    if synth_type == 'test':
        data_dirs = self.test_data_target_dirs
        lab_dir = self.config.get('test_lab_dir', '')
    else:
        data_dirs = self.tune_data_target_dirs
        lab_dir = self.config.get('tune_lab_dir', '')

    ## outdir and outstem are mutually exclusive:
    assert not (outdir and outstem)

    if outstem:
        self.report(' ==== SYNTHESISE %s ===='%(outstem))
    else:
        train_condition = make_train_condition_name(self.config)
        synth_condition = make_synthesis_condition_name(self.config)
        synth_dir = outdir or os.path.join(self.config['workdir'],
                                           'synthesis_%s'%(synth_type),
                                           train_condition, synth_condition)
        safe_makedir(synth_dir)
        self.report(' ==== SYNTHESISE %s ===='%(base))
        outstem = os.path.join(synth_dir, base)

    ## ---- build weighted target features ----
    feature_timer = self.start_clock('Get speech ')
    raw_speech = compose_speech(data_dirs, base, self.stream_list_target,
                                self.config['datadims_target'])
    if self.config.get('pitch_synchronise_test_data', False):
        raw_speech = pitch_synchronise(raw_speech, self.stream_list_target,
                                       self.config['datadims_target'])

    _nframes, _ndims = raw_speech.shape
    speech = standardise(raw_speech, self.mean_vec_target,
                         self.std_vec_target)

    ## the IS2018 replication setting drops the first and last frames:
    trim_edges = self.config.get('REPLICATE_IS2018_EXP', False)
    unit_features = speech[1:-1, :] if trim_edges else speech

    unit_features = weight(unit_features, self.target_weight_vector)
    _n_units, _ = unit_features.shape
    self.stop_clock(feature_timer)

    if hasattr(self, 'target_truncation_vector'):
        print('truncate target streams...')
        print(unit_features.shape)
        unit_features = unit_features[:, self.target_truncation_vector]

    ## ---- choose the unit sequence ----
    if not self.config.get('debug_with_adjacent_frames', False):
        assert self.config['greedy_search']
        assert self.config.get('target_representation') == 'epoch'
        best_path = self.greedy_joint_search(unit_features)
    else:
        print('Concatenate naturally contiguous units to debug concatenation!')
        assert not self.config.get('magphase_use_target_f0', True), 'set magphase_use_target_f0 to False for using debug_with_adjacent_frames'
        multiepoch = self.config.get('multiepoch', 1)
        if multiepoch > 1:
            best_path = np.arange(0, 500, multiepoch)
        else:
            best_path = np.arange(500)

    ## ---- weight balancing only needs the scores, not a waveform ----
    if self.mode_of_operation == 'stream_weight_balancing':
        self.report( '\n\n balancing stream weights -- skip making waveform \n\n')
        tscores = self.get_target_scores_per_stream(unit_features, best_path)
        jscores = self.get_join_scores_per_stream(best_path)
        return (tscores, jscores)

    ## ---- make the waveform ----
    PRELOAD_UTTS = False  ### !TODO?
    if PRELOAD_UTTS:
        preload_timer = self.start_clock('Preload magphase utts for sentence')
        self.preload_magphase_utts(best_path)
        self.stop_clock(preload_timer)

    join_timer = self.start_clock('Extract and join units')
    if self.config.get('store_full_magphase_sep_files', False):
        assert self.config['target_representation'] == 'epoch'
        ## TODO: unhardcode position and lf0!
        target_fz = np.exp(raw_speech[:, -1])
        join_kwargs = {'overlap': self.config.get('magphase_overlap', 0)}
        if self.config.get('magphase_use_target_f0', True):
            join_kwargs['fzero'] = target_fz
        self.concatenateMagPhaseEpoch_sep_files(best_path, outstem + '.wav',
                                                **join_kwargs)
    elif self.config.get('store_full_magphase', False):
        target_fz = np.exp(raw_speech[:, -1])
        self.concatenateMagPhaseEpoch(best_path, outstem + '.wav',
                                      fzero=target_fz)
    else:
        sys.exit('only support store_full_magphase_sep_files / store_full_magphase')
    self.stop_clock(join_timer)
    self.report( 'Output wave: %s.wav\n\n'%(outstem ))

    ## ---- optional selection trace ----
    if self.config['get_selection_info']:
        trace_file = outstem + '.trace.txt'
        trace_lines = self.get_path_information_epoch(unit_features,
                                                      best_path)
        writelist(trace_lines, trace_file)
        print('Wrote trace file %s'%(trace_file))