def record_config(self):
    """Write the current settings to the config file (tk variable values -> config file).

    Passes the current setting directory to ``safe_makedir``, then writes one
    ``key = value`` line per item of ``self.get_current_settings()`` to
    ``self.current_setting_file()``. The file is opened in ``'w'`` mode, so
    any previous content is replaced.
    """
    safe_makedir(self.current_setting_dir())
    # The context manager closes the file even if formatting or writing one
    # of the settings raises part-way through.
    with open(self.current_setting_file(), 'w') as f:
        for (k, v) in self.get_current_settings().items():
            f.write('%s = %s\n' % (k, str(v)))
def get_data_dump_name(config, joindata=False, joinsql=False, searchtree=False):
    """Return the path of the data dump file for ``config``.

    The path is ``<workdir>/data_dumps/<train condition><suffix>``; the
    ``data_dumps`` directory is passed to ``safe_makedir`` before the name is
    built. The suffix depends on the flags:

        joindata   -> '.joindata.hdf5'
        joinsql    -> '.joindata.sql'
        searchtree -> '.searchtree.hdf5'
        (none)     -> '.hdf5'

    ``joindata`` and ``joinsql`` must not both be set (checked with an
    assert); either of them takes precedence over ``searchtree``.
    """
    dump_dir = os.path.join(config['workdir'], 'data_dumps')
    safe_makedir(dump_dir)
    condition = make_train_condition_name(config)
    assert not (joindata and joinsql)

    # Flags are checked in priority order; the first one set picks the suffix.
    suffix = '.hdf5'
    for flag, candidate in ((joindata, '.joindata.hdf5'),
                            (joinsql, '.joindata.sql'),
                            (searchtree, '.searchtree.hdf5')):
        if flag:
            suffix = candidate
            break

    return os.path.join(dump_dir, condition + suffix)
def synthesise_condition(self):
    """Synthesise with the current settings under a new config number.

    When ``self.settings_have_changed()`` is false, only a notice is printed.
    Otherwise the next free config number becomes the current one, the
    synthesiser is reconfigured, the play buttons are updated, synthesis is
    run into the current setting directory, and the settings are recorded
    via ``self.record_config()``.
    """
    if not self.settings_have_changed():
        print('NO VALUES ALTERED')
        return

    # Claim the next unused config number for this condition.
    claimed_number = self.next_new_config_number
    self.current_config_number = claimed_number
    self.next_new_config_number = claimed_number + 1

    self.reconfigure_synthesiser()
    safe_makedir(self.current_setting_dir())
    self.update_play_buttons()

    self.synthesiser.synth_from_config(outdir=self.current_setting_dir())
    self.record_config()
# Re-synthesise into a new numbered trial directory every time the config
# file is found to have changed, and rebuild the listening page afterwards.
opts = a.parse_args()
synth = Synthesiser(opts.config_fname)

trial = 1
html_file = os.path.join(opts.output_dir, 'listen.html')

while shall_we_continue():
    anything_changed = synth.reconfigure_from_config_file()
    if not anything_changed:
        print('Nothing changed in config file %s'%(opts.config_fname))
        continue

    # Trial directories are named t00001, t00002, ...
    trial_name = 't'+str(trial).zfill(5)
    current_setting_dir = os.path.join(opts.output_dir, trial_name)
    safe_makedir(current_setting_dir)

    synth.synth_from_config(outdir=current_setting_dir)
    record_config(synth, current_setting_dir + '/tuned_settings.cfg')
    trial += 1

    # The page is rebuilt from every directory matching t* under output_dir.
    voice_dirs = glob.glob(os.path.join(opts.output_dir, 't*'))
    make_internal_webchart.main_work(voice_dirs, outfile=html_file)
    print('Browse to %s to listen'%(html_file))
def make_magphase_directory_structure(outdir, cepstra=False):
    """Call ``safe_makedir`` on every directory of the magphase layout under ``outdir``.

    Layout:

        outdir/                      plus  shift/  pm/
        outdir/high/                 with  mag/  real/  imag/  f0/
        outdir/low/                  with  mag/  real/  imag/  lf0/

    With ``cepstra`` set, ``outdir/low/`` additionally gets ``mag_cc/``,
    ``imag_cc/`` and ``real_cc/``.
    """
    outdir_hi = os.path.join(outdir, 'high')
    outdir_lo = os.path.join(outdir, 'low')
    bands = [outdir_hi, outdir_lo]

    # Collect all directories first, parents ahead of their children, then
    # make them in a single pass.
    wanted = [outdir] + bands
    wanted += [os.path.join(band, stream)
               for stream in ['mag', 'real', 'imag']
               for band in bands]
    wanted += [os.path.join(outdir, stream) for stream in ['shift', 'pm']]
    wanted.append(os.path.join(outdir_hi, 'f0'))
    wanted.append(os.path.join(outdir_lo, 'lf0'))
    if cepstra:
        wanted += [os.path.join(outdir_lo, stream)
                   for stream in ['mag_cc', 'imag_cc', 'real_cc']]

    for direc in wanted:
        safe_makedir(direc)
def synth_utt(self, base, synth_type='tune', outstem=''):
    """Synthesise one utterance by sample-based selection and write it as a wave file.

    The target features for ``base`` are composed from the target data
    directories, upsampled, optionally standardised, weighted, and passed to
    ``self.greedy_joint_search``. The generated wave is post-processed
    (denormalised if ``NORMWAVE``, passed through ``mu2lin`` if the config
    sets ``nonlin_wave``) and written to ``<outstem>.wav``, unless
    ``self.mode_of_operation`` is ``'stream_weight_balancing'``, in which
    case no waveform is written.

    Parameters:
        base: utterance name, used to find the target data and, when
            ``outstem`` is empty, to name the output.
        synth_type: ``'test'`` or ``'tune'``; selects which target data
            directories are read. Any other value ends the program via
            ``sys.exit``.
        outstem: output path without the ``.wav`` extension. When empty, it
            is built as ``<workdir>/synthesis_<synth_type>/<train
            condition>/<synthesis condition>/<base>``.

    Returns nothing.
    """
    if synth_type == 'test':
        data_dirs = self.test_data_target_dirs
    elif synth_type == 'tune':
        data_dirs = self.tune_data_target_dirs
    else:
        sys.exit('Unknown synth_type 943957011')

    if not outstem:
        train_condition = make_train_condition_name(self.config)
        synth_condition = self.make_synthesis_condition_name()
        synth_dir = os.path.join(self.config['workdir'], 'synthesis_%s' % (synth_type),
                                 train_condition, synth_condition)
        safe_makedir(synth_dir)
        self.report(' ==== SYNTHESISE %s ====' % (base))
        outstem = os.path.join(synth_dir, base)
    else:
        self.report(' ==== SYNTHESISE %s ====' % (outstem))

    start_time = self.start_clock('Get speech ')
    speech = compose_speech(data_dirs, base, self.stream_list_target,
                            self.config['datadims_target'])

    ### upsample before standardisation (inefficient, but standardisation
    ### rewrites uv values?? TODO: check this)
    nframes, _ = speech.shape
    len_wave = int(self.rate * self.fshift_seconds * nframes)
    speech = resample.upsample(len_wave, self.rate, self.fshift_seconds, speech,
                               f0_dim=-1, convention='world')

    # Standardise unless the config switches it off. The setting has to be
    # looked up with a default: testing the 2-tuple (value, True) would be
    # true whatever the value is.
    if self.config.get('standardise_target_data', True):
        speech = standardise(speech, self.mean_vec_target, self.std_vec_target)

    unit_features = weight(speech, self.target_weight_vector)
    self.stop_clock(start_time)

    ### always do greedy search for sample-based selection
    best_path, gen_wave = self.greedy_joint_search(unit_features)

    if NORMWAVE:
        print('predenorm stats:')
        print(gen_wave.mean(), gen_wave.std())
        ### denormalise:-
        gen_wave = (gen_wave * self.wave_std) + self.wave_mean
        print('denorm stats:')
        print(gen_wave.mean(), gen_wave.std())

    if self.config['nonlin_wave']:
        gen_wave = mu2lin(gen_wave)

    if self.mode_of_operation == 'stream_weight_balancing':
        self.report('')
        self.report('balancing stream weights -- skip making waveform')
        self.report('')
    else:
        start_time = self.start_clock('Wrtie wave')
        write_wave(gen_wave, outstem + '.wav', self.rate)
        self.stop_clock(start_time)
        self.report('Output wave: %s.wav' % (outstem))
        self.report('')
        self.report('')
# ======== process command line ========== a = ArgumentParser() a.add_argument('-f', dest='feature_dir', required=True) a.add_argument('-o', dest='output_dir', required=True) a.add_argument('-N', dest='nfiles', type=int, default=0) a.add_argument('-m', type=int, default=60, help='low dim feature size (compressed mel magnitude spectrum & cepstrum)') a.add_argument('-p', type=int, default=45, help='low dim feature size (compressed mel phase spectra & cepstra)') a.add_argument('-fftlen', type=int, default=1024) a.add_argument('-ncores', type=int, default=0) a.add_argument('-fs', type=int, default=48000) a.add_argument('-pattern', type=str, default='', help='only synthesise files with this substring in their basename') opts = a.parse_args() safe_makedir(opts.output_dir) baselist = [basename(fname) for fname in sorted(glob.glob(opts.feature_dir + '/lf0/*.lf0'))] #### temp # baselist2 = [] # for base in baselist: # if int(base.replace('hvd_', '')) > 600: # baselist2.append(base) # baselist = baselist2 if opts.pattern: baselist = [b for b in baselist if opts.pattern in b] if opts.nfiles > 0:
# ## this is the training data as regenerated by LSTM trained on it (for target cost):
# streams_dir = '/afs/inf.ed.ac.uk/group/cstr/projects/blizzard_entries/blizzard2017/hybrid_voice/data/predicted_params/train/'
# topoutdir = '/tmp/testpad'
## --------

## HTS style labels used in Blizzard:-
## (raw string: '\+' and '\=' are not valid string escapes, so the pattern is
## written raw to hand the backslashes to the regex engine unchanged)
hts_quinphone_regex = r'([^~]+)~([^-]+)-([^\+]+)\+([^\=]+)\=([^:]+)'

stream_list = ['mgc', 'lf0']
stream_dims = {'mgc': 60, 'lf0': 1}

## For each label file under labdir, read every stream file that exists for
## it under streams_dir, pass it through reinsert_terminal_silence together
## with the label, and write the result to topoutdir/<stream>/.
for labfname in glob.glob(labdir + '/*.lab'):
    print(labfname)
    lab = read_label(labfname, hts_quinphone_regex)
    base = basename(labfname)
    for stream in stream_list:
        stream_file = os.path.join(streams_dir, stream, base + '.' + stream)
        if not os.path.isfile(stream_file):
            ## no data for this stream of this utterance -- try the next stream
            print('skip!')
            continue
        speech = get_speech(stream_file, stream_dims[stream])
        speech = reinsert_terminal_silence(speech, lab)
        outdir = topoutdir + '/' + stream
        safe_makedir(outdir)
        put_speech(speech, outdir + '/' + base + '.' + stream)
def synth_utt(self, base, synth_type='tune', outstem='', outdir=''):
    """Synthesise one utterance by epoch-based selection and magphase concatenation.

    Parameters:
        base: utterance name, used to find the target data and, when
            ``outstem`` is empty, to name the outputs.
        synth_type: ``'test'`` or ``'tune'``; selects which target data
            directories are read. Any other value ends the program via
            ``sys.exit``.
        outstem: output path without extension. When empty, it is built
            from ``outdir`` (if given) or from ``<workdir>/synthesis_<synth_type>/
            <train condition>/<synthesis condition>``, joined with ``base``.
        outdir: directory for the output; must not be combined with
            ``outstem`` (checked with an assert).

    Returns:
        ``(tscores, jscores)`` when ``self.mode_of_operation`` is
        ``'stream_weight_balancing'`` (no waveform is made in that mode);
        otherwise nothing. Outside that mode ``<outstem>.wav`` is produced
        by one of the ``concatenateMagPhaseEpoch*`` methods and, if the
        config sets ``get_selection_info``, ``<outstem>.trace.txt`` is
        written as well.
    """
    if synth_type == 'test':
        data_dirs = self.test_data_target_dirs
        lab_dir = self.config.get('test_lab_dir', '')  ## default added for pure acoustic epoch case
    elif synth_type == 'tune':
        data_dirs = self.tune_data_target_dirs
        lab_dir = self.config.get('tune_lab_dir', '')  ## default added for pure acoustic epoch case
    else:
        sys.exit('Unknown synth_type 9489384')

    if outdir:
        assert not outstem

    ## ---- work out where the output goes ----
    if outstem:
        self.report(' ==== SYNTHESISE %s ===='%(outstem))
    else:
        train_condition = make_train_condition_name(self.config)
        synth_condition = make_synthesis_condition_name(self.config)
        synth_dir = outdir if outdir else os.path.join(
            self.config['workdir'], 'synthesis_%s'%(synth_type),
            train_condition, synth_condition)
        safe_makedir(synth_dir)
        self.report(' ==== SYNTHESISE %s ===='%(base))
        outstem = os.path.join(synth_dir, base)

    ## ---- assemble target features ----
    start_time = self.start_clock('Get speech ')
    unnorm_speech = compose_speech(data_dirs, base, self.stream_list_target,
                                   self.config['datadims_target'])
    if self.config.get('pitch_synchronise_test_data', False):
        unnorm_speech = pitch_synchronise(unnorm_speech, self.stream_list_target,
                                          self.config['datadims_target'])
    m, dim = unnorm_speech.shape

    speech = standardise(unnorm_speech, self.mean_vec_target, self.std_vec_target)
    trim_ends = self.config.get('REPLICATE_IS2018_EXP', False)
    unit_features = speech[1:-1, :] if trim_ends else speech
    unit_features = weight(unit_features, self.target_weight_vector)
    n_units, _ = unit_features.shape
    self.stop_clock(start_time)

    if hasattr(self, 'target_truncation_vector'):
        print('truncate target streams...')
        print(unit_features.shape)
        unit_features = unit_features[:, self.target_truncation_vector]

    ## ---- choose the unit sequence ----
    if self.config.get('debug_with_adjacent_frames', False):
        print('Concatenate naturally contiguous units to debug concatenation!')
        assert not self.config.get('magphase_use_target_f0', True), 'set magphase_use_target_f0 to False for using debug_with_adjacent_frames'
        multiepoch = self.config.get('multiepoch', 1)
        best_path = np.arange(0, 500, multiepoch) if multiepoch > 1 else np.arange(500)
    else:
        assert self.config['greedy_search']
        assert self.config.get('target_representation') == 'epoch'
        best_path = self.greedy_joint_search(unit_features)

    ## ---- stream weight balancing: scores only, no waveform ----
    if self.mode_of_operation == 'stream_weight_balancing':
        self.report('\n\n balancing stream weights -- skip making waveform \n\n')
        tscores = self.get_target_scores_per_stream(unit_features, best_path)
        jscores = self.get_join_scores_per_stream(best_path)
        return (tscores, jscores)

    ## ---- make the waveform ----
    PRELOAD_UTTS = False  ### !TODO?
    if PRELOAD_UTTS:
        start_time = self.start_clock('Preload magphase utts for sentence')
        self.preload_magphase_utts(best_path)
        self.stop_clock(start_time)

    start_time = self.start_clock('Extract and join units')
    if self.config.get('store_full_magphase_sep_files', False):
        assert self.config['target_representation'] == 'epoch'
        target_fz = np.exp(unnorm_speech[:, -1])  ## TODO: unhardcode position and lf0!
        magphase_overlap = self.config.get('magphase_overlap', 0)
        ## the target f0 is only handed over when the config asks for it
        join_kwargs = {'overlap': magphase_overlap}
        if self.config.get('magphase_use_target_f0', True):
            join_kwargs['fzero'] = target_fz
        self.concatenateMagPhaseEpoch_sep_files(best_path, outstem + '.wav', **join_kwargs)
    elif self.config.get('store_full_magphase', False):
        target_fz = np.exp(unnorm_speech[:, -1])
        self.concatenateMagPhaseEpoch(best_path, outstem + '.wav', fzero=target_fz)
    else:
        sys.exit('only support store_full_magphase_sep_files / store_full_magphase')
    self.stop_clock(start_time)
    self.report('Output wave: %s.wav\n\n'%(outstem))

    ## ---- optional selection trace ----
    if self.config['get_selection_info']:
        trace_lines = self.get_path_information_epoch(unit_features, best_path)
        trace_fname = outstem + '.trace.txt'
        writelist(trace_lines, trace_fname)
        print('Wrote trace file %s'%(trace_fname))