def test_file_hdf5(self, file_name, speaker_index_2=None):
    """
    Run the model on one HDF5 feature file and optionally synthesize audio.

    The file is read with ``self.read_hdf5_file`` and the second returned
    array is passed to ``self.process_file``. The user is then asked (via
    ``utils.query_yes_no``) whether to write the synthesized model output
    and/or the vocoded ground-truth features to ``config.output_dir``.

    Args:
        file_name: Name of the HDF5 file, forwarded to
            ``self.read_hdf5_file`` and used to build the output names.
        speaker_index_2: Optional index into ``config.singers``. When
            given, the singer name is embedded in the output file name
            (``<base>_<singer>_SIN.wav``); when omitted the file is
            written as ``<base>_SIN.wav``. The original code referenced
            this name without defining it, which raised ``NameError``.

    Returns:
        None. Results are written to disk only.
    """
    mel, stft = self.read_hdf5_file(file_name)
    # Only the first output is used here: synthesis below reuses the last
    # two ground-truth columns of ``mel`` instead of the predicted F0 / VUV.
    out_mel, _out_f0, _out_vuv = self.process_file(stft, self.sess)

    # Strip the extension whatever its length; a fixed ``[:-4]`` slice left
    # a trailing "." for ".hdf5" names.
    base_name = os.path.splitext(file_name)[0]

    synth = utils.query_yes_no("Synthesize output? ")
    if synth:
        # Trim both arrays to their common number of frames before joining
        # the model output with the ground-truth last two columns.
        out_featss = np.concatenate(
            (out_mel[:mel.shape[0]],
             mel[:out_mel.shape[0], -2:-1],
             mel[:out_mel.shape[0], -1:]),
            axis=-1)
        audio_out = sig_process.feats_to_audio(out_featss)
        if speaker_index_2 is None:
            out_name = '{}_SIN.wav'.format(base_name)
        else:
            out_name = '{}_{}_SIN.wav'.format(
                base_name, config.singers[speaker_index_2])
        sf.write(os.path.join(config.output_dir, out_name),
                 audio_out, config.fs)

    synth_ori = utils.query_yes_no("Synthesize ground truth with vocoder? ")
    if synth_ori:
        audio = sig_process.feats_to_audio(mel)
        sf.write(
            os.path.join(config.output_dir, '{}_ori.wav'.format(base_name)),
            audio, config.fs)
def test_file_hdf5(self, file_name, speaker_index_2):
    """
    Convert one HDF5 feature file to a target singer, plot and synthesize.

    The source singer name is taken from the second ``_``-separated field
    of ``file_name``. Genders of source and target are looked up in
    ``config.genders``; for an F->M or M->F conversion the first column of
    the notes array is shifted by -12 / +12 before the model is run
    (presumably one octave in semitone units -- confirm).

    Args:
        file_name: Name of the HDF5 file, forwarded to
            ``self.read_hdf5_file``.
        speaker_index_2: Index of the target singer in ``config.singers``.

    Returns:
        None. Plots are shown through ``self.plot_features`` and audio is
        written to ``config.output_dir`` when the user confirms.
    """
    mel, notes = self.read_hdf5_file(file_name)

    source_name = file_name.split('_')[1]
    source_index = config.singers.index(source_name)
    source_gender = config.genders[source_name]
    print("Original singer is {}, a human {}".format(
        source_name, source_gender))

    target_gender = config.genders[config.singers[speaker_index_2]]
    print("Target singer is {}, a human {}".format(
        config.singers[speaker_index_2], target_gender))

    # Shift the note track when the conversion crosses genders.
    female_to_male = source_gender == "F" and target_gender == "M"
    male_to_female = source_gender == "M" and target_gender == "F"
    if female_to_male:
        notes[:, 0] = notes[:, 0] - 12
    elif male_to_female:
        notes[:, 0] = notes[:, 0] + 12

    out_mel, out_f0, out_vuv = self.process_file(
        mel, source_index, speaker_index_2, notes, self.sess)

    # Ground truth ("gt") versus model output ("op") for each feature group.
    spec_envelope = {"gt": mel[:, :-6], "op": out_mel[:, :-4]}
    aperiodic = {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]}
    pitch = {"gt": mel[:, -2], "op": out_f0, "notes": notes[:, 0]}
    voicing = {"gt": mel[:, -1], "op": out_vuv}
    self.plot_features({
        "Spec Envelope": spec_envelope,
        "Aperiodic": aperiodic,
        "F0": pitch,
        "Vuv": voicing,
    })

    if utils.query_yes_no("Synthesize output? "):
        converted_feats = np.concatenate((out_mel, out_f0, out_vuv), axis=-1)
        converted_audio = sig_process.feats_to_audio(converted_feats)
        converted_path = os.path.join(
            config.output_dir,
            './{}_{}_autovc_notes.wav'.format(
                file_name[:-5], config.singers[speaker_index_2]))
        sf.write(converted_path, converted_audio, config.fs)

    if utils.query_yes_no("Synthesize ground truth with vocoder? "):
        reference_audio = sig_process.feats_to_audio(mel)
        reference_path = os.path.join(
            config.output_dir,
            './{}_{}_ori.wav'.format(
                file_name[:-5], config.singers[source_index]))
        sf.write(reference_path, reference_audio, config.fs)
def test_file_hdf5(self, file_name, speaker_index_2):
    """
    Convert one HDF5 feature file using a randomly chosen target-singer file.

    Reads the source file, picks a random ``*hdf5`` file in
    ``config.feats_dir`` whose second ``_``-separated field equals the
    target singer name, and passes the source features together with both
    second return values of ``self.read_hdf5_file`` to
    ``self.process_file``. The result is plotted, and on confirmation the
    converted audio, the target reference audio and/or the vocoded source
    are written to ``config.output_dir``.

    Args:
        file_name: Name of the source HDF5 file, forwarded to
            ``self.read_hdf5_file``; its second ``_``-separated field must
            be a name present in ``config.singers``.
        speaker_index_2: Index of the target singer in ``config.singers``.

    Returns:
        None.

    Raises:
        IndexError: If no HDF5 file for the target singer exists in
            ``config.feats_dir``.
        ValueError: If the source singer name is not in ``config.singers``
            (raised by ``list.index``).
    """
    mel, singer_1 = self.read_hdf5_file(file_name)

    speaker_name = file_name.split('_')[1]
    speaker_index = config.singers.index(speaker_name)
    # NOTE(review): both genders are hard-coded to "M", so the +/-12 pitch
    # shift below never fires -- confirm whether a gender lookup should be
    # restored here.
    speaker_gender = "M"
    print("Original singer is {}, a human {}".format(
        speaker_name, speaker_gender))

    target_name = config.singers[speaker_index_2]
    speaker_file_2 = [
        x for x in os.listdir(config.feats_dir)
        if x.endswith('hdf5') and x.split('_')[1] == target_name
    ]
    if not speaker_file_2:
        # random.choice would raise a bare IndexError; keep the exception
        # type but say what is actually missing.
        raise IndexError(
            "No hdf5 file for singer {!r} found in {!r}".format(
                target_name, config.feats_dir))
    mel_2, singer_2 = self.read_hdf5_file(random.choice(speaker_file_2))
    speaker_2_gender = "M"
    print("Target singer is {}, a human {}".format(
        target_name, speaker_2_gender))

    out_mel = self.process_file(mel, singer_1, singer_2, self.sess)

    plot_dict = {
        "Spec Envelope": {"gt": mel[:, :-6], "op": out_mel[:, :-4]},
        "Aperiodic": {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]},
    }
    self.plot_features(plot_dict)

    # The prompt literal was split across two physical lines in the source,
    # which is not a valid string; use the same text as the sibling methods.
    synth = utils.query_yes_no("Synthesize output? ")
    if synth:
        # Second-to-last ground-truth column, trimmed to the output length
        # and shifted by 12 when the conversion crosses genders.
        f0 = mel[:out_mel.shape[0], -2:-1]
        if speaker_gender == "F" and speaker_2_gender == "M":
            f0 = f0 - 12
        elif speaker_gender == "M" and speaker_2_gender == "F":
            f0 = f0 + 12
        out_featss = np.concatenate(
            (out_mel[:mel.shape[0]], f0, mel[:out_mel.shape[0], -1:]),
            axis=-1)

        audio_out = sig_process.feats_to_audio(out_featss)
        audio_out_2 = sig_process.feats_to_audio(mel_2)
        sf.write(
            os.path.join(
                config.output_dir,
                './{}_{}_autovcemb.wav'.format(file_name[:-5], target_name)),
            audio_out, config.fs)
        sf.write(
            os.path.join(
                config.output_dir,
                './{}_target.wav'.format(file_name[:-5])),
            audio_out_2, config.fs)

    synth_ori = utils.query_yes_no("Synthesize ground truth with vocoder? ")
    if synth_ori:
        audio = sig_process.feats_to_audio(mel)
        sf.write(
            os.path.join(
                config.output_dir,
                './{}_{}_ori.wav'.format(
                    file_name[:-5], config.singers[speaker_index])),
            audio, config.fs)
def test_file_wav(self, file_name, speaker_index):
    """
    Run the model on one WAV file, plot the result and optionally synthesize.

    The file is read with ``self.read_wav_file``; the second returned array
    and ``speaker_index`` are passed to ``self.process_file``. After
    plotting, the user is asked whether to synthesize the output
    (optionally with the predicted F0 shifted by -12 for "female to male"
    or +12 otherwise) and whether to vocode the ground-truth features.

    Args:
        file_name: Path of the WAV file; only the part after the last
            ``/`` is used for output names.
        speaker_index: Index into ``config.singers``; forwarded to
            ``self.process_file`` and used in the output file name.

    Returns:
        None. Audio is written to ``config.output_dir`` on confirmation.
    """
    mel, stft = self.read_wav_file(file_name)
    out_mel, out_f0, out_vuv = self.process_file(
        stft, speaker_index, self.sess)

    # Ground truth ("gt") versus model output ("op") for each feature group.
    spec_envelope = {"gt": mel[:, :-6], "op": out_mel[:, :-4]}
    aperiodic = {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]}
    pitch = {"gt": mel[:, -2], "op": out_f0}
    voicing = {"gt": mel[:, -1], "op": out_vuv}
    self.plot_features({
        "Spec Envelope": spec_envelope,
        "Aperiodic": aperiodic,
        "F0": pitch,
        "Vuv": voicing,
    })

    wants_output = utils.query_yes_no("Synthesize output? ")
    # Drop any directory part, then the 4-character extension.
    stem = file_name.split('/')[-1][:-4]

    if wants_output:
        shifted_f0 = out_f0
        if utils.query_yes_no("Change in gender? "):
            if utils.query_yes_no("Female to male?"):
                shifted_f0 = out_f0 - 12
            else:
                shifted_f0 = out_f0 + 12
        converted_feats = np.concatenate(
            (out_mel, shifted_f0, out_vuv), axis=-1)

        converted_audio = sig_process.feats_to_audio(converted_feats)
        converted_path = os.path.join(
            config.output_dir,
            '{}_{}_SDN.wav'.format(stem, config.singers[speaker_index]))
        sf.write(converted_path, converted_audio, config.fs)

    if utils.query_yes_no("Synthesize ground truth with vocoder? "):
        reference_audio = sig_process.feats_to_audio(mel)
        reference_path = os.path.join(
            config.output_dir, '{}_ori.wav'.format(stem))
        sf.write(reference_path, reference_audio, config.fs)
def test_file_wav_f0(self, file_name, f0_file, speaker_index):
    """
    Run the model on a WAV file and synthesize it with an external F0 track.

    The F0 annotation is loaded with ``midi_process.open_f0_file``,
    resampled onto one timestamp per frame of the WAV features via
    ``vamp_notes.note2traj`` and post-processed with
    ``sig_process.process_pitch``. The model output is plotted against it
    and, on confirmation, synthesized with the processed pitch appended.

    Args:
        file_name: Path of the WAV file; only the part after the last
            ``/`` is used for output names.
        f0_file: Path of the F0 annotation file; its last ``/``-separated
            component is embedded in the output file name.
        speaker_index: Index into ``config.singers``; forwarded to
            ``self.process_file`` and used in the output file name.

    Returns:
        None. Audio is written to ``config.output_dir`` on confirmation.
    """
    mel, stft = self.read_wav_file(file_name)

    f0 = midi_process.open_f0_file(f0_file)
    # Exactly one timestamp per frame. ``np.arange(0, n * hop, hop)`` with
    # a float step can return n + 1 samples because of rounding, so scale
    # an integer range instead.
    timestamps = np.arange(len(mel)) * config.hoptime
    f1 = vamp_notes.note2traj(f0, timestamps)
    f1 = sig_process.process_pitch(f1[:, 0])

    out_mel, out_f0, out_vuv = self.process_file(
        stft, speaker_index, self.sess)

    plot_dict = {
        "Spec Envelope": {"gt": mel[:, :-6], "op": out_mel[:, :-4]},
        "Aperiodic": {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]},
        "F0": {"gt": f1[:, 0], "op": out_f0},
        "Vuv": {"gt": mel[:, -1], "op": out_vuv},
    }
    self.plot_features(plot_dict)

    synth = utils.query_yes_no("Synthesize output? ")
    file_name = file_name.split('/')[-1]

    if synth:
        # Trim both arrays to the common frame count so the concatenation
        # also works when the model output is shorter than the pitch track.
        num_frames = min(out_mel.shape[0], f1.shape[0])
        out_featss = np.concatenate(
            (out_mel[:num_frames], f1[:num_frames]), axis=-1)

        audio_out = sig_process.feats_to_audio(out_featss)
        sf.write(
            os.path.join(
                config.output_dir,
                '{}_{}_SDN_f0_{}.wav'.format(
                    file_name[:-4],
                    config.singers[speaker_index],
                    f0_file.split('/')[-1])),
            audio_out, config.fs)

    synth_ori = utils.query_yes_no("Synthesize ground truth with vocoder? ")
    if synth_ori:
        audio = sig_process.feats_to_audio(mel)
        sf.write(
            os.path.join(config.output_dir,
                         '{}_ori.wav'.format(file_name[:-4])),
            audio, config.fs)
def test_file_wav(self, file_name, speaker_index_1, speaker_index):
    """
    Convert one WAV file from a source singer to a target singer.

    Genders of both singers are looked up in ``config.genders`` and
    printed. The second array returned by ``self.read_wav_file`` is passed
    to ``self.process_file`` with both singer indices; the result is
    plotted and, on confirmation, synthesized together with the
    second-to-last and last ground-truth feature columns. The
    second-to-last column is shifted by -12 for an F->M and +12 for an
    M->F conversion (presumably one octave in semitone units -- confirm).

    Args:
        file_name: Path of the WAV file, forwarded to
            ``self.read_wav_file`` and used (without its extension) to
            build the output names.
        speaker_index_1: Index of the source singer in ``config.singers``.
        speaker_index: Index of the target singer in ``config.singers``.

    Returns:
        None. Audio is written to ``config.output_dir`` on confirmation.
    """
    speaker_gender = config.genders[config.singers[speaker_index_1]]
    print("Original singer is {}, a human {}".format(
        config.singers[speaker_index_1], speaker_gender))

    speaker_2_gender = config.genders[config.singers[speaker_index]]
    print("Target singer is {}, a human {}".format(
        config.singers[speaker_index], speaker_2_gender))

    # Only the second returned array is needed in this variant.
    _, mel = self.read_wav_file(file_name)

    out_mel = self.process_file(
        mel, speaker_index_1, speaker_index, self.sess)

    plot_dict = {
        "Spec Envelope": {"gt": mel[:, :-6], "op": out_mel[:, :-4]},
        "Aperiodic": {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]},
    }
    self.plot_features(plot_dict)

    # Strip the real extension; a fixed ``[:-5]`` slice (apparently left
    # over from an HDF5 variant) also removed the last character of the
    # stem for ".wav" names.
    base_name = os.path.splitext(file_name)[0]

    synth = utils.query_yes_no("Synthesize output? ")
    if synth:
        # Second-to-last ground-truth column, trimmed to the output length
        # and shifted when the conversion crosses genders.
        f0 = mel[:out_mel.shape[0], -2:-1]
        if speaker_gender == "F" and speaker_2_gender == "M":
            f0 = f0 - 12
        elif speaker_gender == "M" and speaker_2_gender == "F":
            f0 = f0 + 12
        out_featss = np.concatenate(
            (out_mel[:mel.shape[0]], f0, mel[:out_mel.shape[0], -1:]),
            axis=-1)

        audio_out = sig_process.feats_to_audio(out_featss)
        sf.write(
            os.path.join(
                config.output_dir,
                './{}_{}_autovc.wav'.format(
                    base_name, config.singers[speaker_index])),
            audio_out, config.fs)

    synth_ori = utils.query_yes_no("Synthesize ground truth with vocoder? ")
    if synth_ori:
        audio = sig_process.feats_to_audio(mel)
        sf.write(
            os.path.join(
                config.output_dir,
                './{}_{}_ori.wav'.format(
                    base_name, config.singers[speaker_index_1])),
            audio, config.fs)