Esempio n. 1
0
    def test_file_hdf5(self, file_name):
        """
        Run the model on one HDF5 feature file and optionally write audio.

        The file is loaded with ``self.read_hdf5_file``; the second array it
        returns is passed to ``self.process_file``. The user is then asked
        interactively (``utils.query_yes_no``) whether to write

        * ``<stem>_SIN.wav``: the predicted features combined with the last
          two columns of the ground-truth features (pitch and voicing,
          judging by how sibling code labels those columns -- confirm
          against the feature layout);
        * ``<stem>_ori.wav``: the ground-truth features sent through the
          vocoder unchanged.

        Both files are written to ``config.output_dir`` at ``config.fs``.
        ``<stem>`` is ``file_name`` without its extension.

        Args:
            file_name: Name of the HDF5 file, as accepted by
                ``self.read_hdf5_file``.
        """
        mel, stft = self.read_hdf5_file(file_name)

        # Only the first output is used: synthesis below reuses the pitch and
        # voicing columns of the ground truth instead of the predicted ones.
        out_mel, _, _ = self.process_file(stft, self.sess)

        # splitext instead of a fixed-width slice: cutting four characters
        # off a ".hdf5" name leaves a stray trailing dot in the output names.
        stem = os.path.splitext(file_name)[0]

        synth = utils.query_yes_no("Synthesize output? ")

        if synth:
            # Trim both arrays to the common number of frames before joining.
            n_frames = min(out_mel.shape[0], mel.shape[0])
            out_featss = np.concatenate(
                (out_mel[:n_frames], mel[:n_frames, -2:]), axis=-1)

            audio_out = sig_process.feats_to_audio(out_featss)

            # This method has no target singer, so the file name carries no
            # singer tag (the previous code formatted in an undefined
            # ``speaker_index_2`` and raised NameError here).
            sf.write(
                os.path.join(config.output_dir, '{}_SIN.wav'.format(stem)),
                audio_out, config.fs)

        synth_ori = utils.query_yes_no("Synthesize ground truth with vocoder? ")

        if synth_ori:
            audio = sig_process.feats_to_audio(mel)
            sf.write(
                os.path.join(config.output_dir, '{}_ori.wav'.format(stem)),
                audio, config.fs)
    def test_file_hdf5(self, file_name, speaker_index_2):
        """
        Convert one HDF5 feature file to a target singer, plot the result and
        optionally write audio.

        The source singer is the second ``_``-separated field of
        ``file_name``. If source and target genders (``config.genders``)
        differ, the first column of the note array is shifted by 12 before
        conversion: down for F -> M, up for M -> F (an octave if the notes
        are in semitones -- confirm). ``self.process_file`` then produces the
        converted features, which are plotted against the ground truth via
        ``self.plot_features``.

        The user is asked interactively whether to write the converted audio
        (``<name>_<target>_autovc_notes.wav``) and the vocoded ground truth
        (``<name>_<source>_ori.wav``) to ``config.output_dir``.

        Args:
            file_name: HDF5 file name; its last five characters are dropped
                to build the output names.
            speaker_index_2: Index of the target singer in ``config.singers``.
        """
        mel, notes = self.read_hdf5_file(file_name)

        source_name = file_name.split('_')[1]
        source_index = config.singers.index(source_name)
        source_gender = config.genders[source_name]
        print("Original singer is {}, a human {}".format(source_name,
                                                         source_gender))

        target_name = config.singers[speaker_index_2]
        target_gender = config.genders[target_name]
        print("Target singer is {}, a human {}".format(target_name,
                                                       target_gender))

        # Move the note track towards the target's range when genders differ.
        gender_pair = (source_gender, target_gender)
        if gender_pair == ("F", "M"):
            notes[:, 0] = notes[:, 0] - 12
        elif gender_pair == ("M", "F"):
            notes[:, 0] = notes[:, 0] + 12

        out_mel, out_f0, out_vuv = self.process_file(
            mel, source_index, speaker_index_2, notes, self.sess)

        plot_dict = {
            "Spec Envelope": {"gt": mel[:, :-6], "op": out_mel[:, :-4]},
            "Aperiodic": {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]},
            "F0": {"gt": mel[:, -2], "op": out_f0, "notes": notes[:, 0]},
            "Vuv": {"gt": mel[:, -1], "op": out_vuv},
        }
        self.plot_features(plot_dict)

        stem = file_name[:-5]

        if utils.query_yes_no("Synthesize output? "):
            out_featss = np.concatenate((out_mel, out_f0, out_vuv), axis=-1)
            audio_out = sig_process.feats_to_audio(out_featss)

            converted_name = './{}_{}_autovc_notes.wav'.format(
                stem, target_name)
            converted_path = os.path.join(config.output_dir, converted_name)
            sf.write(converted_path, audio_out, config.fs)

        if utils.query_yes_no("Synthesize ground truth with vocoder? "):
            audio = sig_process.feats_to_audio(mel)

            original_name = './{}_{}_ori.wav'.format(
                stem, config.singers[source_index])
            original_path = os.path.join(config.output_dir, original_name)
            sf.write(original_path, audio, config.fs)
Esempio n. 3
0
    def test_file_hdf5(self, file_name, speaker_index_2):
        """
        Convert one HDF5 feature file towards a target singer using a
        randomly chosen file of that singer, plot the result and optionally
        write audio.

        The source singer is the second ``_``-separated field of
        ``file_name``. A target file is picked at random among the ``hdf5``
        entries of ``config.feats_dir`` whose second ``_``-separated field
        equals the target singer's name. The second values returned by
        ``self.read_hdf5_file`` for both files are passed, together with the
        source features, to ``self.process_file``.

        The user is asked interactively whether to write the converted audio
        (``<name>_<target>_autovcemb.wav``), the vocoded target file
        (``<name>_target.wav``) and the vocoded ground truth
        (``<name>_<source>_ori.wav``) to ``config.output_dir``.

        Args:
            file_name: HDF5 file name; its last five characters are dropped
                to build the output names.
            speaker_index_2: Index of the target singer in ``config.singers``.
        """
        mel, singer_1 = self.read_hdf5_file(file_name)

        source_name = file_name.split('_')[1]
        source_index = config.singers.index(source_name)

        # NOTE(review): both genders are hard-coded to "M", so the pitch-shift
        # branches further down never trigger -- confirm whether intended.
        source_gender = "M"
        print("Original singer is {}, a human {}".format(source_name, source_gender))

        target_name = config.singers[speaker_index_2]
        candidates = []
        for entry in os.listdir(config.feats_dir):
            if entry.endswith('hdf5') and entry.split('_')[1] == target_name:
                candidates.append(entry)
        mel_2, singer_2 = self.read_hdf5_file(random.choice(candidates))

        target_gender = "M"
        print("Target singer is {}, a human {}".format(target_name, target_gender))

        out_mel = self.process_file(mel, singer_1, singer_2, self.sess)

        plot_dict = {
            "Spec Envelope": {"gt": mel[:, :-6], "op": out_mel[:, :-4]},
            "Aperiodic": {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]},
        }
        self.plot_features(plot_dict)

        stem = file_name[:-5]

        if utils.query_yes_no("Synthesize output? "):
            # Second-to-last ground-truth column, optionally shifted by 12
            # when the genders differ.
            pitch = mel[:out_mel.shape[0], -2:-1]
            if source_gender == "F" and target_gender == "M":
                pitch = pitch - 12
            elif source_gender == "M" and target_gender == "F":
                pitch = pitch + 12

            out_featss = np.concatenate(
                (out_mel[:mel.shape[0]], pitch, mel[:out_mel.shape[0], -1:]),
                axis=-1)

            audio_out = sig_process.feats_to_audio(out_featss)
            audio_out_2 = sig_process.feats_to_audio(mel_2)

            converted_path = os.path.join(
                config.output_dir,
                './{}_{}_autovcemb.wav'.format(stem, target_name))
            sf.write(converted_path, audio_out, config.fs)

            target_path = os.path.join(
                config.output_dir, './{}_target.wav'.format(stem))
            sf.write(target_path, audio_out_2, config.fs)

        if utils.query_yes_no("Synthesize ground truth with vocoder? "):
            audio = sig_process.feats_to_audio(mel)
            original_path = os.path.join(
                config.output_dir,
                './{}_{}_ori.wav'.format(stem, config.singers[source_index]))
            sf.write(original_path, audio, config.fs)
Esempio n. 4
0
    def test_file_wav(self, file_name, speaker_index):
        """
        Run the model on one WAV file, plot the result and optionally write
        audio.

        ``self.read_wav_file`` returns two arrays; the second is passed to
        ``self.process_file`` together with ``speaker_index``. The outputs
        are plotted against the first array via ``self.plot_features``.

        The user is asked interactively whether to write the predicted audio
        (``<name>_<singer>_SDN.wav``), optionally with the predicted pitch
        shifted by 12 (down for "female to male", up otherwise), and the
        vocoded input features (``<name>_ori.wav``). Files go to
        ``config.output_dir``.

        Args:
            file_name: Path of the WAV file; the part after the last ``/``
                minus its last four characters is used in the output names.
            speaker_index: Index of the singer in ``config.singers``.
        """
        mel, stft = self.read_wav_file(file_name)

        out_mel, out_f0, out_vuv = self.process_file(
            stft, speaker_index, self.sess)

        plot_dict = {
            "Spec Envelope": {"gt": mel[:, :-6], "op": out_mel[:, :-4]},
            "Aperiodic": {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]},
            "F0": {"gt": mel[:, -2], "op": out_f0},
            "Vuv": {"gt": mel[:, -1], "op": out_vuv},
        }
        self.plot_features(plot_dict)

        synth = utils.query_yes_no("Synthesize output? ")

        # Drop directories and the four-character extension for output names.
        stem = file_name.split('/')[-1][:-4]

        if synth:
            pitch = out_f0
            if utils.query_yes_no("Change in gender? "):
                if utils.query_yes_no("Female to male?"):
                    pitch = out_f0 - 12
                else:
                    pitch = out_f0 + 12

            out_featss = np.concatenate((out_mel, pitch, out_vuv), axis=-1)
            audio_out = sig_process.feats_to_audio(out_featss)

            converted_name = '{}_{}_SDN.wav'.format(
                stem, config.singers[speaker_index])
            sf.write(os.path.join(config.output_dir, converted_name),
                     audio_out, config.fs)

        if utils.query_yes_no("Synthesize ground truth with vocoder? "):
            audio = sig_process.feats_to_audio(mel)
            original_name = '{}_ori.wav'.format(stem)
            sf.write(os.path.join(config.output_dir, original_name),
                     audio, config.fs)
Esempio n. 5
0
    def test_file_wav_f0(self, file_name, f0_file, speaker_index):
        """
        Run the model on one WAV file and synthesize it with a pitch track
        taken from an external file.

        The pitch file is opened with ``midi_process.open_f0_file``, sampled
        at one timestamp per feature frame (``config.hoptime`` apart) with
        ``vamp_notes.note2traj`` and post-processed by
        ``sig_process.process_pitch``. The model output is plotted against
        that track and the input features via ``self.plot_features``.

        The user is asked interactively whether to write the predicted
        features combined with the external pitch track
        (``<name>_<singer>_SDN_f0_<f0 file name>.wav``) and the vocoded input
        features (``<name>_ori.wav``). Files go to ``config.output_dir``.

        Args:
            file_name: Path of the WAV file; the part after the last ``/``
                minus its last four characters is used in the output names.
            f0_file: Path of the pitch/notes file.
            speaker_index: Index of the singer in ``config.singers``.
        """
        mel, stft = self.read_wav_file(file_name)

        f0 = midi_process.open_f0_file(f0_file)

        # Exactly one timestamp per frame. Scaling an integer range avoids
        # np.arange's float-step rounding, which can yield an extra element.
        timestamps = np.arange(len(mel)) * config.hoptime

        f1 = vamp_notes.note2traj(f0, timestamps)

        f1 = sig_process.process_pitch(f1[:, 0])

        out_mel, out_f0, out_vuv = self.process_file(stft, speaker_index,
                                                     self.sess)

        plot_dict = {
            "Spec Envelope": {"gt": mel[:, :-6], "op": out_mel[:, :-4]},
            "Aperiodic": {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]},
            "F0": {"gt": f1[:, 0], "op": out_f0},
            "Vuv": {"gt": mel[:, -1], "op": out_vuv},
        }

        self.plot_features(plot_dict)

        synth = utils.query_yes_no("Synthesize output? ")

        file_name = file_name.split('/')[-1]

        if synth:
            # Trim both arrays to the common frame count; trimming only the
            # model output fails when the pitch track is the longer one.
            n_frames = min(out_mel.shape[0], f1.shape[0])
            out_featss = np.concatenate(
                (out_mel[:n_frames], f1[:n_frames]), axis=-1)

            audio_out = sig_process.feats_to_audio(out_featss)

            sf.write(
                os.path.join(
                    config.output_dir,
                    '{}_{}_SDN_f0_{}.wav'.format(file_name[:-4],
                                                 config.singers[speaker_index],
                                                 f0_file.split('/')[-1])),
                audio_out, config.fs)

        synth_ori = utils.query_yes_no(
            "Synthesize ground truth with vocoder? ")

        if synth_ori:
            audio = sig_process.feats_to_audio(mel)
            sf.write(
                os.path.join(config.output_dir,
                             '{}_ori.wav'.format(file_name[:-4])), audio,
                config.fs)
Esempio n. 6
0
    def test_file_wav(self, file_name, speaker_index_1, speaker_index):
        """
        Convert one WAV file from a source singer to a target singer, plot
        the result and optionally write audio.

        The second array returned by ``self.read_wav_file`` is converted with
        ``self.process_file`` and plotted against the input via
        ``self.plot_features``. For synthesis the converted features are
        combined with the last two columns of the input features; the
        second-to-last column is shifted by 12 when the singers' genders
        (``config.genders``) differ: down for F -> M, up for M -> F.

        The user is asked interactively whether to write the converted audio
        (``<stem>_<target>_autovc.wav``) and the vocoded input features
        (``<stem>_<source>_ori.wav``) to ``config.output_dir``. ``<stem>`` is
        the base name of ``file_name`` without its extension.

        Args:
            file_name: Path of the WAV file.
            speaker_index_1: Index of the source singer in ``config.singers``.
            speaker_index: Index of the target singer in ``config.singers``.
        """
        source_name = config.singers[speaker_index_1]
        speaker_gender = config.genders[source_name]
        print("Original singer is {}, a human {}".format(
            source_name, speaker_gender))

        target_name = config.singers[speaker_index]
        speaker_2_gender = config.genders[target_name]
        print("Target singer is {}, a human {}".format(
            target_name, speaker_2_gender))

        _, mel = self.read_wav_file(file_name)

        out_mel = self.process_file(mel, speaker_index_1, speaker_index,
                                    self.sess)

        plot_dict = {
            "Spec Envelope": {
                "gt": mel[:, :-6],
                "op": out_mel[:, :-4]
            },
            "Aperiodic": {
                "gt": mel[:, -6:-2],
                "op": out_mel[:, -4:]
            }
        }

        self.plot_features(plot_dict)

        # Output names use the bare file stem. The previous fixed slice of
        # five characters was sized for ".hdf5" and cut into ".wav" stems,
        # and directory parts of the input path leaked into the output path.
        stem = os.path.splitext(os.path.basename(file_name))[0]

        synth = utils.query_yes_no("Synthesize output? ")

        if synth:
            # Trim everything to the common number of frames.
            n_frames = min(out_mel.shape[0], mel.shape[0])

            pitch = mel[:n_frames, -2:-1]
            if speaker_gender == "F" and speaker_2_gender == "M":
                pitch = pitch - 12
            elif speaker_gender == "M" and speaker_2_gender == "F":
                pitch = pitch + 12

            out_featss = np.concatenate(
                (out_mel[:n_frames], pitch, mel[:n_frames, -1:]), axis=-1)

            audio_out = sig_process.feats_to_audio(out_featss)

            sf.write(
                os.path.join(
                    config.output_dir,
                    './{}_{}_autovc.wav'.format(stem, target_name)),
                audio_out, config.fs)

        synth_ori = utils.query_yes_no(
            "Synthesize ground truth with vocoder? ")

        if synth_ori:
            audio = sig_process.feats_to_audio(mel)
            sf.write(
                os.path.join(
                    config.output_dir,
                    './{}_{}_ori.wav'.format(stem, source_name)),
                audio, config.fs)