Python feats_to_audioの例、synth.utils.sig_process.feats_to_audio Pythonの例

コード例 #1

0

ファイルを表示

    def test_file_wav(self, file_name, speaker_index):
        """
        Convert a wav file towards a target singer and write the results as audio.

        The source embedding is computed from file_name with
        utils.get_embedding_GE2E. The target embedding is read from a randomly
        chosen file in config.feats_dir whose name ends in 'hdf5' and whose
        second '_'-separated field equals config.singers[speaker_index].

        Three wav files are written to config.output_dir: the converted
        output, a resynthesis of the chosen target file and a resynthesis of
        the input features.
        """
        # The first value returned by read_wav_file is not needed here.
        _, src_feats = self.read_wav_file(file_name)

        src_emb = utils.get_embedding_GE2E(file_name)

        # Collect every feature file that belongs to the requested singer.
        candidates = []
        for entry in os.listdir(config.feats_dir):
            if entry.endswith('hdf5') and entry.split('_')[1] == config.singers[speaker_index]:
                candidates.append(entry)
        target_feats, target_emb = self.read_hdf5_file(random.choice(candidates))

        converted = self.process_file(src_feats, src_emb, target_emb, self.sess)

        self.plot_features({
            "Spec Envelope": {"gt": src_feats[:, :-6], "op": converted[:, :-4]},
            "Aperiodic": {"gt": src_feats[:, -6:-2], "op": converted[:, -4:]},
        })

        # Append the last two columns of the input features (presumably f0
        # and voicing -- TODO confirm) to the converted columns, trimming
        # both to their common number of rows.
        out_featss = np.concatenate(
            (converted[:src_feats.shape[0]],
             src_feats[:converted.shape[0], -2:]),
            axis=-1)

        audio_out = sig_process.feats_to_audio(out_featss)
        audio_target = sig_process.feats_to_audio(target_feats)

        # Base name without directories and without anything after the first dot.
        stem = file_name.split('/')[-1].split('.')[0]
        singer = config.singers[speaker_index]

        converted_path = os.path.join(
            config.output_dir, './{}_{}_autovcemb.wav'.format(stem, singer))
        sf.write(converted_path, audio_out, config.fs)

        target_path = os.path.join(
            config.output_dir, './{}_{}_target.wav'.format(stem, singer))
        sf.write(target_path, audio_target, config.fs)

        audio_ori = sig_process.feats_to_audio(src_feats)
        ori_path = os.path.join(config.output_dir, './{}_ori.wav'.format(stem))
        sf.write(ori_path, audio_ori, config.fs)

コード例 #2

0

ファイルを表示

    def test_file_hdf5(self, file_name):
        """
        Run the model on an HDF5 feature file and optionally synthesize audio.

        The file is read with self.read_hdf5_file, the second returned value
        is passed to self.process_file, and the user is then asked on the
        console whether to vocode the model output and/or the input features.

        Args:
            file_name: name of the HDF5 feature file; its extension is
                stripped to build the output file names.

        Side effects:
            Prompts via utils.query_yes_no and, depending on the answers,
            writes '<stem>_SIN.wav' and/or '<stem>_ori.wav' to
            config.output_dir.
        """
        mel, stft = self.read_hdf5_file(file_name)

        out_mel, out_f0, out_vuv = self.process_file(stft, self.sess)

        # NOTE(review): this dict is built but never handed to
        # self.plot_features in this method -- confirm whether plotting was
        # intentionally disabled here.
        plot_dict = {"Spec Envelope": {"gt": mel[:,:-6], "op": out_mel[:,:-4]}, "Aperiodic":{"gt": mel[:,-6:-2], "op": out_mel[:,-4:]},\
         "F0": {"gt": mel[:,-2], "op": out_f0}, "Vuv": {"gt": mel[:,-1], "op": out_vuv}}

        # Strip the real extension; the previous fixed [:-4] slice left a
        # trailing '.' in the stem for '.hdf5' names.
        stem = os.path.splitext(file_name)[0]

        synth = utils.query_yes_no("Synthesize output? ")

        if synth:
            # Model output columns plus the input's F0 and Vuv columns (the
            # last two columns, per the plot labels above), trimmed to the
            # common number of rows.
            out_featss = np.concatenate(
                (out_mel[:mel.shape[0]], mel[:out_mel.shape[0], -2:]),
                axis=-1)

            audio_out = sig_process.feats_to_audio(out_featss)

            # This method has no target-singer argument; the previous code
            # referenced an undefined `speaker_index_2` here and raised
            # NameError, so the singer field is dropped from the name.
            sf.write(os.path.join(config.output_dir, '{}_SIN.wav'.format(stem)), audio_out, config.fs)

        synth_ori = utils.query_yes_no("Synthesize ground truth with vocoder? ")

        if synth_ori:
            audio = sig_process.feats_to_audio(mel)
            sf.write(os.path.join(config.output_dir, '{}_ori.wav'.format(stem)), audio, config.fs)

コード例 #3

0

ファイルを表示

ファイル: autovc_notes_emb.py プロジェクト: MTG/content_choral_separation

    def test_file_hdf5(self, file_name, speaker_index_2):
        """
        Convert an HDF5 feature file to another singer, conditioned on notes.

        The source singer is the second '_'-separated field of file_name and
        the target singer is config.singers[speaker_index_2]. When the two
        genders in config.genders differ, the first notes column is shifted
        by 12 before the model is run. The results are plotted and, after
        console confirmation, vocoded and written to config.output_dir.
        """
        mel, notes = self.read_hdf5_file(file_name)

        src_name = file_name.split('_')[1]
        speaker_index = config.singers.index(src_name)
        src_gender = config.genders[src_name]
        print("Original singer is {}, a human {}".format(src_name, src_gender))

        tgt_gender = config.genders[config.singers[speaker_index_2]]
        print("Target singer is {}, a human {}".format(
            config.singers[speaker_index_2], tgt_gender))

        # Shift the note track down for female -> male and up for
        # male -> female (12 is presumably one octave in semitones).
        direction = (src_gender, tgt_gender)
        if direction == ("F", "M"):
            notes[:, 0] = notes[:, 0] - 12
        elif direction == ("M", "F"):
            notes[:, 0] = notes[:, 0] + 12

        out_mel, out_f0, out_vuv = self.process_file(
            mel, speaker_index, speaker_index_2, notes, self.sess)

        envelope_panel = {"gt": mel[:, :-6], "op": out_mel[:, :-4]}
        aperiodic_panel = {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]}
        f0_panel = {"gt": mel[:, -2], "op": out_f0, "notes": notes[:, 0]}
        vuv_panel = {"gt": mel[:, -1], "op": out_vuv}
        self.plot_features({
            "Spec Envelope": envelope_panel,
            "Aperiodic": aperiodic_panel,
            "F0": f0_panel,
            "Vuv": vuv_panel,
        })

        if utils.query_yes_no("Synthesize output? "):
            # Here the predicted f0 and vuv are used, not the input's.
            out_featss = np.concatenate((out_mel, out_f0, out_vuv), axis=-1)
            audio_out = sig_process.feats_to_audio(out_featss)

            converted_name = './{}_{}_autovc_notes.wav'.format(
                file_name[:-5], config.singers[speaker_index_2])
            sf.write(os.path.join(config.output_dir, converted_name),
                     audio_out, config.fs)

        if utils.query_yes_no("Synthesize ground truth with vocoder? "):
            audio = sig_process.feats_to_audio(mel)

            ori_name = './{}_{}_ori.wav'.format(
                file_name[:-5], config.singers[speaker_index])
            sf.write(os.path.join(config.output_dir, ori_name),
                     audio, config.fs)

コード例 #4

0

ファイルを表示

    def test_file_wav_f0(self, file_name, f0_file):
        """
        Synthesize a wav file three times with different pitch tracks.

        The model output for file_name is vocoded with:
          1. the last two columns of the input features,
          2. the pitch track derived from f0_file,
          3. that pitch track plus uniform noise from np.random.rand.

        Each result is written to config.output_dir without any prompt.
        """
        mel, stft = self.read_wav_file(file_name)

        # One timestamp per feature frame, spaced by config.hoptime.
        frame_times = np.arange(0, len(mel) * config.hoptime, config.hoptime)

        note_data = midi_process.open_f0_file(f0_file)
        trajectory = vamp_notes.note2traj(note_data, frame_times)
        f1 = sig_process.process_pitch(trajectory[:, 0])

        # The predicted f0 is not used by this method.
        out_mel, _, out_vuv = self.process_file(stft, self.sess)

        stem = file_name.split('/')[-1][:-4]
        f0_tag = f0_file.split('/')[-1]

        # 1) Pitch and voicing taken from the input features.
        out_featss = np.concatenate(
            (out_mel[:mel.shape[0]], mel[:out_mel.shape[0], -2:]), axis=-1)
        audio_out = sig_process.feats_to_audio(out_featss)
        sf.write(
            os.path.join(config.output_dir,
                         '{}_SIN_YAM_SACf0.wav'.format(stem)),
            audio_out, config.fs)

        # Noisy copy of the external pitch column, drawn once up front.
        pitch_col = f1[:, 0:1]
        f2 = pitch_col + np.random.rand(pitch_col.shape[0])[:, np.newaxis]

        num_frames = f1.shape[0]

        # 2) External pitch track with the predicted voicing.
        out_featss = np.concatenate(
            (out_mel[:num_frames], f1[:, 0:1], out_vuv[:num_frames]), axis=-1)
        audio_out = sig_process.feats_to_audio(out_featss)
        sf.write(
            os.path.join(config.output_dir,
                         '{}_SIN_YAM_f0_{}.wav'.format(stem, f0_tag)),
            audio_out, config.fs)

        # 3) Same as 2) but with the noisy pitch column.
        out_featss = np.concatenate(
            (out_mel[:num_frames], f2, out_vuv[:num_frames]), axis=-1)
        audio_out = sig_process.feats_to_audio(out_featss)
        sf.write(
            os.path.join(config.output_dir,
                         '{}_SIN_YAM_f0_{}_noise.wav'.format(stem, f0_tag)),
            audio_out, config.fs)

コード例 #5

0

ファイルを表示

    def test_file_hdf5(self, file_name, speaker_index_2):
        """
        Convert an HDF5 feature file to a target singer using embeddings.

        The source embedding comes from file_name itself; the target
        embedding comes from a randomly chosen 'hdf5' file in
        config.feats_dir that belongs to config.singers[speaker_index_2].
        The result is plotted and, after console confirmation, vocoded and
        written to config.output_dir together with a resynthesis of the
        chosen target file.

        NOTE(review): both genders are hard-coded to "M" below, so the
        +/-12 pitch shift can never trigger -- confirm whether they should
        be looked up per singer instead.
        """
        mel, src_emb = self.read_hdf5_file(file_name)

        src_name = file_name.split('_')[1]
        speaker_index = config.singers.index(src_name)

        speaker_gender = "M"
        print("Original singer is {}, a human {}".format(src_name, speaker_gender))

        target_files = []
        for entry in os.listdir(config.feats_dir):
            if entry.endswith('hdf5') and entry.split('_')[1] == config.singers[speaker_index_2]:
                target_files.append(entry)
        mel_2, tgt_emb = self.read_hdf5_file(random.choice(target_files))

        speaker_2_gender = "M"
        print("Target singer is {}, a human {}".format(config.singers[speaker_index_2], speaker_2_gender))

        out_mel = self.process_file(mel, src_emb, tgt_emb, self.sess)

        self.plot_features({
            "Spec Envelope": {"gt": mel[:, :-6], "op": out_mel[:, :-4]},
            "Aperiodic": {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]},
        })

        if utils.query_yes_no("Synthesize output? "):
            num_out = out_mel.shape[0]

            # Second-to-last input column, shifted by 12 on a gender change.
            pitch_col = mel[:num_out, -2:-1]
            if speaker_gender == "F" and speaker_2_gender == "M":
                pitch_col = pitch_col - 12
            elif speaker_gender == "M" and speaker_2_gender == "F":
                pitch_col = pitch_col + 12

            out_featss = np.concatenate(
                (out_mel[:mel.shape[0]], pitch_col, mel[:num_out, -1:]),
                axis=-1)

            audio_out = sig_process.feats_to_audio(out_featss)
            audio_out_2 = sig_process.feats_to_audio(mel_2)

            converted_name = './{}_{}_autovcemb.wav'.format(
                file_name[:-5], config.singers[speaker_index_2])
            sf.write(os.path.join(config.output_dir, converted_name),
                     audio_out, config.fs)

            target_name = './{}_target.wav'.format(file_name[:-5])
            sf.write(os.path.join(config.output_dir, target_name),
                     audio_out_2, config.fs)

        if utils.query_yes_no("Synthesize ground truth with vocoder? "):
            audio = sig_process.feats_to_audio(mel)
            ori_name = './{}_{}_ori.wav'.format(
                file_name[:-5], config.singers[speaker_index])
            sf.write(os.path.join(config.output_dir, ori_name),
                     audio, config.fs)

コード例 #6

0

ファイルを表示

    def solo_unison_file_wav(self, file_name, std=0.5, num_singers=4, timing: int=5):
        """
        Build two synthetic unison mixes from a solo wav recording.

        For each of `num_singers` target embeddings the input features are
        converted with self.process_file, the f0 column is perturbed with
        uniform noise, the frames are optionally rolled by a random offset,
        and the result is vocoded and summed onto the resynthesised input.
        A second mix is built the same way from the unconverted input
        features, so only pitch and timing vary between its voices.

        Args:
            file_name: path to the wav file. The second '_'-separated field
                of its base name must be a voice part ("soprano", "alto",
                "tenor" or "bass") followed by one trailing character.
            std: scale applied to the np.random.rand noise added to f0.
            num_singers: number of voices added to the mix.
            timing: when > 0, each voice is rolled along the frame axis by
                np.random.randint(-timing, timing).

        Raises:
            ValueError: if the voice part in file_name is not recognised.
                Previously this surfaced as an unbound `singer_2` inside
                the loop.

        Side effects:
            Writes '<base>_ori.wav' and the two unison wav files to
            config.output_dir.
        """
        base_name = file_name.split('/')[-1]
        part = base_name.split('_')[1]

        # Choose the embedding file family from the voice part.
        if part[:-1] in ["soprano", "alto"]:
            emb_prefix = "female"
        elif part[:-1] in ["tenor", "bass"]:
            emb_prefix = "male"
        else:
            raise ValueError(
                "Unrecognised voice part {!r} in file name {!r}".format(
                    part, file_name))

        stft, mel = self.read_wav_file(file_name)

        singer_1 = utils.get_embedding_GE2E(file_name)

        audio = sig_process.feats_to_audio(mel)
        sf.write(os.path.join(config.output_dir,'./{}_ori.wav'.format(base_name)), audio, config.fs)

        # The features are read again after vocoding, as in the original
        # code (presumably because feats_to_audio may modify its input --
        # TODO confirm).
        stft, mel = self.read_wav_file(file_name)

        # Each mix is rebuilt with a non-in-place add below. The previous
        # code bound both names to `audio` and used `+=` on slices of it, so
        # the two mixes shared one buffer and contaminated each other.
        output = np.asarray(audio)
        output_nc = np.asarray(audio)

        for count in range(num_singers):
            singer_2 = self.read_hdf5_file_emb(
                "{}_{}.hdf5".format(emb_prefix, count + 1))

            out_mel = self.process_file(mel, singer_1, singer_2, self.sess)

            # Work on the frames that input and model output have in common.
            num_frames = min(mel.shape[0], out_mel.shape[0])

            f0 = mel[:num_frames, -2:-1] + np.random.rand(num_frames)[:, np.newaxis] * std
            vuv = mel[:num_frames, -1:]

            out_featss = np.concatenate((out_mel[:num_frames], f0, vuv), axis=-1)
            # Drop the input's own last two columns before appending the
            # perturbed f0 and the voicing column. The previous code kept
            # them, giving two columns more than `out_featss` and a row
            # mismatch whenever the model output was shorter than the input.
            out_featss_nochange = np.concatenate((mel[:num_frames, :-2], f0, vuv), axis=-1)

            if timing > 0:
                out_featss = np.roll(out_featss, np.random.randint(-timing, timing), 0)
                out_featss_nochange = np.roll(out_featss_nochange, np.random.randint(-timing, timing), 0)

            audio_out = sig_process.feats_to_audio(out_featss)
            audio_out_nochange = sig_process.feats_to_audio(out_featss_nochange)

            # Trim both sides to the shorter signal so the sum is defined.
            mix_len = min(len(output), len(audio_out))
            output = output[:mix_len] + audio_out[:mix_len]

            mix_len_nc = min(len(output_nc), len(audio_out_nochange))
            output_nc = output_nc[:mix_len_nc] + audio_out_nochange[:mix_len_nc]

        # NOTE(review): the sums hold num_singers + 1 signals (the original
        # plus each voice) but are divided by num_singers -- confirm intent.
        output = output / num_singers
        output_nc = output_nc / num_singers

        # Strip the real extension; the previous fixed [:-5] slice removed
        # one character too many from '.wav' names.
        stem = os.path.splitext(base_name)[0]

        sf.write(os.path.join(config.output_dir,'./{}_{}_{}_{}_unison.wav'.format(stem, std, num_singers, timing)), output, config.fs)
        sf.write(os.path.join(config.output_dir,'./{}_{}_{}_{}_unison_notimbre.wav'.format(stem, std, num_singers, timing)), output_nc, config.fs)

コード例 #7

0

ファイルを表示

    def test_file_wav(self, file_name, speaker_index):
        """
        Run the model on a wav file for a given singer and optionally vocode it.

        The second value returned by self.read_wav_file is passed to
        self.process_file together with speaker_index. The outputs are
        plotted against the input features. The user is then asked on the
        console whether to synthesize the output (optionally with the
        predicted f0 shifted by 12 up or down) and whether to resynthesize
        the input features. Files are written to config.output_dir.
        """
        mel, stft = self.read_wav_file(file_name)

        out_mel, out_f0, out_vuv = self.process_file(
            stft, speaker_index, self.sess)

        panels = {}
        panels["Spec Envelope"] = {"gt": mel[:, :-6], "op": out_mel[:, :-4]}
        panels["Aperiodic"] = {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]}
        panels["F0"] = {"gt": mel[:, -2], "op": out_f0}
        panels["Vuv"] = {"gt": mel[:, -1], "op": out_vuv}
        self.plot_features(panels)

        synth = utils.query_yes_no("Synthesize output? ")

        # Only the base name is used for the output files.
        file_name = file_name.split('/')[-1]

        if synth:
            pitch = out_f0
            if utils.query_yes_no("Change in gender? "):
                # Female -> male lowers the pitch track, otherwise raise it.
                if utils.query_yes_no("Female to male?"):
                    pitch = out_f0 - 12
                else:
                    pitch = out_f0 + 12

            out_featss = np.concatenate((out_mel, pitch, out_vuv), axis=-1)
            audio_out = sig_process.feats_to_audio(out_featss)

            converted_name = '{}_{}_SDN.wav'.format(
                file_name[:-4], config.singers[speaker_index])
            sf.write(os.path.join(config.output_dir, converted_name),
                     audio_out, config.fs)

        if utils.query_yes_no("Synthesize ground truth with vocoder? "):
            audio = sig_process.feats_to_audio(mel)
            ori_name = '{}_ori.wav'.format(file_name[:-4])
            sf.write(os.path.join(config.output_dir, ori_name),
                     audio, config.fs)

コード例 #8

0

ファイルを表示

    def test_file_wav_f0(self, file_name, f0_file, speaker_index):
        """
        Run the model on a wav file and vocode it with an external pitch track.

        The pitch track is loaded from f0_file, sampled at one point per
        feature frame and passed through sig_process.process_pitch. After
        plotting, the user is asked on the console whether to synthesize
        the model output combined with that track, and whether to
        resynthesize the input features. Files go to config.output_dir.
        """
        mel, stft = self.read_wav_file(file_name)

        note_data = midi_process.open_f0_file(f0_file)

        # One timestamp per feature frame, spaced by config.hoptime.
        frame_times = np.arange(0, len(mel) * config.hoptime, config.hoptime)

        trajectory = vamp_notes.note2traj(note_data, frame_times)
        f1 = sig_process.process_pitch(trajectory[:, 0])

        out_mel, out_f0, out_vuv = self.process_file(
            stft, speaker_index, self.sess)

        panels = {}
        panels["Spec Envelope"] = {"gt": mel[:, :-6], "op": out_mel[:, :-4]}
        panels["Aperiodic"] = {"gt": mel[:, -6:-2], "op": out_mel[:, -4:]}
        panels["F0"] = {"gt": f1[:, 0], "op": out_f0}
        panels["Vuv"] = {"gt": mel[:, -1], "op": out_vuv}
        self.plot_features(panels)

        synth = utils.query_yes_no("Synthesize output? ")

        # Only the base name is used for the output files.
        file_name = file_name.split('/')[-1]

        if synth:
            # All columns of the processed pitch array are appended as-is.
            out_featss = np.concatenate((out_mel[:f1.shape[0]], f1), axis=-1)
            audio_out = sig_process.feats_to_audio(out_featss)

            converted_name = '{}_{}_SDN_f0_{}.wav'.format(
                file_name[:-4], config.singers[speaker_index],
                f0_file.split('/')[-1])
            sf.write(os.path.join(config.output_dir, converted_name),
                     audio_out, config.fs)

        if utils.query_yes_no("Synthesize ground truth with vocoder? "):
            audio = sig_process.feats_to_audio(mel)
            ori_name = '{}_ori.wav'.format(file_name[:-4])
            sf.write(os.path.join(config.output_dir, ori_name),
                     audio, config.fs)

コード例 #9

0

ファイルを表示

ファイル: autovc.py プロジェクト: MTG/content_choral_separation

    def solo_unison_file_hdf5(self,
                              file_name,
                              std=0.5,
                              num_singers=4,
                              timing: int = 5):
        """
        Build a synthetic "no timbre change" unison mix from an HDF5 file.

        The input features are vocoded once as the reference. Then, for each
        of `num_singers` voices, the f0 column is perturbed with uniform
        noise, the frames are optionally rolled by a random offset, and the
        result is vocoded and summed onto the reference.

        Args:
            file_name: HDF5 feature file name. It must start with "csd" and
                its second '_'-separated field must be a voice part
                ("soprano", "alto", "tenor" or "bass") followed by one
                trailing character; that field is also looked up in
                config.singers.
            std: scale applied to the np.random.rand noise added to f0.
            num_singers: number of voices added to the mix.
            timing: when > 0, each voice is rolled along the frame axis by
                np.random.randint(-timing, timing).

        Raises:
            Exception: "Input Error" if file_name does not match the
                expected pattern.

        Side effects:
            Writes '<stem>_ori.wav' and the unison wav file to
            config.output_dir.
        """
        part = file_name.split('_')[1]

        if part[:-1] not in ["soprano", "alto", "tenor", "bass"
                             ] or not file_name.startswith("csd"):
            raise Exception("Input Error")

        # Target voices are drawn from NUS singers of the matching gender.
        target_gender = "F" if part[:-1] in ["soprano", "alto"] else "M"
        speaker_indecis = [
            config.singers.index(x) for x in config.genders
            if config.genders[x] == target_gender and x in config.nus_singers
        ]

        stft, mel = self.read_hdf5_file(file_name)

        speaker_index = config.singers.index(part)

        audio = sig_process.feats_to_audio(mel)
        sf.write(
            os.path.join(config.output_dir,
                         './{}_ori.wav'.format(file_name[:-5])), audio,
            config.fs)

        # Read the features again after vocoding, as the original code did.
        # The previous code bound the whole returned pair to `mel`, which
        # broke the 2-D indexing below; unpack it like the first read.
        stft, mel = self.read_hdf5_file(file_name)

        # The mix is rebuilt with a non-in-place add so `audio` is untouched.
        output = np.asarray(audio)
        for count in range(num_singers):
            speaker_index_2 = random.choice(speaker_indecis)

            # The converted features are used only for their frame count;
            # this mix deliberately keeps the input's own spectral columns.
            out_mel = self.process_file(mel, speaker_index, speaker_index_2,
                                        self.sess)
            num_frames = min(mel.shape[0], out_mel.shape[0])

            f0 = mel[:num_frames, -2:-1] + np.random.rand(
                num_frames)[:, np.newaxis] * std
            # Drop the input's own last two columns before appending the
            # perturbed f0 and the voicing column. The previous code kept
            # them, producing two extra columns and a possible row mismatch.
            out_featss = np.concatenate(
                (mel[:num_frames, :-2], f0, mel[:num_frames, -1:]), axis=-1)
            if timing > 0:
                out_featss = np.roll(out_featss,
                                     np.random.randint(-timing, timing), 0)
            audio_out = sig_process.feats_to_audio(out_featss)

            # Trim both sides to the shorter signal so the sum is defined.
            mix_len = min(len(output), len(audio_out))
            output = output[:mix_len] + audio_out[:mix_len]

        # NOTE(review): the sum holds num_singers + 1 signals (the original
        # plus each voice) but is divided by num_singers -- confirm intent.
        output = output / num_singers

        sf.write(
            os.path.join(
                config.output_dir, './{}_{}_{}_{}_unison_notimbre.wav'.format(
                    file_name[:-5], std, num_singers, timing)), output,
            config.fs)

コード例 #10

0

ファイルを表示

ファイル: autovc.py プロジェクト: MTG/content_choral_separation

    def test_file_wav(self, file_name, speaker_index_1, speaker_index):
        """
        Convert a wav file from one singer to another and optionally vocode it.

        The genders of both singers are looked up in config.genders. When
        they differ, the input's f0 column is shifted by 12 (down for
        female -> male, up for male -> female) before synthesis. The
        converted features are plotted and the user is asked on the console
        whether to synthesize the output and/or the input features.

        Args:
            file_name: path to the input wav file.
            speaker_index_1: index into config.singers of the source singer.
            speaker_index: index into config.singers of the target singer.

        Side effects:
            Prompts via utils.query_yes_no and, depending on the answers,
            writes '<stem>_<target>_autovc.wav' and/or
            '<stem>_<source>_ori.wav' to config.output_dir.
        """
        speaker_gender = config.genders[config.singers[speaker_index_1]]
        print("Original singer is {}, a human {}".format(
            config.singers[speaker_index_1], speaker_gender))

        speaker_2_gender = config.genders[config.singers[speaker_index]]
        print("Target singer is {}, a human {}".format(
            config.singers[speaker_index], speaker_2_gender))
        stft, mel = self.read_wav_file(file_name)

        out_mel = self.process_file(mel, speaker_index_1, speaker_index,
                                    self.sess)

        plot_dict = {
            "Spec Envelope": {
                "gt": mel[:, :-6],
                "op": out_mel[:, :-4]
            },
            "Aperiodic": {
                "gt": mel[:, -6:-2],
                "op": out_mel[:, -4:]
            }
        }

        self.plot_features(plot_dict)

        # Output names use the base name without directories or extension.
        # The previous fixed [:-5] slice cut one character too many from a
        # '.wav' name and kept any directory part of the input path.
        stem = os.path.splitext(os.path.basename(file_name))[0]

        synth = utils.query_yes_no("Synthesize output? ")

        if synth:
            num_out = out_mel.shape[0]

            # Second-to-last input column, shifted by 12 on a gender change.
            f0 = mel[:num_out, -2:-1]
            if speaker_gender == "F" and speaker_2_gender == "M":
                f0 = f0 - 12
            elif speaker_gender == "M" and speaker_2_gender == "F":
                f0 = f0 + 12

            out_featss = np.concatenate(
                (out_mel[:mel.shape[0]], f0, mel[:num_out, -1:]), axis=-1)

            audio_out = sig_process.feats_to_audio(out_featss)

            sf.write(
                os.path.join(
                    config.output_dir, './{}_{}_autovc.wav'.format(
                        stem, config.singers[speaker_index])),
                audio_out, config.fs)

        synth_ori = utils.query_yes_no(
            "Synthesize ground truth with vocoder? ")

        if synth_ori:
            audio = sig_process.feats_to_audio(mel)
            sf.write(
                os.path.join(
                    config.output_dir,
                    './{}_{}_ori.wav'.format(stem,
                                             config.singers[speaker_index_1])),
                audio, config.fs)