コード例 #1
    def Preprocessing_general_speaker(self):
        """
        Preprocess every sentence of this speaker, then normalize.

        First pass, one sentence at a time:
            - read the EMA data and turn it into a (K, 18) array with the
              articulators in a fixed order, interpolate missing values and
              smooth the trajectories;
            - load the wav, peak-normalize it and compute the acoustic
              features (mfcc + delta + deltadelta + context frames);
            - remove the silences at the beginning and the end, undersample
              the EMA trajectory to one position per mfcc frame;
            - save the intermediate arrays and accumulate them in
              self.list_EMA_traj / self.list_MFCC_frames.
        Then compute the speaker-level normalization values from those lists,
        and in a second pass normalize each sentence with them.
        Final data are in Preprocessed_data/<speaker>/ema_final/*.npy and
        mfcc/*.npy.
        """
        self.create_missing_dir()
        N = len(self.EMA_files)
        if self.N_max != 0:
            N = self.N_max
        # All per-sentence arrays for this speaker live under this directory.
        out_dir = os.path.join(root_path, "Preprocessed_data", self.speaker)
        for i in range(N):
            ema = self.read_ema_file(i)
            ema_VT = self.add_vocal_tract(ema)
            ema_VT_smooth = self.smooth_data(ema_VT)
            path_wav = os.path.join(self.path_wav_files,
                                    self.EMA_files[i] + '.wav')
            wav, sr = librosa.load(path_wav, sr=self.sampling_rate_wav)
            # Peak-normalize on the absolute amplitude: dividing by
            # np.max(wav) alone mis-scales (or sign-flips) a signal whose
            # negative peak is larger than its positive one.
            wav = 0.5 * wav / np.max(np.abs(wav))
            mfcc = self.from_wav_to_mfcc(wav)
            ema_VT_smooth, mfcc = self.remove_silences(i, ema_VT_smooth, mfcc)
            ema_VT_smooth, mfcc = self.synchro_ema_mfcc(ema_VT_smooth, mfcc)
            # Keep the un-synchronized trajectory too ("ema"), alongside the
            # mfcc frames and the silence-trimmed, synchronized trajectory.
            np.save(os.path.join(out_dir, "ema", self.EMA_files[i]), ema_VT)
            np.save(os.path.join(out_dir, "mfcc", self.EMA_files[i]), mfcc)
            np.save(os.path.join(out_dir, "ema_final", self.EMA_files[i]),
                    ema_VT_smooth)
            self.list_EMA_traj.append(ema_VT_smooth)
            self.list_MFCC_frames.append(mfcc)
        # Norm values need the full lists, hence the two-pass structure.
        self.calculate_norm_values()

        for i in range(N):
            ema_VT_smooth = np.load(
                os.path.join(out_dir, "ema_final",
                             self.EMA_files[i] + ".npy"))
            mfcc = np.load(
                os.path.join(out_dir, "mfcc", self.EMA_files[i] + ".npy"))
            ema_VT_smooth_norma, mfcc = self.normalize_sentence(
                i, ema_VT_smooth, mfcc)
            # Overwrite the intermediate files with the normalized versions.
            np.save(os.path.join(out_dir, "mfcc", self.EMA_files[i]), mfcc)
            np.save(os.path.join(out_dir, "ema_final", self.EMA_files[i]),
                    ema_VT_smooth_norma)
        #  split_sentences(speaker)
        get_fileset_names(self.speaker)
コード例 #2
    def Preprocessing_general_speaker(self):
        """
        Preprocess every sentence of this speaker, then normalize.

        First pass, one sentence at a time: read and smooth the EMA data,
        compute the acoustic features from the wav, remove leading/trailing
        silences, resample the EMA trajectory to one position per mfcc frame,
        save the intermediate arrays and accumulate them for the speaker-level
        normalization. Second pass: normalize each sentence with those values.
        Final data are in Preprocessed_data/<speaker>/ema_final/*.npy and
        mfcc/*.npy.
        """
        self.create_missing_dir()
        N = len(self.EMA_files)
        if self.N_max != 0:
            N = self.N_max
        # All per-sentence arrays for this speaker live under this directory.
        out_dir = os.path.join(root_path, "Preprocessed_data", self.speaker)
        for i in range(N):
            ema = self.read_ema_file(i)
            ema_VT = self.add_vocal_tract(ema)
            ema_VT_smooth = self.smooth_data(ema_VT)
            path_wav = os.path.join(self.path_wav_files,
                                    self.EMA_files[i] + '.wav')
            wav, sr = librosa.load(path_wav, sr=self.sampling_rate_wav)
            # Peak-normalize on the absolute amplitude: dividing by
            # np.max(wav) alone mis-scales (or sign-flips) a signal whose
            # negative peak is larger than its positive one.
            wav = 0.5 * wav / np.max(np.abs(wav))
            mfcc = self.from_wav_to_mfcc(wav)
            ema_VT_smooth, mfcc = self.remove_silences(self.EMA_files[i],
                                                       ema_VT_smooth, mfcc)
            # Align the EMA trajectory on the mfcc frame rate.
            # BUG FIX: the original resampled `ema` (raw data, before the
            # vocal-tract features, smoothing and silence removal) instead of
            # `ema_VT_smooth`, discarding all the processing above.
            n_frames_wanted = mfcc.shape[0]
            ema_VT_smooth = scipy.signal.resample(ema_VT_smooth,
                                                  num=n_frames_wanted)
            np.save(os.path.join(out_dir, "ema", self.EMA_files[i]), ema_VT)
            np.save(os.path.join(out_dir, "mfcc", self.EMA_files[i]), mfcc)
            np.save(os.path.join(out_dir, "ema_final", self.EMA_files[i]),
                    ema_VT_smooth)
            self.list_EMA_traj.append(ema_VT_smooth)
            self.list_MFCC_frames.append(mfcc)
        # Norm values need the full lists, hence the two-pass structure.
        self.calculate_norm_values()

        for i in range(N):
            ema_VT_smooth = np.load(
                os.path.join(out_dir, "ema_final",
                             self.EMA_files[i] + ".npy"))
            mfcc = np.load(
                os.path.join(out_dir, "mfcc", self.EMA_files[i] + ".npy"))
            ema_VT_smooth_norma, mfcc = self.normalize_sentence(
                i, ema_VT_smooth, mfcc)
            # Overwrite the intermediate files with the normalized versions.
            np.save(os.path.join(out_dir, "mfcc", self.EMA_files[i]), mfcc)
            np.save(os.path.join(out_dir, "ema_final", self.EMA_files[i]),
                    ema_VT_smooth_norma)
        #  split_sentences(speaker)
        get_fileset_names(self.speaker)
コード例 #3
    def Preprocessing_general_speaker(self):
        """
        Preprocess every sentence of this speaker, then normalize.

        This speaker's raw data comes as multi-sentence files, so the method
        first calls get_data_per_sentence() to create one file per sentence
        (wav_cut/), then goes through the sentences one by one:
            - read the EMA data, add the vocal-tract features, interpolate
              missing values and smooth the trajectories (for a better
              estimation of the norm values);
            - compute the acoustic features (mfcc + delta + deltadelta +
              context frames) from the cut wav file;
            - remove the silences at the beginning and the end, undersample
              the EMA trajectory to one position per mfcc frame;
            - save the intermediate arrays and accumulate them for the
              speaker-level normalization.
        Then normalize each sentence with those values and apply a last
        smoothing at the mfcc frame rate.
        Final data are in Preprocessed_data/<speaker>/ema_final/*.npy and
        mfcc/*.npy.
        """
        self.create_missing_dir()

        # One raw file contains several sentences; this creates one file per
        # sentence under wav_cut/.  (A per-.mat file count computed here in
        # the original was never used and has been removed.)
        self.get_data_per_sentence()

        self.EMA_files_2 = sorted([
            name[:-4] for name in os.listdir(
                os.path.join(self.path_files_brutes, "wav_cut"))
            if name.endswith(".wav")
        ])
        N_2 = len(self.EMA_files_2)
        if self.N_max != 0:
            N_2 = min(self.N_max, N_2)

        # All per-sentence arrays for this speaker live under this directory.
        out_dir = os.path.join(root_path, "Preprocessed_data", self.speaker)
        for i in range(N_2):
            ema = self.read_ema_file(i)
            ema_VT = self.add_vocal_tract(ema)
            # Smooth now for a better calculation of the norm values.
            ema_VT_smooth = self.smooth_data(ema_VT)
            mfcc = self.from_wav_to_mfcc(i)
            ema_VT_smooth, mfcc = self.remove_silences(i, ema_VT_smooth, mfcc)
            ema_VT_smooth, mfcc = self.synchro_ema_mfcc(ema_VT_smooth, mfcc)
            np.save(os.path.join(out_dir, "ema", self.EMA_files_2[i]), ema_VT)
            np.save(os.path.join(out_dir, "mfcc", self.EMA_files_2[i]), mfcc)
            np.save(os.path.join(out_dir, "ema_final", self.EMA_files_2[i]),
                    ema_VT_smooth)
            self.list_EMA_traj.append(ema_VT_smooth)
            self.list_MFCC_frames.append(mfcc)
        # Norm values need the full lists, hence the two-pass structure.
        self.calculate_norm_values()

        for i in range(N_2):
            ema_VT_smooth = np.load(
                os.path.join(out_dir, "ema_final",
                             self.EMA_files_2[i] + ".npy"))
            mfcc = np.load(
                os.path.join(out_dir, "mfcc", self.EMA_files_2[i] + ".npy"))
            ema_VT_smooth_norma, mfcc = self.normalize_sentence(
                i, ema_VT_smooth, mfcc)
            # Last smoothing at the mfcc frame rate (1 / hop_time Hz).
            new_sr = 1 / self.hop_time
            ema_VT_smooth_norma = self.smooth_data(ema_VT_smooth_norma, new_sr)
            # Overwrite the intermediate files with the normalized versions.
            np.save(os.path.join(out_dir, "mfcc", self.EMA_files_2[i]), mfcc)
            np.save(os.path.join(out_dir, "ema_final", self.EMA_files_2[i]),
                    ema_VT_smooth_norma)

    #  split_sentences(self.speaker)
        get_fileset_names(self.speaker)