Esempio n. 1
0
    def load_directory(self, directory_path, condition):
        """Load the ``.wav`` files found directly inside ``directory_path``.

        Files are visited in sorted filename order. ``os.listdir`` returns
        entries in arbitrary order, so without sorting the positions in the
        returned lists (and the ids handed out in ``self.speaker_mapping``)
        would not be reproducible.

        Args:
            directory_path: Directory to scan; sub-directories are not
                descended into.
            condition: ``'clean'`` loads every file and records a regain
                factor for it (plus speech indices when
                ``self.extract_voice`` is truthy). Any other value loads a
                file only when ``self.in_memory_percentage == 1`` or a
                uniform random draw in [0, 1) is at most
                ``(self.in_memory_percentage - 0.5) * 2``; files that are not
                loaded are represented by the placeholder ``[-1]``.

        Returns:
            The tuple ``(sequences, file_paths, speakers,
            speech_onset_offset_indices, regain_factors)``. The first three
            lists hold one entry per ``.wav`` file. ``regain_factors`` is only
            filled for the ``'clean'`` condition, and
            ``speech_onset_offset_indices`` only when additionally
            ``self.extract_voice`` is truthy.

        Side effects:
            Increments ``self.num_sequences_in_memory`` once per file that is
            actually loaded, and registers previously unseen speakers in
            ``self.speaker_mapping`` with consecutive ids starting at 1.
        """
        # Sorted so the iteration order does not depend on the file system.
        filenames = sorted(
            filename for filename in os.listdir(directory_path)
            if filename.endswith('.wav')
        )

        speakers = []
        file_paths = []
        speech_onset_offset_indices = []
        regain_factors = []
        sequences = []
        for filename in filenames:
            # The first four characters of the filename are used as speaker id.
            speaker_name = filename[0:4]
            speakers.append(speaker_name)

            filepath = os.path.join(directory_path, filename)

            if condition == 'clean':
                sequence = util.load_wav(filepath, self.sample_rate)
                sequences.append(sequence)
                self.num_sequences_in_memory += 1
                # NOTE(review): presumably util.rms() can be 0 for an
                # all-silent clip -- confirm how this division should behave.
                regain_factors.append(self.regain / util.rms(sequence))
                # When extract_voice is set, leading and trailing silence is
                # to be removed later on.
                if self.extract_voice:
                    # speech_onset_offset_indices holds the start and end
                    # points of the non-silent segment.
                    speech_onset_offset_indices.append(
                        util.get_subsequence_with_speech_indices(sequence))
            else:
                # Short-circuit keeps the random draw from happening when
                # everything is to be kept in memory.
                keep_in_memory = (
                    self.in_memory_percentage == 1
                    or np.random.uniform(0, 1)
                    <= (self.in_memory_percentage - 0.5) * 2
                )
                if keep_in_memory:
                    sequence = util.load_wav(filepath, self.sample_rate)
                    sequences.append(sequence)
                    self.num_sequences_in_memory += 1
                else:
                    # Placeholder for a file that was not loaded.
                    sequences.append([-1])

            if speaker_name not in self.speaker_mapping:
                self.speaker_mapping[speaker_name] = len(
                    self.speaker_mapping) + 1

            file_paths.append(filepath)

        return sequences, file_paths, speakers, speech_onset_offset_indices, regain_factors
    def load_directory(self, filenames, spk):
        """Load the given wav files and derive a speaker label for each.

        The label comes from the last ``/``-separated component of each path,
        split on ``_``: the first three characters of field 0 when ``spk`` is
        ``'a'``, otherwise the first three characters of field 2.

        Args:
            filenames: Iterable of wav file paths.
            spk: ``'a'`` selects the first ``_``-separated field of the file
                name as speaker label; any other value selects the third.

        Returns:
            The tuple ``(sequences, speakers, speech_onset_offset_indices,
            regain_factors)``. ``speech_onset_offset_indices`` is only filled
            when ``self.extract_voice`` is truthy; ``regain_factors`` is
            always returned empty by this method.

        Raises:
            IndexError: If ``spk`` is not ``'a'`` and a file name has fewer
                than three ``_``-separated fields.

        Side effects:
            Increments ``self.num_sequences_in_memory`` per loaded file and
            registers unseen speakers in ``self.speaker_mapping`` with
            consecutive ids starting at 1.
        """
        sequences, speakers = [], []
        speech_onset_offset_indices = []
        # Never populated here; kept so the returned tuple keeps its shape.
        regain_factors = []

        for wav_path in filenames:
            name_fields = wav_path.rsplit('/', 1)[-1].split('_')
            if spk == 'a':
                speaker_name = name_fields[0][:3]
            else:
                speaker_name = name_fields[2][:3]
            speakers.append(speaker_name)

            audio = util.load_wav(wav_path, self.sample_rate)
            sequences.append(audio)
            self.num_sequences_in_memory += 1

            if self.extract_voice:
                # Indices of the sub-sequence without leading/trailing silence.
                speech_onset_offset_indices.append(
                    util.get_subsequence_with_speech_indices(audio))

            # Assign the next free id (1-based) the first time a speaker is seen.
            self.speaker_mapping.setdefault(
                speaker_name, len(self.speaker_mapping) + 1)

        return sequences, speakers, speech_onset_offset_indices, regain_factors