Ejemplo n.º 1
0
def test_list_wav_files():
    """Check the listing returned by list_wav_files on two folders.

    For ./tests/data/audio the result must be a list of length three
    that contains each expected file name joined onto the folder path.
    For ./tests/data_aiff/audio the result must be empty.
    """
    wav_folder = "./tests/data/audio"
    expected_names = [
        "147764-4-7-0.wav",
        "176787-5-0-0.wav",
        "40722-8-0-7.wav",
    ]

    found = list_wav_files(wav_folder)
    assert type(found) is list
    assert len(found) == 3
    for name in expected_names:
        # Entries are expected as folder-prefixed paths, not bare names.
        assert os.path.join(wav_folder, name) in found

    # NOTE(review): this folder presumably holds no wav files (only
    # aiff) -- confirm against the fixtures.
    other_folder = "./tests/data_aiff/audio"
    found = list_wav_files(other_folder)
    assert len(found) == 0
Ejemplo n.º 2
0
    def process(self):
        """ Generate augmented data for each file in dataset.

        The dataset is resampled to self.sr first when
        check_sampling_rate reports that this was not done yet. Then,
        for each entry of self.augmentations_list, the folder structure
        of {DATASET_PATH}/audio/original is replicated into the
        matching augmentation folder and every wav file is built into
        it with the entry's 'transformer'. Files whose destination
        already exists are skipped.

        """
        already_resampled = self.dataset.check_sampling_rate(self.sr)
        if not already_resampled:
            print("Changing sampling rate ...")
            self.dataset.change_sampling_rate(self.sr)
            print('Done!')

        # The first sub folder holds the original audio files; the
        # remaining ones are the folders with augmented files, paired
        # by position with self.augmentations_list.
        _, sub_folders = self.get_audio_paths(self.sr)
        source_root = sub_folders[0]
        augmented_roots = sub_folders[1:]

        for position, augmentation in enumerate(self.augmentations_list):
            target_root = augmented_roots[position]

            # Replicate folder structure of the original files into
            # the augmented folder.
            duplicate_folder_structure(source_root, target_root)

            # Process each file found under the original folder.
            for wav_path in progressbar(list_wav_files(source_root)):
                augmented_path = wav_path.replace(source_root, target_root)
                if not os.path.exists(augmented_path):
                    augmentation['transformer'].build(
                        wav_path, augmented_path)
Ejemplo n.º 3
0
    def check_sampling_rate(self, sr):
        """ Checks if dataset was resampled before.

        For now, only checks that the first sub folder returned by
        get_audio_paths(sr) exists and that each wav file present in
        audio_path has a counterpart at the same relative location
        inside that folder.

        Parameters
        ----------
        sr : int
            Sampling rate.

        Returns
        -------
        bool
            True if the dataset was resampled before.

        """
        _, subfolders = self.get_audio_paths(sr)
        resampled_root = subfolders[0]
        if not os.path.exists(resampled_root):
            return False

        # TODO: check if the audio file was resampled correctly,
        # not only if exists.
        # all() stops at the first missing destination, like an early
        # return would.
        return all(
            os.path.exists(wav_path.replace(self.audio_path, resampled_root))
            for wav_path in list_wav_files(self.audio_path)
        )
Ejemplo n.º 4
0
    def change_sampling_rate(self, new_sr):
        """ Changes the sampling rate of each wav file in audio_path.

        Replicates the folder structure of audio_path into the first
        sub folder returned by get_audio_paths(new_sr) (i.e
        audio22050/original), then converts each wav file in audio_path
        and saves the result in that folder. Files whose destination
        already exists are left untouched.

        Parameters
        ----------
        new_sr : int
            Sampling rate.

        """
        _, subfolders = self.get_audio_paths(new_sr)
        resampled_root = subfolders[0]  # audio22050/original
        duplicate_folder_structure(self.audio_path, resampled_root)

        # A single transformer is configured once and reused for
        # every file.
        resampler = sox.Transformer()
        resampler.convert(samplerate=new_sr)

        for wav_path in progressbar(list_wav_files(self.audio_path)):
            resampled_path = wav_path.replace(self.audio_path,
                                              resampled_root)
            if not os.path.exists(resampled_path):
                resampler.build(wav_path, resampled_path)
Ejemplo n.º 5
0
    def generate_file_lists(self):
        """ Fills file_lists and wav_to_labels for every fold.

        For each fold in fold_list, the wav files listed under
        {audio_path}/{fold} are stored in file_lists[fold]. Afterwards
        wav_to_labels is rebuilt from scratch, mapping each of those
        files to {annotations_folder}/{fold}/{name}.txt, where name is
        the part of the file's base name before its first dot.

        file_lists is indexed but not created here, so it must already
        exist when this method is called.

        """
        for fold_name in self.fold_list:
            fold_audio = os.path.join(self.audio_path, fold_name)
            self.file_lists[fold_name] = list_wav_files(fold_audio)

        self.wav_to_labels = {}
        for fold_name in self.fold_list:
            for wav_path in self.file_lists[fold_name]:
                name = os.path.basename(wav_path).split('.')[0]
                label_path = os.path.join(
                    self.annotations_folder, fold_name, name + '.txt')
                self.wav_to_labels[wav_path] = label_path
Ejemplo n.º 6
0
 def generate_file_lists(self):
     """ Builds file_lists with the wav files that belong to each fold.

     file_lists is reset to an empty dict and then gets one entry per
     fold in fold_list. A wav file found under audio_path is added to
     a fold's list when its basename (as given by get_basename_wav)
     is a key of metadata and the 'fold' value stored for it equals
     that fold. Files without a metadata entry are ignored.

     """
     self.file_lists = {}
     # The listing does not depend on the fold, so it is done once
     # instead of once per fold.
     all_files = list_wav_files(self.audio_path)
     for fold in self.fold_list:
         self.file_lists[fold] = []
         for fil in all_files:
             basename = self.get_basename_wav(fil)
             if (basename in self.metadata and
                     self.metadata[basename]['fold'] == fold):
                 self.file_lists[fold].append(fil)
Ejemplo n.º 7
0
 def generate_file_lists(self):
     """ Builds file_lists with the wav files that belong to each fold.

     file_lists is reset to an empty dict and then gets one entry per
     fold in fold_list. The rows of metadata whose 'split' equals the
     fold are selected; for every fold other than 'train' only the
     rows with 'annotator_id' equal to 0 are kept. A wav file found
     under audio_path is added to the fold's list when its base name
     appears in the 'audio_filename' column of the selected rows.

     NOTE(review): metadata is presumably a pandas DataFrame (boolean
     mask indexing and to_list() are used) -- confirm.

     Raises
     ------
     AssertionError
         If no wav file is found under audio_path.

     """
     self.file_lists = {}
     all_files = list_wav_files(self.audio_path)
     assert len(all_files) != 0
     for fold in self.fold_list:
         if fold == 'train':
             metadata_fold = self.metadata[self.metadata['split'] == fold]
         else:
             metadata_fold = self.metadata[(
                 (self.metadata['split'] == fold) &
                 (self.metadata['annotator_id'] == 0))]
         # A set gives constant-time membership tests and removes
         # duplicated file names; a list would be scanned once per
         # audio file.
         filenames_fold = set(metadata_fold['audio_filename'].to_list())
         self.file_lists[fold] = [
             fil for fil in all_files
             if os.path.basename(fil) in filenames_fold
         ]
Ejemplo n.º 8
0
    def extract(self, dataset):
        """ Extracts features for each file in dataset.

        Call calculate() for each file in dataset and save the
        result into the features path.

        The dataset is resampled to self.sr first when its
        check_sampling_rate reports that this was not done yet. Each
        audio sub folder whose features folder is already reported as
        extracted by check_if_extracted_path is skipped. Every feature
        file keeps the relative location and name of its wav file, with
        the extension replaced by '.npy'.

        Parameters
        ----------
        dataset : Dataset
            Instance of the dataset.

        """
        features_path = self.get_features_path(dataset)
        mkdir_if_not_exists(features_path, parents=True)

        if not dataset.check_sampling_rate(self.sr):
            print('Changing sampling rate ...')
            dataset.change_sampling_rate(self.sr)
            print('Done!')

        # Define path to audio and features folders
        audio_path, subfolders = dataset.get_audio_paths(self.sr)

        # Duplicate folder structure of audio in features folder
        duplicate_folder_structure(audio_path, features_path)
        for audio_folder in subfolders:
            subfolder_name = os.path.basename(audio_folder)
            features_path_sub = os.path.join(features_path, subfolder_name)
            if self.check_if_extracted_path(features_path_sub):
                continue

            # Navigate in the structure of audio folder and extract
            # features of the each wav file
            for path_audio in progressbar(list_wav_files(audio_folder)):
                features_array = self.calculate(path_audio)
                path_to_features_file = path_audio.replace(
                    audio_path, features_path)
                # Swap only the file extension. Replacing the text
                # 'wav' anywhere in the path would also rewrite folder
                # or file names that happen to contain it.
                # NOTE(review): code that rebuilds these paths to load
                # the features should derive them the same way.
                path_to_features_file = (
                    os.path.splitext(path_to_features_file)[0] + '.npy')
                np.save(path_to_features_file, features_array)

            # Save parameters.json for future checking
            self.set_as_extracted(features_path_sub)
Ejemplo n.º 9
0
 def generate_file_lists(self):
     """ Stores the wav files of each fold in file_lists.

     For each fold in fold_list, the wav files listed under
     {audio_path}/{fold} are stored in file_lists[fold]. file_lists
     is indexed but not created here, so it must already exist when
     this method is called.

     """
     for fold_name in self.fold_list:
         fold_audio = os.path.join(self.audio_path, fold_name)
         fold_files = list_wav_files(fold_audio)
         self.file_lists[fold_name] = fold_files