def create_dest_dirs(self, species_list):
    '''
    Prepare the directory tree that receives the new
    spectrogram snippets: the root self.out_dir, plus
    one subdirectory per species directly below it.

    What happens to a directory that already exists is
    decided by Utils.create_folder() based on
    self.overwrite_policy. As documented by the original
    author of this method:

        o if overwrite_policy is True, wipe the dir
        o if overwrite_policy is SKIP, leave the
          directory in place, contents intact
        o else ask user.
          If response is Yes, wipe the dir
          else raise FileExistsError

    The result of creating the root directory is not
    checked; only a falsy result for a species
    subdirectory aborts the run.

    :param species_list: names of species to process
    :type species_list: [str]
    :return: top level dir for spectrograms
        (same as self.out_dir)
    :rtype: str
    :raise FileExistsError: if Utils.create_folder()
        reports that a species dir could not be
        (re)created.
    '''

    root_dir = self.out_dir
    policy = self.overwrite_policy

    # Root dir of each species' spectro snippets:
    Utils.create_folder(root_dir, overwrite_policy=policy)

    # One dir each for the spectrogram snippets of one species:
    for species_name in species_list:
        target_dir = os.path.join(root_dir, species_name)
        dir_ready = Utils.create_folder(target_dir,
                                        overwrite_policy=policy)
        if dir_ready:
            continue
        raise FileExistsError(
            f"Target dir {target_dir} exists; aborting")

    return root_dir
def create_dest_dirs(self, species_list):
    '''
    Creates all directories that will hold new
    audio snippets and spectrograms for each species.
    The resulting layout is:

        <self.out_dir>/spectrograms/<species>/
        <self.out_dir>/wav-files/<species>/

    What happens to a directory that already exists is
    decided by utils.create_folder() based on
    self.overwrite_policy. As documented by the original
    author of this method:

        o if overwrite_policy is True, wipe the dir
        o else ask user.
          If response is Yes, wipe the dir
          else raise FileExistsError

    The result of creating the root directory itself
    is not checked.

    :param species_list: names of species to process
    :type species_list: [str]
    :return: top level dirs for audio snippets
        and spectrograms, in that order
    :rtype: (str, str)
    :raise FileExistsError: if utils.create_folder()
        reports that a dest dir could not be
        (re)created.
    '''

    # Root dir of the two dirs that will hold new
    # audio snippet and spectrogram files
    utils.create_folder(self.out_dir, overwrite_policy=self.overwrite_policy)

    # Below the root:
    spectrogram_dir_path = os.path.join(self.out_dir, 'spectrograms/')
    wav_dir_path = os.path.join(self.out_dir, 'wav-files/')

    # Same creation order as always: spectrograms first, then audio.
    top_level_dirs = (spectrogram_dir_path, wav_dir_path)

    for dir_path in top_level_dirs:
        if not utils.create_folder(dir_path, overwrite_policy=self.overwrite_policy):
            # Report the directory that actually failed. (The
            # audio branch used to name the spectrogram dir.)
            raise FileExistsError(f"Target dir {dir_path} exists; aborting")

    # One dir each for the audio and spectrogram
    # snippets of one species:
    for species in species_list:
        for parent_dir in top_level_dirs:
            species_dir = os.path.join(parent_dir, species)
            if not utils.create_folder(species_dir, overwrite_policy=self.overwrite_policy):
                raise FileExistsError(f"Target dir {species_dir} exists; aborting")

    return (wav_dir_path, spectrogram_dir_path)
def __init__(self,
             input_dir_path,
             plot=False,
             overwrite_policy=False,
             aug_goals=AugmentationGoals.MEDIAN,
             random_augs = False,
             multiple_augs = False,):
    '''
    Validate the arguments, work out how many augmentations
    each species needs, derive the name of the output
    directory from the augmentation settings, and create
    that directory.

    :param input_dir_path: directory holding .wav files;
        must be an absolute path
    :type input_dir_path: str
    :param plot: whether or not to plot informative
        charts along the way
    :type plot: bool
    :param overwrite_policy: what to do when previously
        created work would be replaced. Must be a member
        of WhenAlreadyDone.
        NOTE(review): the default of False does not pass
        that check, so callers must always supply a value.
    :type overwrite_policy: WhenAlreadyDone
    :param aug_goals: either an AugmentationGoals member, or
        a dict with a separate AugmentationGoals for each species:
           {species : AugmentationGoals}
        (See definition of AugmentationGoals; TENTH/MAX/MEDIAN)
    :type aug_goals: {AugmentationGoals | {str : AugmentationGoals}}
    :param random_augs: if this is true, will randomly choose augmentation
        to use for each new sample
    :type random_augs: bool
    :param multiple_augs: if we want to allow multiple augmentations per sample
        (e.g. time shift and volume)
    :type multiple_augs: bool
    :raise TypeError: if overwrite_policy is not a
        WhenAlreadyDone member
    :raise ValueError: if input_dir_path is not absolute
    '''

    self.log = LoggingService()

    if not isinstance(overwrite_policy, WhenAlreadyDone):
        raise TypeError(f"Overwrite policy must be a member of WhenAlreadyDone, not {type(overwrite_policy)}")

    if not os.path.isabs(input_dir_path):
        raise ValueError(f"Input path must be a full, absolute path; not {input_dir_path}")

    self.input_dir_path = input_dir_path
    self.multiple_augs = multiple_augs
    self.plot = plot
    self.overwrite_policy = overwrite_policy

    self.species_names = Utils.find_species_names(self.input_dir_path)

    # If aug_goals is not a dict mapping
    # each species to an aug_goals, but just
    # a single AugmentationGoals, create
    # a dict from all bird species, mapping
    # each to that same value. isinstance()
    # rather than an exact type comparison so
    # that dict subclasses (OrderedDict, ...)
    # are recognized as per-species mappings:
    if not isinstance(aug_goals, dict):
        aug_goals = {species: aug_goals
                     for species in self.species_names}

    # Get dataframe with row labels being the
    # species, and one col with number of samples
    # in the respective species:
    #       num_samples
    # sp1       10
    # sp2       15
    #      ..
    self.sample_distrib_df = Utils.sample_compositions_by_species(input_dir_path,
                                                                  augmented=False)

    if plot:
        # Plot a distribution:
        self.sample_distrib_df.plot.bar()

    # Build a dict with number of augmentations to do
    # for each species:
    self.augs_to_do = Utils.compute_num_augs_per_species(aug_goals,
                                                         self.sample_distrib_df)

    # Create the descriptive name of an output directory
    # for the augmented samples; it lives next to the
    # input directory:
    parent_dir = Path(input_dir_path).parent
    if random_augs:
        self.output_dir_path = os.path.join(parent_dir,
                                            'augmented_samples_random')
    else:
        # NOTE(review): exact float comparison; weights such
        # as ADD_NOISE/TIME_SHIFT/VOLUME must sum to exactly 1.
        assert(self.ADD_NOISE + self.TIME_SHIFT + self.VOLUME == 1)
        dir_nm = f"Augmented_samples_-{self.ADD_NOISE:.2f}n-{self.TIME_SHIFT:.2f}ts-{self.VOLUME:.2f}w"
        self.output_dir_path = os.path.join(parent_dir, dir_nm)

    if self.multiple_augs:
        self.output_dir_path += "/"
    else:
        # Indicate that augmentations are mutually exclusive
        self.output_dir_path += "-exc/"

    self.log.info(f"Results will be in {self.output_dir_path}")

    Utils.create_folder(self.output_dir_path, self.overwrite_policy)

    # Hide the UserWarning: PySoundFile failed. Trying audioread instead.
    warnings.filterwarnings(action="ignore",
                            message="PySoundFile failed. Trying audioread instead.",
                            category=UserWarning,
                            module='',
                            lineno=0)
def augment_one_species(self, in_dir, out_dir, num_augs_to_do):
    '''
    Takes one species, and a number of audio
    augmentations to do. Generates the files,
    and returns the newly created files
    (full paths) together with a failure count.

    Augmentations are assigned round robin across
    all of the species' audio files, such that each
    file is augmented roughly the same number of
    times until num_augs_to_do is reached.

    For each file the augmentation methods are drawn
    from AudAugMethod without replacement. If a file
    needs more augmentations than there are methods,
    the drawn methods are repeated in the same order
    as often as needed.

    Nothing is generated, and ([], 0) is returned, if
    either Utils.create_folder() returns a falsy value
    for out_dir, or in_dir holds no files.

    :param in_dir: directory holding one species' audio files
    :type in_dir: str
    :param out_dir: destination for new audio files
    :type out_dir: str
    :param num_augs_to_do: number of augmentations
    :type num_augs_to_do: int
    :returns: list of newly created file paths, and
        the number of augmentations that failed
    :rtype: ([str], int)
    '''

    # By convention, species name is the last part of the directory:
    species_name = Path(in_dir).stem

    # Create subfolder for the given species:
    if not Utils.create_folder(out_dir, self.overwrite_policy):
        self.log.info(f"Skipping augmentations for {species_name}")
        # Same (paths, failures) shape as the normal exit,
        # so that callers can always unpack two values:
        return [], 0

    # Get dict: {full-path-to-an-audio_file : 0}
    # The zeroes will be counts of augmentations
    # needed for that file:
    in_wav_files = dict.fromkeys(Utils.listdir_abs(in_dir), 0)

    # Cannot do augmentations for species with 0 samples
    # (this guard also keeps the loop below from spinning
    # forever on an empty dict):
    if not in_wav_files:
        self.log.info(f"Skipping for {species_name} since there are no original samples.")
        return [], 0

    # Distribute augmentations across the original
    # input files:
    aug_assigned = 0
    while aug_assigned < num_augs_to_do:
        for fname in in_wav_files:
            in_wav_files[fname] += 1
            aug_assigned += 1
            if aug_assigned >= num_augs_to_do:
                break

    all_methods = list(AudAugMethod)
    new_sample_paths = []
    failures = 0

    for in_fname, num_augs_this_file in in_wav_files.items():

        # Pick audio aug methods to apply (without replacement).
        # Note that if more augs are to be applied to each file
        # than methods are available, some methods will need
        # to be applied multiple times; no problem, as each
        # method includes randomness:
        sample_size = min(len(all_methods), num_augs_this_file)
        methods = random.sample(all_methods, sample_size)

        # Now have something like:
        #     [volume, time-shift], or all methods: [volume, time-shift, noise]

        if num_augs_this_file > len(methods):
            # Repeat the methods as often as needed.
            # len(methods) cannot be 0 here: it is only 0
            # when num_augs_this_file is 0 as well.
            num_method_set_repeats = math.ceil(num_augs_this_file / len(methods))
            # The slice to num_augs_this_file chops off
            # the possible excess from the list replication.
            # Assuming num_augs_this_file is 7, we now have:
            #    [m1,m2,m3,m1,m2,m3,m1]
            method_seq = (methods * num_method_set_repeats)[:num_augs_this_file]
        else:
            method_seq = methods

        for method in method_seq:
            # create_new_sample() hands back either the new
            # file's path, or an Exception instance on failure:
            out_path_or_err = self.create_new_sample(in_fname, out_dir, method)
            if isinstance(out_path_or_err, Exception):
                failures += 1
            else:
                new_sample_paths.append(out_path_or_err)

    self.log.info(f"Audio aug report: {len(new_sample_paths)} new files; {failures} failures")

    return new_sample_paths, failures
def __init__(self,
             input_dir_path,
             output_dir_path,
             plot=False,
             overwrite_policy=False,
             aug_goals=AugmentationGoals.MEDIAN):
    '''
    Validate the arguments, record them on the instance,
    compute how many augmentations each species needs,
    and create the output root directory.

    :param input_dir_path: directory holding .png files;
        must be an absolute path
    :type input_dir_path: str
    :param output_dir_path: root of destination dir under
        which each species' subdirectories will be placed.
        Augmentations will be placed in those subdirs.
    :type output_dir_path: str
    :param plot: whether or not to plot informative
        charts along the way
    :type plot: bool
    :param overwrite_policy: what to do when previously
        created work would be replaced. Must be a member
        of WhenAlreadyDone.
        NOTE(review): the default of False does not pass
        that check, so callers must always supply a value.
    :type overwrite_policy: WhenAlreadyDone
    :param aug_goals: either an AugmentationGoals member, or
        a dict with a separate AugmentationGoals for each species:
           {species : AugmentationGoals}
        (See definition of AugmentationGoals; TENTH/MAX/MEDIAN)
        Passed unchanged to Utils.compute_num_augs_per_species().
    :type aug_goals: {AugmentationGoals | {str : AugmentationGoals}}
    :raise TypeError: if overwrite_policy is not a
        WhenAlreadyDone member
    :raise ValueError: if input_dir_path is not absolute
    '''

    self.log = LoggingService()

    # Argument checks: policy type first, then the path.
    policy_is_valid = isinstance(overwrite_policy, WhenAlreadyDone)
    if not policy_is_valid:
        raise TypeError(
            f"Overwrite policy must be a member of WhenAlreadyDone, not {type(overwrite_policy)}"
        )

    path_is_absolute = os.path.isabs(input_dir_path)
    if not path_is_absolute:
        raise ValueError(
            f"Input path must be a full, absolute path; not {input_dir_path}"
        )

    # Remember the configuration:
    self.input_dir_path = input_dir_path
    self.output_dir_path = output_dir_path
    self.plot = plot
    self.overwrite_policy = overwrite_policy

    self.species_names = Utils.find_species_names(self.input_dir_path)

    # Dataframe whose row labels are the species,
    # with one column holding the number of samples
    # of the respective species:
    #       num_species
    # sp1       10
    # sp2       15
    #      ..
    distrib_df = Utils.sample_compositions_by_species(
        input_dir_path,
        augmented=False)
    self.sample_distrib_df = distrib_df

    if plot:
        # Bar chart of the sample distribution:
        distrib_df.plot.bar()

    # Number of augmentations to do for each species:
    self.augs_to_do = Utils.compute_num_augs_per_species(
        aug_goals,
        distrib_df)

    self.log.info(f"Results will be in {self.output_dir_path}")

    Utils.create_folder(self.output_dir_path, self.overwrite_policy)