def test_sample_compositions_by_species(self):
    '''
    Check the per-species sample counts reported by
    Utils.sample_compositions_by_species() for the directory
    in self.spectros_dir.

    The returned dataframe is expected to have the single
    column 'num_samples', with a count of 1 in row 'AMADEC'
    and a count of 5 in row 'FORANA'.
    '''
    dist_df = Utils.sample_compositions_by_species(self.spectros_dir)

    self.assertListEqual(list(dist_df.columns), ['num_samples'])

    # Address the individual cell rather than the whole row:
    # .loc['AMADEC'] alone yields a one-element Series, and
    # calling int() on such a Series is deprecated in recent
    # pandas versions.
    self.assertEqual(int(dist_df.loc['AMADEC', 'num_samples']), 1)
    self.assertEqual(int(dist_df.loc['FORANA', 'num_samples']), 5)
def test_compute_num_augs_per_species(self):
    '''
    Check the number of augmentations that
    Utils.compute_num_augs_per_species() requests for each
    species when the goal is AugmentationGoals.MAX.

    The sample distribution is taken from the directory in
    self.spectros_dir. Expected result: 4 augmentations
    for 'AMADEC', and none for 'FORANA'.
    '''
    goal = AugmentationGoals.MAX
    species_counts = Utils.sample_compositions_by_species(self.spectros_dir)
    augs_needed = Utils.compute_num_augs_per_species(goal, species_counts)

    # (species, expected number of augmentations):
    expectations = (
        ('AMADEC', 4),
        ('FORANA', 0),
    )
    for species, expected_num in expectations:
        self.assertEqual(augs_needed[species], expected_num)
def __init__(self,
             input_dir_path,
             plot=False,
             overwrite_policy=False,
             aug_goals=AugmentationGoals.MEDIAN,
             random_augs=False,
             multiple_augs=False,):
    '''
    Validate the arguments, work out how many augmentations
    each species needs, and create the output directory for
    the augmented samples next to the input directory.

    :param input_dir_path: directory holding .wav files.
        Must be an absolute path.
    :type input_dir_path: str
    :param plot: whether or not to plot informative charts
        along the way
    :type plot: bool
    :param overwrite_policy: what to do when previously created
        work would be replaced. Must be a member of WhenAlreadyDone.
        NOTE: the default value False is not a WhenAlreadyDone
        member, so callers must pass this argument explicitly;
        otherwise a TypeError is raised.
    :type overwrite_policy: WhenAlreadyDone
    :param aug_goals: either an AugmentationGoals member, or a dict
        with a separate AugmentationGoals for each species:
            {species : AugmentationGoals}
        (See definition of AugmentationGoals; TENTH/MAX/MEDIAN)
    :type aug_goals: {AugmentationGoals | {str : AugmentationGoals}}
    :param random_augs: if this is true, will randomly
        choose augmentation to use for each new sample
    :type random_augs: bool
    :param multiple_augs: if we want to allow multiple
        augmentations per sample (e.g. time shift and volume)
    :type multiple_augs: bool
    :raise TypeError: if overwrite_policy is not a
        WhenAlreadyDone member
    :raise ValueError: if input_dir_path is not absolute
    '''

    self.log = LoggingService()

    if not isinstance(overwrite_policy, WhenAlreadyDone):
        raise TypeError(f"Overwrite policy must be a member of WhenAlreadyDone, not {type(overwrite_policy)}")

    if not os.path.isabs(input_dir_path):
        raise ValueError(f"Input path must be a full, absolute path; not {input_dir_path}")

    self.input_dir_path = input_dir_path
    self.multiple_augs = multiple_augs
    self.plot = plot
    self.overwrite_policy = overwrite_policy

    self.species_names = Utils.find_species_names(self.input_dir_path)

    # If aug_goals is not a dict mapping each species
    # to an AugmentationGoals, but just a single
    # AugmentationGoals, create a dict from all species,
    # mapping each to that same value. Using isinstance()
    # ensures that dict subclasses (OrderedDict, defaultdict)
    # are recognized as per-species mappings as well:
    if not isinstance(aug_goals, dict):
        aug_goals = {species : aug_goals
                     for species in self.species_names}

    # Get dataframe with row labels being the
    # species, and one col with number of samples
    # in the respective species:
    #       num_samples
    # sp1       10
    # sp2       15
    #      ..
    self.sample_distrib_df = Utils.sample_compositions_by_species(input_dir_path,
                                                                  augmented=False)

    if plot:
        # Plot a distribution:
        self.sample_distrib_df.plot.bar()

    # Build a dict with number of augmentations to do
    # for each species:
    self.augs_to_do = Utils.compute_num_augs_per_species(aug_goals,
                                                         self.sample_distrib_df)

    # Output directory is a sibling of the input directory.
    # Path() drops any trailing slash before the parent
    # is taken:
    parent_dir = Path(input_dir_path).parent

    # Create the descriptive name of an output directory
    # for the augmented samples:
    if random_augs:
        self.output_dir_path = os.path.join(parent_dir, 'augmented_samples_random')
    else:
        # The three weights are floats; compare their sum
        # to 1 within a tolerance rather than exactly, so that
        # binary rounding does not trip the check:
        weight_sum = self.ADD_NOISE + self.TIME_SHIFT + self.VOLUME
        assert abs(weight_sum - 1) < 1e-9, \
            f"ADD_NOISE, TIME_SHIFT, and VOLUME must add to 1, not {weight_sum}"
        dir_nm = f"Augmented_samples_-{self.ADD_NOISE:.2f}n-{self.TIME_SHIFT:.2f}ts-{self.VOLUME:.2f}w"
        self.output_dir_path = os.path.join(parent_dir, dir_nm)

    # NOTE(review): suffix is applied to both the random and
    # the weighted directory name -- confirm that this matches
    # the intended nesting.
    if self.multiple_augs:
        self.output_dir_path += "/"
    else:
        # Indicate that augmentations are mutually exclusive
        self.output_dir_path += "-exc/"

    self.log.info(f"Results will be in {self.output_dir_path}")

    Utils.create_folder(self.output_dir_path, self.overwrite_policy)

    # Hide the UserWarning: PySoundFile failed. Trying audioread instead.
    warnings.filterwarnings(action="ignore",
                            message="PySoundFile failed. Trying audioread instead.",
                            category=UserWarning,
                            module='',
                            lineno=0)
def __init__(self,
             input_dir_path,
             output_dir_path,
             plot=False,
             overwrite_policy=False,
             aug_goals=AugmentationGoals.MEDIAN):
    '''
    Validate the arguments, determine the number of
    augmentations required for each species, and create
    the output directory.

    :param input_dir_path: directory holding .png files.
        Must be an absolute path.
    :type input_dir_path: str
    :param output_dir_path: root of destination dir under
        which each species' subdirectories will be placed.
        Augmentations will be placed in those subdirs.
    :type output_dir_path: str
    :param plot: whether or not to plot informative charts
        along the way
    :type plot: bool
    :param overwrite_policy: what to do when previously created
        work would be replaced. Must be a member of WhenAlreadyDone.
        NOTE: the default value False is not a WhenAlreadyDone
        member, so callers must pass this argument explicitly;
        otherwise a TypeError is raised.
    :type overwrite_policy: WhenAlreadyDone
    :param aug_goals: either an AugmentationGoals member, or a dict
        with a separate AugmentationGoals for each species:
            {species : AugmentationGoals}
        (See definition of AugmentationGoals; TENTH/MAX/MEDIAN)
    :type aug_goals: {AugmentationGoals | {str : AugmentationGoals}}
    :raise TypeError: if overwrite_policy is not a
        WhenAlreadyDone member
    :raise ValueError: if input_dir_path is not absolute
    '''

    self.log = LoggingService()

    # Reject unusable arguments before recording anything:
    if not isinstance(overwrite_policy, WhenAlreadyDone):
        bad_policy_msg = f"Overwrite policy must be a member of WhenAlreadyDone, not {type(overwrite_policy)}"
        raise TypeError(bad_policy_msg)

    if not os.path.isabs(input_dir_path):
        bad_path_msg = f"Input path must be a full, absolute path; not {input_dir_path}"
        raise ValueError(bad_path_msg)

    self.input_dir_path, self.output_dir_path = input_dir_path, output_dir_path
    self.plot, self.overwrite_policy = plot, overwrite_policy

    self.species_names = Utils.find_species_names(input_dir_path)

    # Dataframe with one row per species, and one
    # column holding that species' number of samples:
    #
    # sp1       10
    # sp2       15
    #      ..
    species_counts = Utils.sample_compositions_by_species(input_dir_path,
                                                          augmented=False)
    self.sample_distrib_df = species_counts

    if plot:
        # Bar chart of the distribution:
        species_counts.plot.bar()

    # Dict with the number of augmentations
    # to create for each species:
    self.augs_to_do = Utils.compute_num_augs_per_species(aug_goals,
                                                         species_counts)

    self.log.info(f"Results will be in {output_dir_path}")

    Utils.create_folder(output_dir_path, overwrite_policy)