def test_chop_one_spectrogram_file(self):
    '''
    Chop a single spectrogram file into snippets inside a
    throw-away directory, then verify (a) the number of
    snippet files written, and (b) the duration and species
    metadata embedded in one of the snippets.
    '''
    with tempfile.TemporaryDirectory(dir='/tmp', prefix='chopping') as dir_nm:
        chopper = SpectrogramChopper(
            self.spectro_root,
            dir_nm,
            overwrite_policy=WhenAlreadyDone.OVERWRITE
        )
        # The species name is taken from the name of the
        # directory that holds the spectrogram file:
        species = Path(self.spectro_file).parent.stem
        outdir = os.path.join(dir_nm, species)
        snippet_width = chopper.chop_one_spectro_file(
            self.spectro_file,
            outdir,
            'DOVE',
            skip_size=self.skip_size
        )

        # Count the snippets we expect: slide the right edge
        # of a snippet-wide window forward by skip_size until
        # it reaches the end of the recording:
        expected_count = 0
        window_end = snippet_width
        while window_end < self.duration:
            expected_count += 1
            window_end += self.skip_size

        self.assertEqual(len(os.listdir(outdir)), expected_count)

        # Check embedded metadata of one snippet:
        first_snippet = Utils.listdir_abs(outdir)[0]
        _spectro, metadata = SoundProcessor.load_spectrogram(first_snippet)
        embedded_duration = round(float(metadata['duration(secs)']), 3)
        self.assertEqual(embedded_duration, round(snippet_width, 3))
        self.assertEqual(metadata['species'], 'DOVE')
def prep_aug_tmp_dirs(self, dst_tmp_dir):
    '''
    Copies every species directory found under
    self.full_species_root into the given tmp dir (per the
    original author's note these are the AMADEC
    single-spectrogram dir, and the FORANA 5-spectrogram dir).
    Then creates the subdirectory 'aug_spectros' in that same
    tmp dir, and returns its path.

    All testing is thereby done in the tmp dir, where files
    and directories get deleted automatically.

    :param dst_tmp_dir: temporary directory
    :type dst_tmp_dir: str
    :return: output directory for future spectro augmentations
    :rtype: str
    '''
    for src_species_dir in Utils.listdir_abs(self.full_species_root):
        # Destination keeps the species name as its dir name:
        dst_species_dir = os.path.join(dst_tmp_dir, Path(src_species_dir).stem)
        shutil.copytree(src_species_dir, dst_species_dir)

    # Dir where augmentations are to be placed,
    # one subdir per species:
    aug_dir = os.path.join(dst_tmp_dir, 'aug_spectros')
    os.mkdir(aug_dir)
    return aug_dir
def test_listdir_abs(self):
    '''
    Verify that Utils.listdir_abs() returns as many entries
    as the built-in os.listdir() for the same directory, and
    that the first returned path exists on disk.
    '''
    # Get the built-in directory listing
    # with just the file names:
    nearly_truth = os.listdir(self.cur_dir)
    abs_paths = Utils.listdir_abs(self.cur_dir)

    # assertEqual rather than the deprecated alias
    # assertEquals, which was removed in Python 3.12:
    self.assertEqual(len(nearly_truth), len(abs_paths))

    # Check existence of first file or dir:
    self.assertTrue(os.path.exists(abs_paths[0]))
def setUpClass(cls):
    '''
    One-time fixture setup. Records on the class:

        o cur_dir: directory that holds this test file
        o model_path: path to a saved model file (.pth),
             relative to cur_dir
        o snips_dir: path to a directory of test snippets,
             relative to cur_dir
        o example_img_path: first entry returned by
             Utils.listdir_abs() for snips_dir
    '''
    here = os.path.dirname(__file__)
    model_rel_path = '../../birdsong/tests/models/mod_2021-05-04T13_02_14_net_resnet18_pre_True_frz_0_lr_0.01_opt_SGD_bs_128_ks_7_folds_10_gray_True_classes_34_ep9.pth'
    snips_rel_path = '../../birdsong/utils/tests/data/fld_snippets'

    cls.cur_dir = here
    cls.model_path = os.path.join(here, model_rel_path)
    cls.snips_dir = os.path.join(here, snips_rel_path)
    cls.example_img_path = Utils.listdir_abs(cls.snips_dir)[0]
def check_spectro_sanity(self, dirs_filled):
    '''
    Fails the current test with an assertion error if any
    entry in any of the passed-in directories is not larger
    than 5000 bytes.

    :param dirs_filled: list of directories whose content
        files to check for size
    :type dirs_filled: [str]
    '''
    for dir_path in dirs_filled:
        for file_path in Utils.listdir_abs(dir_path):
            # Each spectro must be of reasonable size:
            num_bytes = os.stat(file_path).st_size
            self.assertTrue(num_bytes > 5000)
def record_creation_times(self, dirs_filled):
    '''
    Given a list of directories, return a dict mapping the
    absolute path of each entry in those directories to its
    modification time as reported by os.path.getmtime()
    (fractional epoch seconds).

    :param dirs_filled: list of directories whose entries
        are to be time stamped
    :type dirs_filled: [str]
    :return dict of modification times
    :rtype {str : float}
    '''
    return {
        entry_path: os.path.getmtime(entry_path)
        for dir_path in dirs_filled
        for entry_path in Utils.listdir_abs(dir_path)
    }
def test_generate_all_augmentations_max(self):
    '''
    Run the 'max' spectrogram augmenter over a copy of the
    species fixture directories, and verify that exactly one
    species subdirectory is created under the augmentation
    output dir, holding the expected number of new files.
    '''
    with tempfile.TemporaryDirectory(dir='/tmp', prefix='test_spectro') as dst_dir:
        out_dir = self.prep_aug_tmp_dirs(dst_dir)

        # Tell the augmenter where the src and dest roots are:
        self.spectro_augmenter_max.input_dir_path = dst_dir
        self.spectro_augmenter_max.output_dir_path = out_dir

        # AMADEC has 1 spectro, FORANA has 5.
        # MAX is 5, so AMADEC needs 4 augmentations:
        num_augs_needed = 4

        self.spectro_augmenter_max.generate_all_augmentations()

        # Should have one directory in aug_spectros:
        new_dirs = Utils.listdir_abs(out_dir)
        self.assertEqual(len(new_dirs), 1)

        # AMADEC subdir should have num_augs_needed new files.
        # NOTE: must be assertEqual; assertTrue(x, y) would
        # treat y as the failure message, and pass for any
        # non-empty directory:
        new_files = os.listdir(new_dirs[0])
        self.assertEqual(len(new_files), num_augs_needed)
def augment_one_species(self, in_dir, out_dir, num_augs_to_do):
    '''
    Takes one species, and a number of audio augmentations
    to do. Attempts to generate the files, and returns the
    list of newly created files (full paths) together with
    the number of failed attempts.

    Augmentations are distributed round robin across all of
    the species' audio files, such that each file is augmented
    roughly the same number of times until num_augs_to_do
    have been assigned.

    For each file the augmentation methods are sampled without
    replacement from AudAugMethod. If a file needs more
    augmentations than there are methods, the sampled method
    sequence is repeated as often as needed; that is no
    problem, as each method includes randomness.

    If the output folder is not to be touched (as decided by
    Utils.create_folder() under self.overwrite_policy), or if
    in_dir is empty, nothing is generated, and ([], 0) is
    returned.

    :param in_dir: directory holding one species' audio files
    :type in_dir: str
    :param out_dir: destination for new audio files
    :type out_dir: str
    :param num_augs_to_do: number of augmentations
    :type num_augs_to_do: int
    :returns: list of newly created file paths, and
        number of failed augmentation attempts
    :rtype: ([str], int)
    '''
    # By convention, species name is the last part of the directory:
    species_name = Path(in_dir).stem

    # Create subfolder for the given species. The early
    # exits return the same (paths, failures) shape as the
    # normal exit, so that callers can always unpack:
    if not Utils.create_folder(out_dir, self.overwrite_policy):
        self.log.info(f"Skipping augmentations for {species_name}")
        return [], 0

    # Get dict: {full-path-to-an-audio_file : 0}
    # The zeroes will be counts of augmentations
    # needed for that file:
    in_wav_files = {full_in_path: 0
                    for full_in_path
                    in Utils.listdir_abs(in_dir)
                    }

    # Cannot do augmentations for species with 0 samples
    if len(in_wav_files) == 0:
        self.log.info(f"Skipping for {species_name} since there are no original samples.")
        return [], 0

    # Distribute augmentations across the original
    # input files:
    aug_assigned = 0
    while aug_assigned < num_augs_to_do:
        for fname in in_wav_files:
            in_wav_files[fname] += 1
            aug_assigned += 1
            if aug_assigned >= num_augs_to_do:
                break

    # Loop invariant: the available augmentation methods
    all_methods = list(AudAugMethod)

    new_sample_paths = []
    failures = 0

    for in_fname, num_augs_this_file in in_wav_files.items():
        # Pick audio aug methods to apply (without replacement):
        max_methods_sample_size = min(len(all_methods), num_augs_this_file)
        methods = random.sample(all_methods, max_methods_sample_size)

        # Now have something like:
        #     [volume, time-shift], or all methods: [volume, time-shift, noise]
        if num_augs_this_file > len(methods):
            # Repeat the methods as often as
            # needed:
            num_method_set_repeats = int(math.ceil(num_augs_this_file / len(methods)))
            # The slice to num_augs_this_file chops off
            # the possible excess from the list replication.
            # Assuming num_augs_this_file is 7, we now have method_seq:
            #    [m1,m2,m3,m1,m2,m3,m1]
            method_seq = (methods * num_method_set_repeats)[:num_augs_this_file]
        else:
            method_seq = methods

        for method in method_seq:
            # create_new_sample() hands back either the new
            # file's path, or an Exception instance:
            out_path_or_err = self.create_new_sample(in_fname, out_dir, method)
            if isinstance(out_path_or_err, Exception):
                failures += 1
            else:
                new_sample_paths.append(out_path_or_err)

    self.log.info(f"Audio aug report: {len(new_sample_paths)} new files; {failures} failures")

    return new_sample_paths, failures
def create_snips_gen_for_sel_tbls(self, snippets_src, sel_tables_src):
    '''
    Given one or more Raven selection tables, and one or
    more recording snippet paths, return a dict:

          {<recording-id> : SelTblSnipsAssoc-inst<table-path, snippets-src>}

    where recording-id is like AM01_20190719_063242;
    table-path is the full path to one selection table
    with the respective recording-id, and snippets-src is
    an iterator over the snippet paths.

    Usage concept:
        o There are relatively few selection tables, since
          they are human-generated
        o There can be thousands of snippet .png files whose
          time spans are covered in one table
        o The data structure returned from this method can
          be used like this (assuming SelTblSnipsAssoc
          provides a snips_iterator() method; it is not
          defined in this method):

            tbl_snips_match = create_snips_gen_for_sel_tbls('/foo/my_snips', '/bar/my_tbls')

            # For each selection table, work on the snippets
            # that are covered by that table

            for rec_id in tbl_snips_match:
                for snip_path in tbl_snips_match[rec_id].snips_iterator():
                    <do something with spectrogram snippet>

    Tables for which self.extract_recording_id() returns
    None are left out of the result.

    :param snippets_src: generator of absolute paths to
        snippets, or the absolute path to a directory
    :type snippets_src: {generator(str) | str}
    :param sel_tables_src: absolute path to selection table,
        or path to a directory that contains selection tables,
        or generator of absolute paths to selection tables
    :type sel_tables_src: {generator(str) | str}
    :returned dict mapping recording ID to SelTblSnipsAssoc
        instances
    :rtype {str : SelTblSnipsAssoc}
    :raise ValueError: if either argument is neither a
        generator nor a string; if a string is not an
        absolute path; if the table path is neither an
        existing file nor an existing directory; or if
        the snippets path is not an existing directory
    '''
    tables_err_msg = "Table paths must be a generator, or an absolute path to a selection table or dir"

    # Table paths may be an individual
    # file, a directory, or a generator
    # of absolute paths. Sanity checks:
    if isinstance(sel_tables_src, str):
        if not os.path.isabs(sel_tables_src):
            raise ValueError(tables_err_msg)
        if os.path.isfile(sel_tables_src):
            sel_tables_src = [sel_tables_src]
        elif os.path.isdir(sel_tables_src):
            sel_tables_src = Utils.listdir_abs(sel_tables_src)
        else:
            # Neither file nor dir: without this check the
            # loop below would iterate over the characters
            # of the path string:
            raise ValueError(
                f"Selection table path is neither a file nor a directory: {sel_tables_src}"
            )
    # If not a string, sel_tables_src better be a generator:
    elif not isinstance(sel_tables_src, types.GeneratorType):
        raise ValueError(tables_err_msg)

    # Same checks for snippet location:
    if isinstance(snippets_src, str):
        if not os.path.isabs(snippets_src) \
                or not os.path.isdir(snippets_src):
            raise ValueError(
                "Snippet paths must be a generator, or an absolute path to a snippet dir"
            )
        snippets_src = iter(Utils.listdir_abs(snippets_src))
    # If not a string, snippets_src better be a generator.
    # (Must test snippets_src here, not sel_tables_src,
    # which may by now have been turned into a list):
    elif not isinstance(snippets_src, types.GeneratorType):
        raise ValueError(
            "Snippets src must be a generator, or an absolute path to dir"
        )

    # Build a dict:
    #    {<recording_id> : <association of table and snippets>}
    recording_selection_tables = {}
    for table_path in sel_tables_src:
        recording_id = self.extract_recording_id(table_path)
        if recording_id is not None:
            recording_selection_tables[recording_id] = \
                SelTblSnipsAssoc(table_path, snippets_src)

    return recording_selection_tables