def __init__(self, selection_tbl_loc, spectrogram_locs, out_dir, unittesting=False):
    '''
    Create snippet copies into out_dir for all snippets
    that are covered by any of the given selection tables.

    :param selection_tbl_loc: path to an individual selection
        table, or to a directory containing selection tables.
        Each table is a tsv file with extension .txt
    :type selection_tbl_loc: str
    :param spectrogram_locs: path to an individual spectrogram
        snippet, or to a directory of spectrogram snippets
        (.png files).
    :type spectrogram_locs: str
    :param out_dir: destination of snippet copies. Created,
        together with any missing parent directories, if it
        does not exist yet.
    :type out_dir: str
    :param unittesting: if True, does not initialize
        the instance, or run any operations
    :type unittesting: bool
    :raises SystemExit: with exit code 1 if selection_tbl_loc
        or spectrogram_locs does not exist
    '''

    if unittesting:
        return

    # Fatal input problems are reported on stderr so they
    # do not get mixed into regular stdout output:
    if not os.path.exists(selection_tbl_loc):
        print(f"Cannot open {selection_tbl_loc}", file=sys.stderr)
        sys.exit(1)

    if not os.path.exists(spectrogram_locs):
        print(f"Spectrogram snippets {spectrogram_locs} not found",
              file=sys.stderr)
        sys.exit(1)

    # Is path to sel tbl an individual tsv file?
    if os.path.isfile(selection_tbl_loc):
        table_paths = iter([selection_tbl_loc])
    else:
        # Caller gave directory of selection tables
        # (.txt files). Get them all recursively:
        table_paths = Utils.find_in_tree_gen(selection_tbl_loc,
                                             pattern="*.txt")

    # Is snippets path to an individual .png snippet file?
    if os.path.isfile(spectrogram_locs):
        snippet_paths = iter([spectrogram_locs])
    else:
        # Caller gave directory of .png files.
        # Get them all recursively:
        snippet_paths = Utils.find_in_tree_gen(spectrogram_locs,
                                               pattern="*.png")

    # Create out_dir and all dirs along the path if needed.
    # exist_ok avoids the race between an existence check
    # and the creation when another process creates the
    # directory in between:
    os.makedirs(out_dir, exist_ok=True)

    # Get dict:
    #    {<recording-id> : SelTblSnipsAssoc-instance}
    # where each SelTblSnipsAssoc instance is a generator
    # of snippet metadata from snippets that are covered in
    # the selection table that is associated with the instance.
    # In addition the absolute snippet path is added as
    # entry of key 'snip_path'.
    #
    # The generator feeds out the snippet metadata in order of
    # start time.
    #
    # For brevity, call each instance of SelTblSnipsAssoc
    # an 'assoc'
    rec_id_assocs = self.create_snips_gen_for_sel_tbls(
        snippet_paths,
        table_paths)

    for assoc in rec_id_assocs.values():
        # The assoc focuses on a single selection
        # table, and the snippets it covers.
        # Get the info contained in each row of
        # the sel tbl. This will be a list of dicts, each with
        # the information from one selection tbl row:
        selections = Utils.read_raven_selection_table(
            assoc.raven_sel_tbl_path)

        # Go through each snippet in the association, enrich its
        # metadata with species info. Then copy the enriched
        # snippet to the target dir:
        for snip_metadata in assoc:
            self.match_snippet(selections, snip_metadata, out_dir)
def test_match_snippet(self):

    # Read the selection table at self.sel_tbl_path1;
    # the result is a list with one dict per table row:
    selection_rows = Utils.read_raven_selection_table(self.sel_tbl_path1)

    # Each entry is expected to look like this:
    #
    # {'Selection': '5',
    #  'View': 'Spectrogram 1',
    #  'Channel': '1',
    #  'Begin Time (s)': 0.014506764,
    #  'End Time (s)': 4.656671354,
    #  'Low Freq (Hz)': 345.6,
    #  'High Freq (Hz)': 22050.0,
    #  'Delta Time (s)': '4.6422',
    #  'species': 'no bird',
    #  'type': '',
    #  'number': '',
    #  'mix': [],
    #  'time_interval': {'low_val': 0.014506764,
    #                    'high_val': 4.656671354},
    #  'freq_interval': {'low_val': 345.6,
    #                    'high_val': 22050.0}
    # }
    #
    # The number of entries must equal the number of
    # lines in the table file, not counting the column
    # header line (hence the '- 1'):
    with open(self.sel_tbl_path1, 'r') as tbl_fd:
        num_file_lines = len(tbl_fd.readlines())

    self.assertEqual(len(selection_rows), num_file_lines - 1)

    # Build the mapping
    #     {<recording-id> : SelectionTblSnipsAssoc-instance}
    #
    # Each SelectionTblSnipsAssoc instance is a generator
    # of snippets from one recording. The recording-id is
    # the part of field recording and selection table file
    # names like:
    #     AM01_20190712_050000
    assocs_by_rec_id = self.mapper.create_snips_gen_for_sel_tbls(
        self.tst_snips_dir,
        self.sel_tbl_dir)

    # One SelectionTblSnipsAssoc instance is expected per
    # selection table file in the test directory (there
    # are two test tables):
    expected_num_assocs = len(os.listdir(self.sel_tbl_dir))
    self.assertEqual(len(assocs_by_rec_id), expected_num_assocs)

    # Look closer at the association for one recording:
    rec_id = 'AM01_20190711_170000'
    assoc = assocs_by_rec_id[rec_id]

    self.assertEqual(assoc.rec_id, rec_id)
    self.assertIsInstance(assoc.snip_dir, Iterable)

    # Pull all snippet metadata out of the association,
    # and collect the snippet paths it reports:
    snip_metadata_dicts = list(assoc)
    snip_paths = [metadata['snip_path'] for metadata in snip_metadata_dicts]

    # Keep the paths for which a recording id can be extracted.
    # NOTE(review): this only tests the truthiness of the
    # extract_recording_id() result; it does not compare the
    # result against rec_id. Since the paths come from the
    # generator itself, the final assertion may be weaker
    # than intended -- confirm.
    extract_rec_id = self.mapper.extract_recording_id
    paths_with_rec_id = [path for path in snip_paths if extract_rec_id(path)]

    self.assertEqual(len(snip_metadata_dicts), len(paths_with_rec_id))
def test_read_raven_selection_table(self):

    # Verify that reading the Raven selection table at
    # self.raven_sel_tbl_path yields exactly the four
    # expected row dicts, in the expected order.

    def expected_row(selection, begin, end, low_freq, high_freq,
                     species, call_type, mix):
        # Assemble the dict expected for one selection table
        # row. The two interval entries repeat the row's
        # time and frequency bounds.
        return {
            'Selection': selection,
            'View': 'Spectrogram 1',
            'Channel': '1',
            'Begin Time (s)': begin,
            'End Time (s)': end,
            'Low Freq (Hz)': low_freq,
            'High Freq (Hz)': high_freq,
            'species': species,
            'type': call_type,
            'number': '1',
            'mix': mix,
            'time_interval': {'low_val': begin, 'high_val': end},
            'freq_interval': {'low_val': low_freq, 'high_val': high_freq},
        }

    parsed_rows = Utils.read_raven_selection_table(self.raven_sel_tbl_path)

    expected_rows = [
        expected_row('1', 0.0, 6.23740263, 2088.175, 8538.314,
                     'vase', 'song', ['rbps', 'bgta', 'rcwp']),
        expected_row('18', 1.024500915, 3.294473531, 2161.467, 4492.937,
                     'howp', 'call-1', ['rbps', 'vase']),
        expected_row('2', 2.390074726, 3.231216904, 1564.1, 3519.1,
                     'rbps', 'song', []),
        expected_row('19', 5.926034705, 7.964992409, 3944.33, 9791.219,
                     'bgta', 'call', ['howp']),
    ]

    self.assertEqual(parsed_rows, expected_rows)
def setUpClass(cls):
    '''
    Prepare the fixtures shared by all tests of the class:
    paths to the test data, the parsed test selection tables,
    and hand-made selections and intervals.
    '''
    # NOTE(review): unittest invokes setUpClass on the class,
    # which requires the @classmethod decorator. No decorator
    # is visible directly above this def -- confirm it is present.

    # Locations of the test data, all relative to this file:
    here = os.path.dirname(__file__)
    data_dir = os.path.join(here, 'data')
    sel_tbl_dir = os.path.join(data_dir, 'selection_tables')

    cls.cur_dir = here
    cls.data_dir = data_dir
    cls.sel_tbl_dir = sel_tbl_dir
    cls.sel_tbl_path1 = os.path.join(
        sel_tbl_dir,
        'DS_AM01_20190711_170000.Table.1.selections.txt')
    cls.sel_tbl_path2 = os.path.join(
        sel_tbl_dir,
        'DS_AM01_20190712_050000.Table.1.selections.txt')
    cls.tst_snips_dir = os.path.join(data_dir, 'fld_snippets')

    # Get start-time sorted lists of dicts, each
    # dict containing the info of one selection
    # table row:
    cls.sel_tbl_lst1 = Utils.read_raven_selection_table(cls.sel_tbl_path1)
    cls.sel_tbl_lst2 = Utils.read_raven_selection_table(cls.sel_tbl_path2)

    # For testing SelTblSnipsAssoc iteration:
    cls.tbl_snips_assoc_iter_test_dir = os.path.join(
        data_dir, 'tbl_snippet_assoc_iter_data')

    # Hand-made selections against which the
    # intervals below are positioned:
    cls.sel1 = {'Begin Time (s)': 10,
                'End Time (s)': 20,
                'species': 'dog',
                'mix': None
                }
    cls.sel2 = {'Begin Time (s)': 25,
                'End Time (s)': 30,
                'species': 'dog',
                'mix': ['species1']
                }
    cls.sel3 = {'Begin Time (s)': 30,
                'End Time (s)': 40,
                'species': 'cat',
                'mix': ['species1']
                }
    cls.sel4 = {'Begin Time (s)': 50,
                'End Time (s)': 60,
                'species': 'dog',
                'mix': ['species1', 'species2']
                }

    # Bounds of the test intervals iv1..iv7, in that order:
    interval_bounds = (
        (5, 6),     # Case 1: left of all sels
        (6, 12),    # Case 2: end reaches into sel1
        (12, 16),   # Case 3: entirely enclosed in sel1
        (14, 22),   # Case 4: only start is in sel1
        (22, 24),   # Case 5: in no selection
        (28, 35),   # Case 6: straddles two selections sel 2/3
        (65, 70),   # Case 7: to the right of all sels (sel 4)
    )
    for case_num, (low, high) in enumerate(interval_bounds, start=1):
        setattr(cls, f'iv{case_num}', Interval(low, high))

    cls.sels = [cls.sel1, cls.sel2, cls.sel3, cls.sel4]

    # Silence ResourceWarnings whose message starts
    # with one of these texts:
    for msg_start in ('Implicitly cleaning', 'unclosed file'):
        warnings.filterwarnings("ignore",
                                category=ResourceWarning,
                                message=msg_start)