コード例 #1
0
    def __init__(self,
                 selection_tbl_loc,
                 spectrogram_locs,
                 out_dir,
                 unittesting=False):
        '''
        Create snippet copies into out_dir
        for all snippets that are covered
        by any of the given selection tables.

        If either selection_tbl_loc or spectrogram_locs
        does not exist, a message is printed and the
        process exits with status 1.

        :param selection_tbl_loc: path to individual selection
            table or a directory containing selection tables.
            Each tbl is a tsv file with extension .txt
        :type selection_tbl_loc: str
        :param spectrogram_locs: individual or directory of
            spectrogram snippets (.png files).
        :type spectrogram_locs: str
        :param out_dir: destination of snippet copies; created
            together with any missing parent directories if it
            does not exist yet
        :type out_dir: str
        :param unittesting: if True, does not initialize
            the instance, or run any operations
        :type unittesting: bool
        '''

        if unittesting:
            return

        if not os.path.exists(selection_tbl_loc):
            print(f"Cannot open {selection_tbl_loc}")
            sys.exit(1)

        if not os.path.exists(spectrogram_locs):
            print(f"Spectrogram snippets {spectrogram_locs} not found")
            sys.exit(1)

        # Is path to sel tbl an individual tsv file?
        if os.path.isfile(selection_tbl_loc):
            table_paths = iter([selection_tbl_loc])
        else:
            # Caller gave directory of selection tables
            # (.txt files). Get them all recursively:
            table_paths = Utils.find_in_tree_gen(selection_tbl_loc,
                                                 pattern="*.txt")

        # Is snippets path to an individual .png snippet file?
        if os.path.isfile(spectrogram_locs):
            snippet_paths = iter([spectrogram_locs])
        else:
            # Caller gave directory of .png files.
            # Get them all recursively:
            snippet_paths = Utils.find_in_tree_gen(spectrogram_locs,
                                                   pattern="*.png")

        # Create out_dir and all dirs along the path if needed.
        # exist_ok avoids the window between an existence check
        # and the creation in which another process could create
        # the directory and make makedirs() raise:
        os.makedirs(out_dir, exist_ok=True)

        # Get dict:
        #    {<recording-id> : SelTblSnipsAssoc-instance}
        # where each SelTblSnipsAssoc instance is a generator
        # of snippet metadata from snippets that are covered in
        # the selection table that is associated with the instance.
        # In addition the absolute snippet path is added as
        # entry of key 'snip_path'.
        #
        # The generator feeds out the snippet metadata in order of
        # start time.
        #
        # For brevity, call each instance of SelTblSnipsAssoc
        # an 'assoc'

        rec_id_assocs = self.create_snips_gen_for_sel_tbls(
            snippet_paths, table_paths)

        for assoc in rec_id_assocs.values():
            # The assoc focuses on a single selection
            # table, and the snippets it covers.
            # Get the info contained in each row of
            # the sel tbl. This will be a list of dicts, each with
            # the information from one selection tbl row:

            selections = Utils.read_raven_selection_table(
                assoc.raven_sel_tbl_path)

            # Go through each snippet in the association, enrich its
            # metadata with species info. Then copy the enriched
            # snippet to the target dir:

            for snip_metadata in assoc:
                self.match_snippet(selections, snip_metadata, out_dir)
コード例 #2
0
    def test_match_snippet(self):
        '''
        Check that the first test selection table parses into
        one dict per data row, and that the snippet associations
        built by the mapper line up with the test data on disk.
        '''

        tbl_path = self.sel_tbl_path1

        # Parse the selection table into a list of dicts,
        # one dict per table row. The list is expected to
        # look like this:
        #
        #      [{'Selection': '5',
        #         'View': 'Spectrogram 1',
        #         'Channel': '1',
        #         'Begin Time (s)': 0.014506764,
        #         'End Time (s)': 4.656671354,
        #         'Low Freq (Hz)': 345.6,
        #         'High Freq (Hz)': 22050.0,
        #         'Delta Time (s)': '4.6422',
        #         'species': 'no bird',
        #         'type': '',
        #         'number': '',
        #         'mix': [],
        #         'time_interval': {'low_val': 0.014506764,
        #                           'high_val': 4.656671354},
        #         'freq_interval': {'low_val': 345.6,
        #                           'high_val': 22050.0}
        #       },
        #      ...
        #      ]

        tbl_rows = Utils.read_raven_selection_table(tbl_path)

        # Every line of the file except the column
        # header line must have produced one dict:
        with open(tbl_path, 'r') as tbl_fd:
            num_lines = len(tbl_fd.readlines())
        self.assertEqual(len(tbl_rows), num_lines - 1)

        # Build the mapping
        #    {<recording-id>  : SelectionTblSnipsAssoc-instance}
        #
        # Each SelectionTblSnipsAssoc instance is a generator of
        # snippets from one recording. The recording-id is the
        # part of field recording and selection table file names
        # like:
        #    AM01_20190712_050000

        assocs_by_rec_id = self.mapper.create_snips_gen_for_sel_tbls(
            self.tst_snips_dir, self.sel_tbl_dir)

        # One association is expected for each entry in
        # the directory of test selection tables:
        self.assertEqual(len(assocs_by_rec_id),
                         len(os.listdir(self.sel_tbl_dir)))

        # Take a closer look at the association
        # for one particular recording:
        rec_id = 'AM01_20190711_170000'
        assoc = assocs_by_rec_id[rec_id]

        self.assertEqual(assoc.rec_id, rec_id)
        self.assertTrue(isinstance(assoc.snip_dir, Iterable))

        # Drain the association, and count the snippets whose
        # path yields a recording id.
        # NOTE(review): the filter only tests the truthiness of
        # extract_recording_id()'s result; it does not compare
        # against rec_id -- confirm that this is intended.
        snip_infos = list(assoc)
        snip_paths = [info['snip_path'] for info in snip_infos]

        get_rec_id = self.mapper.extract_recording_id
        paths_with_rec_id = [path for path in snip_paths if get_rec_id(path)]

        self.assertEqual(len(snip_infos), len(paths_with_rec_id))
コード例 #3
0
    def test_read_raven_selection_table(self):
        '''
        Reading the test Raven selection table must return
        exactly the four expected row dicts, in the order
        listed below.
        '''

        def expected_row(selection, begin, end, low_freq, high_freq,
                         species, call_type, mix):
            # Assemble one expected row. The two interval entries
            # repeat the begin/end times and the low/high
            # frequencies of the same row.
            return {
                'Selection': selection,
                'View': 'Spectrogram 1',
                'Channel': '1',
                'Begin Time (s)': begin,
                'End Time (s)': end,
                'Low Freq (Hz)': low_freq,
                'High Freq (Hz)': high_freq,
                'species': species,
                'type': call_type,
                'number': '1',
                'mix': mix,
                'time_interval': {'low_val': begin, 'high_val': end},
                'freq_interval': {'low_val': low_freq, 'high_val': high_freq},
            }

        actual = Utils.read_raven_selection_table(self.raven_sel_tbl_path)

        expected = [
            expected_row('1',
                         0.0, 6.23740263,
                         2088.175, 8538.314,
                         'vase', 'song',
                         ['rbps', 'bgta', 'rcwp']),
            expected_row('18',
                         1.024500915, 3.294473531,
                         2161.467, 4492.937,
                         'howp', 'call-1',
                         ['rbps', 'vase']),
            expected_row('2',
                         2.390074726, 3.231216904,
                         1564.1, 3519.1,
                         'rbps', 'song',
                         []),
            expected_row('19',
                         5.926034705, 7.964992409,
                         3944.33, 9791.219,
                         'bgta', 'call',
                         ['howp']),
        ]

        self.assertEqual(actual, expected)
コード例 #4
0
    def setUpClass(cls):
        '''
        Prepare the fixtures shared by the tests of this class:
        paths into the test data directory next to this file,
        the parsed test selection tables, four hand-made
        selections, and seven intervals placed at different
        positions relative to those selections. Also silences
        two kinds of ResourceWarning.
        '''
        join = os.path.join

        # Locations of the test data:
        cls.cur_dir = os.path.dirname(__file__)
        cls.data_dir = join(cls.cur_dir, 'data')
        cls.sel_tbl_dir = join(cls.data_dir, 'selection_tables')
        cls.tst_snips_dir = join(cls.data_dir, 'fld_snippets')
        # For testing SelTblSnipsAssoc iteration:
        cls.tbl_snips_assoc_iter_test_dir = join(
            cls.data_dir, 'tbl_snippet_assoc_iter_data')

        cls.sel_tbl_path1 = join(
            cls.sel_tbl_dir, 'DS_AM01_20190711_170000.Table.1.selections.txt')
        cls.sel_tbl_path2 = join(
            cls.sel_tbl_dir, 'DS_AM01_20190712_050000.Table.1.selections.txt')

        # Parse both tables into lists of dicts, one dict
        # per selection table row (presumably sorted by start
        # time -- verify against Utils.read_raven_selection_table):
        cls.sel_tbl_lst1 = Utils.read_raven_selection_table(cls.sel_tbl_path1)
        cls.sel_tbl_lst2 = Utils.read_raven_selection_table(cls.sel_tbl_path2)

        def make_sel(begin, end, species, mix):
            # Minimal stand-in for one selection table row:
            return {
                'Begin Time (s)': begin,
                'End Time (s)': end,
                'species': species,
                'mix': mix
            }

        cls.sel1 = make_sel(10, 20, 'dog', None)
        cls.sel2 = make_sel(25, 30, 'dog', ['species1'])
        cls.sel3 = make_sel(30, 40, 'cat', ['species1'])
        cls.sel4 = make_sel(50, 60, 'dog', ['species1', 'species2'])

        # Intervals placed relative to the selections above.
        # Case 1: left of all sels:
        cls.iv1 = Interval(5, 6)
        # Case 2: end reaches into sel1:
        cls.iv2 = Interval(6, 12)
        # Case 3: entirely enclosed in sel1:
        cls.iv3 = Interval(12, 16)
        # Case 4: only start is in sel1:
        cls.iv4 = Interval(14, 22)
        # Case 5: in no selection:
        cls.iv5 = Interval(22, 24)
        # Case 6: straddles two selections, sel2 and sel3:
        cls.iv6 = Interval(28, 35)
        # Case 7: to the right of all sels, i.e. past sel4:
        cls.iv7 = Interval(65, 70)

        cls.sels = [cls.sel1, cls.sel2, cls.sel3, cls.sel4]

        # Keep ResourceWarnings whose message starts with
        # either of these texts out of the test output:
        for msg_start in ('Implicitly cleaning', 'unclosed file'):
            warnings.filterwarnings("ignore",
                                    category=ResourceWarning,
                                    message=msg_start)