예제 #1
0
    def test_to_json_collection(self):
        '''
        Serialize a loaded collection to JSON, parse the
        JSON back, and check that each recording rebuilt
        from its JSON entry carries the phylo name under
        which it was filed in the collection.
        '''
        collection = XenoCantoCollection.load(self.tst_file)
        json_text = collection.to_json()

        # Parsing the JSON text into a dict must succeed;
        # any error is reported as a test failure rather
        # than as a test error:
        try:
            parsed = orjson.loads(json_text)
        except Exception as e:
            self.fail(f"Could not read back collection json: {repr(e)}")

        # Structure of the parsed dict:
        #    {
        #      "phylo_nm1" : [{recording_dict1}, {recording_dict2}, ...],
        #      "phylo_nm2" : [{recording_dict1}, {recording_dict2}, ...],
        #                       ...
        #    }
        #
        # Flatten into (phylo name, recording entry) pairs,
        # reconstitute each entry as a XenoCantoRecording,
        # and compare its phylo_name with the dict key:
        name_entry_pairs = (
            (expected_name, rec_entry)
            for expected_name, rec_entries in parsed.items()
            for rec_entry in rec_entries
        )
        for expected_name, rec_entry in name_entry_pairs:
            rebuilt = XenoCantoRecording.from_json(rec_entry)
            self.assertEqual(rebuilt.phylo_name, expected_name)
예제 #2
0
    def test_load(self):
        '''
        Load the test collection and verify that every
        recording stored under a key reports that same
        key as its phylo_name.
        '''
        collection = XenoCantoCollection.load(self.tst_file)

        # Snapshot the keys first, then walk each key's
        # list of recordings:
        for key in list(collection.keys()):
            for recording in collection[key]:
                self.assertEqual(recording.phylo_name, key)
예제 #3
0
    def test_recording_to_json_xc_recording(self):
        '''
        Serialize a single recording to JSON and verify
        that every key/value pair in the resulting JSON
        object matches the corresponding instance variable
        of the recording.
        '''
        # Function-scope import keeps this method self-contained:
        import json

        coll = XenoCantoCollection.load(self.tst_file)
        rec = coll['Tangaragyrola'][0]

        jstr = rec.to_json()

        # Parse with a real JSON parser. Evaluating the text
        # as a Python expression would break on the JSON
        # literals true/false/null, and would execute whatever
        # expression the string happens to contain:
        recovered_dict = json.loads(jstr)

        # The recovered dict must reflect
        # the instance vars of the XenoCantoRecording:
        for inst_var_nm, inst_var_val in recovered_dict.items():
            self.assertEqual(getattr(rec, inst_var_nm), inst_var_val)
예제 #4
0
    def test_coll_json_to_file(self):
        '''
        Write a loaded collection to a JSON file, read the
        file back into a new collection, and verify that the
        new collection equals the original. The temporary
        file is removed whether or not the round trip succeeds.
        '''
        coll = XenoCantoCollection.load(self.tst_file)

        # Only the file *name* is needed. Leaving the 'with'
        # block closes the handle right away, so no file
        # descriptor is leaked; delete=False keeps the (empty)
        # file on disk for the code under test:
        with tempfile.NamedTemporaryFile(suffix='.json',
                                         prefix='xeno_canto_tst',
                                         dir=tempfile.gettempdir(),
                                         delete=False) as tmp_obj:
            fname = tmp_obj.name

        try:
            # Write to file (which already exists, therefore
            # force=True, so there is no request for confirmation):
            coll.to_json(dest=fname, force=True)

            # Get it back:
            new_coll = XenoCantoCollection.from_json(src=fname)
        finally:
            os.remove(fname)

        # assertEqual evaluates new_coll == coll, as before,
        # but reports both operands on failure:
        self.assertEqual(new_coll, coll)
예제 #5
0
    def test_from_json_xc_recording(self):
        '''
        Round-trip one recording through JSON and check the
        rebuilt instance: string-valued instance variables
        must equal the originals, the 'log' variable must be
        a LoggingService, and the number of instance
        variables must be unchanged.
        '''
        collection = XenoCantoCollection.load(self.tst_file)
        original = collection['Tangaragyrola'][0]
        restored = XenoCantoRecording.from_json(original.to_json())

        for attr_name, attr_val in vars(restored).items():
            if type(attr_val) is str:
                # Plain strings are compared value for value:
                expected = original.__getattribute__(attr_name)
                self.assertEqual(attr_val, expected)
                continue
            if attr_name == 'log':
                # Inst var 'log' should be a LoggingService:
                self.assertEqual(type(attr_val), LoggingService)

        # Ensure all inst vars are recovered:
        self.assertEqual(len(vars(original)), len(vars(restored)))
예제 #6
0
    def make_species_subdirs(self, coll_loc, download_dir):
        '''
        Sort downloaded sound files into per-species
        subdirectories of download_dir.

        For each species key in the collection a subdirectory
        is created (if absent). Each recording's type is
        normalized to 'CALL' or 'SONG' when its type field
        contains one of those words; the expected current file
        name is derived from that (CALL_/SONG_/UNKNOWN_ prefix).
        Files found under download_dir are moved into the species
        subdirectory under a cleaned file name, and the
        recording's _filename is updated accordingly. Files
        that are not found are reported and skipped. Finally
        the collection is saved, and the path of the saved
        json file is printed.

        :param coll_loc: path to previously saved XenoCantoCollection
        :type coll_loc: str
        :param download_dir: path to where the sound files are located
        :type download_dir: str
        '''
        known_types = ('CALL', 'SONG')
        unknown_prefix = 'UNKNOWN_'

        coll = XenoCantoCollection(coll_loc)
        # NOTE(review): the result is not used below; the call
        # is retained because its side effects, if any, are
        # not visible from here.
        coll.all_recordings()

        for species in coll.keys():
            species_dir = os.path.join(download_dir, species)
            try:
                os.mkdir(species_dir)
            except FileExistsError:
                # Directory is already there: nothing to do
                pass

            # Relocate each of this species' sound files:
            for rec in coll[species]:
                type_upper = rec.type.upper()

                # Collapse free-form type text to a standard
                # type; 'CALL' takes precedence over 'SONG':
                std_type = None
                for candidate in known_types:
                    if candidate in type_upper:
                        std_type = candidate
                        rec.type = candidate
                        break

                stored_name = rec._filename
                if std_type is None:
                    fname = f"UNKNOWN_{stored_name}"
                elif stored_name.startswith(('CALL_', 'SONG_')):
                    # Already carries a type prefix:
                    fname = stored_name
                else:
                    # Drop a leading UNKNOWN_ before adding
                    # the proper type prefix:
                    bare_name = stored_name
                    if bare_name.startswith(unknown_prefix):
                        bare_name = bare_name[len(unknown_prefix):]
                    fname = f"{std_type}_{bare_name}"

                curr_path = os.path.join(download_dir, fname)
                if not os.path.exists(curr_path):
                    print(f"Did not find {curr_path}")
                    continue

                # Record the cleaned-up name in the recording,
                # and move the file under that name:
                clean_fname = self._clean_filename(fname)
                rec._filename = clean_fname
                shutil.move(curr_path,
                            os.path.join(species_dir, clean_fname))

        # Save updated collection. Per the original author's
        # note, save() does not overwrite, but creates a new
        # json file:
        new_json_path = coll.save(coll_loc)
        print(f"Updated collection: {new_json_path}")
예제 #7
0
    def unknown_type_resolution(self, coll_loc, download_dir):
        '''
        Downloaded recordings often have non-standard
        type fields, making the distinction between song
        and call difficult. The downloaded sound files then
        end up with UNKNOWN_ prefixes.

        This method loads the saved collection that contains
        the metadata for the downloaded files. It tries to
        guess in each XenoCantoRecording instance the proper
        type from the type fields. It corrects the type field
        if successful. The UNKNOWN_ prefixed file is then
        moved to have the proper SONG_/CALL_ prefix.

        Prints remaining unresolved cases: files with an
        UNKNOWN_ prefix that have no matching recording, and
        recordings whose type contains neither 'CALL' nor 'SONG'.

        As a side effect, the lookup table from UNKNOWN_-prefixed
        file name to recording is stored in self.all_recs, and
        every recording's _filename is replaced by its cleaned form.

        :param coll_loc: path to previously saved XenoCantoCollection
        :type coll_loc: str
        :param download_dir: path to where the sound files are located
        :type download_dir: str
        '''
        unknown_prefix = 'UNKNOWN_'

        coll = XenoCantoCollection(coll_loc)

        # Map the UNKNOWN_-prefixed form of each recording's
        # cleaned file name to the recording itself:
        all_recs = {}
        for rec_list in coll.values():
            for rec in rec_list:
                corr_fname = self._clean_filename(rec._filename)
                rec._filename = corr_fname
                all_recs[unknown_prefix + corr_fname] = rec

        self.all_recs = all_recs

        for fname in os.listdir(download_dir):
            if not fname.startswith(unknown_prefix):
                continue
            # Keep the try body down to the one lookup that
            # can raise the KeyError handled here:
            try:
                rec = all_recs[fname]
            except KeyError:
                print(f"Not fnd: {fname}")
                continue

            voc_type = rec.type.upper()
            if 'CALL' in voc_type:
                voc_type = 'CALL'
            elif 'SONG' in voc_type:
                voc_type = 'SONG'
            else:
                # Still unknown; report only these cases,
                # not the ones resolved below:
                print(f"Unknown type: {voc_type}")
                continue

            # Standardize the type field:
            rec.type = voc_type

            # Swap only the leading prefix; an 'UNKNOWN_'
            # occurring later in the name stays untouched:
            new_fname = f"{voc_type}_{fname[len(unknown_prefix):]}"
            orig = os.path.join(download_dir, fname)
            new = os.path.join(download_dir, new_fname)
            shutil.move(orig, new)

        # Per the original author's note, the save will
        # automatically add a uniquifier at the end of the
        # json file name:
        new_coll_loc = coll.save(coll_loc)
        print(f"Updated collection saved to {new_coll_loc}")