def test_annot_list_to_csv(tmp_output_dir, test_data_dir):
    """compares csv created by annot_list_to_csv with correctly generated csv
    saved in hvc/tests/test_data

    Every .not.mat file in cbins/gy6or6/032312 is converted to an annotation
    dict, the list of dicts is written to 'test.csv' in tmp_output_dir, and
    the rows of that file are compared with the rows of
    csv/gy6or6_032312.csv.

    Parameters
    ----------
    tmp_output_dir : fixture
        directory in which the csv is written (converted with str())
    test_data_dir : fixture
        root directory of the test data
    """
    def read_rows(csv_path):
        """return all rows of the csv file at csv_path as a list of lists"""
        with open(csv_path, 'r', newline='') as csvfile:
            return list(csv.reader(csvfile))

    cbin_dir = os.path.join(test_data_dir,
                            os.path.normpath('cbins/gy6or6/032312/'))
    # sorted() so it's the same order on different platforms
    notmat_list = sorted(glob(os.path.join(cbin_dir, '*.not.mat')))
    annot_list = [annotation.notmat_to_annot_dict(notmat)
                  for notmat in notmat_list]

    csv_filename = os.path.join(str(tmp_output_dir), 'test.csv')
    # below, set basename to True so we can easily run tests on any system
    # without worrying about where audio files are relative to root of
    # directory tree
    annotation.annot_list_to_csv(annot_list, csv_filename, basename=True)
    assert os.path.isfile(csv_filename)
    test_rows = read_rows(csv_filename)

    csv_to_compare_with = os.path.join(
        test_data_dir, os.path.normpath('csv/gy6or6_032312.csv'))
    compare_rows = read_rows(csv_to_compare_with)

    # zip() stops at the shorter sequence, so without this check a csv with
    # missing (or no) rows would still pass the row-by-row comparison
    assert len(test_rows) == len(compare_rows)
    for test_row, compare_row in zip(test_rows, compare_rows):
        assert test_row == compare_row
def test_song_w_nan(self, has_window_error, hvc_source_dir):
    """tests that features_arr[ind,:] == np.nan
    where ind is the row corresponding to
    a syllable from a song
    for which a spectrogram could not be generated, and
    so single-syllable features cannot be extracted from it

    Parameters
    ----------
    has_window_error : fixture
        unpacked as (filename, index); filename is the audio file and
        filename + '.not.mat' is its annotation file
    hvc_source_dir : fixture
        directory that contains parse/feature_groups.yml
    """
    feature_groups_yml = os.path.join(
        hvc_source_dir, os.path.normpath('parse/feature_groups.yml'))
    with open(feature_groups_yml) as ftr_grp_yaml:
        # safe_load instead of yaml.load: calling yaml.load without an
        # explicit Loader is deprecated and fails on PyYAML >= 6
        valid_feature_groups_dict = yaml.safe_load(ftr_grp_yaml)

    spect_params = refs_dict['koumura']
    segment_params = {
        'threshold': 1500,
        'min_syl_dur': 0.01,
        'min_silent_dur': 0.006
    }
    svm_features = valid_feature_groups_dict['svm']
    fe = hvc.features.extract.FeatureExtractor(spect_params=spect_params,
                                               segment_params=segment_params,
                                               feature_list=svm_features)

    # NOTE(review): the index from the fixture is not used; the row checked
    # below is hard-coded to 19 -- confirm whether it should be derived
    # from the fixture instead
    filename, _index = has_window_error
    annotation_dict = annotation.notmat_to_annot_dict(filename + '.not.mat')
    # extraction is expected to emit a UserWarning for this file
    with pytest.warns(UserWarning):
        extract_dict = fe._from_file(filename=filename,
                                     file_format='evtaf',
                                     labels_to_use='iabcdefghjk',
                                     labels=annotation_dict['labels'],
                                     onsets_Hz=annotation_dict['onsets_Hz'],
                                     offsets_Hz=annotation_dict['offsets_Hz'])
    ftr_arr = extract_dict['features_arr']
    # np.all instead of np.alltrue, which was removed in NumPy 2.0
    assert np.all(np.isnan(ftr_arr[19, :]))
def test_csv_to_annot_list(test_data_dir):
    """tests that csv_to_annot_list returns the same annotations as
    are obtained by converting the .not.mat files directly

    The annotation list loaded from csv/gy6or6_032312.csv is compared,
    entry by entry, with the list built by calling notmat_to_annot_dict
    (with basename=True) on every .not.mat file in cbins/gy6or6/032312.

    Parameters
    ----------
    test_data_dir : fixture
        root directory of the test data
    """
    csv_fname = os.path.join(test_data_dir,
                             os.path.normpath('csv/gy6or6_032312.csv'))
    # convert csv to annotation list -- this is what we're testing
    annot_list_from_csv = annotation.csv_to_annot_list(csv_fname)

    cbin_dir = os.path.join(test_data_dir,
                            os.path.normpath('cbins/gy6or6/032312/'))
    # get what should be the same annotation list from .not.mat files
    # to compare with what we got from the csv;
    # sorted() so it's the same order on different platforms
    notmat_list = sorted(glob(os.path.join(cbin_dir, '*.not.mat')))
    annot_list_from_notmats = [
        annotation.notmat_to_annot_dict(notmat, basename=True)
        for notmat in notmat_list
    ]

    # zip() stops at the shorter list, so check the lengths first; otherwise
    # a csv that yields too few (or zero) annotations would still pass
    assert len(annot_list_from_csv) == len(annot_list_from_notmats)

    # make sure everything is the same in the two annotation lists
    for from_csv, from_notmat in zip(annot_list_from_csv,
                                     annot_list_from_notmats):
        for from_csv_key, from_csv_val in from_csv.items():
            if isinstance(from_csv_val, str):
                assert from_csv_val == from_notmat[from_csv_key]
            elif isinstance(from_csv_val, np.ndarray):
                # integer arrays must match exactly,
                # whatever the platform's default integer width is
                if np.issubdtype(from_csv_val.dtype, np.integer):
                    assert np.array_equal(from_csv_val,
                                          from_notmat[from_csv_key])
                # float arrays only need to match within tolerance
                elif np.issubdtype(from_csv_val.dtype, np.floating):
                    assert np.allclose(from_csv_val,
                                       from_notmat[from_csv_key])
                # NOTE(review): arrays of any other dtype, and values that
                # are neither str nor ndarray, are not compared
def a_syl(self, test_data_dir):
    """make a syl object

    Should get fancy later and have this return random syls
    for more thorough testing

    Loads the first (in sorted order) .cbin file in cbins/gy6or6/032412
    together with its .not.mat annotation, makes syllables from it with
    the 'tachibana' spectrogram parameters, and returns the first one.

    Parameters
    ----------
    test_data_dir : fixture
        root directory of the test data

    Returns
    -------
    a_syl: a syl object used to test feature extraction functions
    """
    songfiles_glob = os.path.join(
        test_data_dir,
        os.path.normpath('cbins/gy6or6/032412/*.cbin'))
    # glob() returns files in arbitrary order; sort so that the same song
    # is picked on every platform
    songfiles_list = sorted(glob(songfiles_glob))
    first_song = songfiles_list[0]
    raw_audio, samp_freq = hvc.evfuncs.load_cbin(first_song)

    first_song_notmat = first_song + '.not.mat'
    annotation_dict = annotation.notmat_to_annot_dict(first_song_notmat)

    spect_params = refs_dict['tachibana']
    spect_maker = Spectrogram(**spect_params)

    syls = make_syls(raw_audio, samp_freq, spect_maker,
                     annotation_dict['labels'],
                     annotation_dict['onsets_Hz'],
                     annotation_dict['offsets_Hz'])
    return syls[0]
def test_notmat_to_annot_dict(test_data_dir):
    """tests that notmat_to_annot_dict returns a dict with every field in
    ANNOT_DICT_FIELDNAMES, and that each field has exactly the type
    listed there

    Parameters
    ----------
    test_data_dir : fixture
        root directory of the test data
    """
    relative_path = os.path.normpath(
        'cbins/gy6or6/032412/'
        'gy6or6_baseline_240312_0811.1165.cbin.not.mat')
    notmat_path = os.path.join(test_data_dir, relative_path)

    converted = annotation.notmat_to_annot_dict(notmat_path)

    for expected_name, expected_type in ANNOT_DICT_FIELDNAMES.items():
        # field must be present ...
        assert expected_name in converted
        # ... and its value must be of exactly the expected type
        assert type(converted[expected_name]) == expected_type
def test_cbin(self, hvc_source_dir, test_data_dir):
    """tests all features on a single .cbin file

    Runs FeatureExtractor._from_file once for each of three feature
    lists (the 'knn' and 'svm' groups from parse/feature_groups.yml, and
    ['flatwindow']) and checks the type and shape of what is returned.

    Parameters
    ----------
    hvc_source_dir : fixture
        directory that contains parse/feature_groups.yml
    test_data_dir : fixture
        root directory of the test data

    Raises
    ------
    ValueError
        if _from_file returns neither 'features_arr' nor
        'neuralnet_inputs_dict'
    """
    spect_params = refs_dict['tachibana']
    segment_params = {
        'threshold': 1500,
        'min_syl_dur': 0.01,
        'min_silent_dur': 0.006
    }

    feature_groups_yml = os.path.join(
        hvc_source_dir, os.path.normpath('parse/feature_groups.yml'))
    with open(feature_groups_yml) as ftr_grp_yaml:
        # safe_load instead of yaml.load: calling yaml.load without an
        # explicit Loader is deprecated and fails on PyYAML >= 6
        ftr_grps = yaml.safe_load(ftr_grp_yaml)

    cbin = os.path.join(test_data_dir,
                        os.path.normpath(
                            'cbins/gy6or6/032412/'
                            'gy6or6_baseline_240312_0811.1165.cbin'))
    annotation_dict = annotation.notmat_to_annot_dict(cbin + '.not.mat')

    for feature_list in (ftr_grps['knn'],
                         ftr_grps['svm'],
                         ['flatwindow'],
                         ):
        fe = hvc.features.extract.FeatureExtractor(spect_params=spect_params,
                                                   segment_params=segment_params,
                                                   feature_list=feature_list)

        extract_dict = fe._from_file(cbin,
                                     file_format='evtaf',
                                     labels_to_use='iabcdefghjk',
                                     labels=annotation_dict['labels'],
                                     onsets_Hz=annotation_dict['onsets_Hz'],
                                     offsets_Hz=annotation_dict['offsets_Hz']
                                     )

        if 'features_arr' in extract_dict:
            ftrs = extract_dict['features_arr']
            feature_inds = extract_dict['feature_inds']
            # _from_file should return an ndarray
            assert isinstance(ftrs, np.ndarray)
            # and the number of columns should equal the number of feature
            # indices that _from_file determined there were (not necessarily
            # equal to the number of features in the list; some features
            # such as the spectrogram averaged over columns occupy several
            # columns)
            assert ftrs.shape[-1] == feature_inds.shape[-1]
            # however the **unique** number of features in feature indices
            # should be equal to the number of items in the feature list
            assert np.unique(feature_inds).shape[-1] == len(feature_list)
        elif 'neuralnet_inputs_dict' in extract_dict:
            neuralnet_ftrs = extract_dict['neuralnet_inputs_dict']
            assert isinstance(neuralnet_ftrs, dict)
        else:
            raise ValueError('neither features_arr or neuralnet_inputs_dict '
                             'were returned by FeatureExtractor')
def test_window_error_set_to_nan(self, has_window_error):
    """check that, if an audio file raises a window error for
    Spectrogram.make for a certain syllable, then that syllable's
    spectrogram is set to np.nan

    Parameters
    ----------
    has_window_error : fixture
        unpacked as (filename, index); filename is the .cbin audio file
        and index selects the syllable whose spect should be np.nan
    """
    cbin_path, nan_syl_ind = has_window_error

    audio, fs = hvc.evfuncs.load_cbin(cbin_path)
    koumura_params = hvc.parse.ref_spect_params.refs_dict['koumura']
    spectrogram_maker = hvc.audiofileIO.Spectrogram(**koumura_params)
    annot = annotation.notmat_to_annot_dict(cbin_path + '.not.mat')

    all_syls = hvc.audiofileIO.make_syls(
        audio,
        fs,
        spectrogram_maker,
        annot['labels'],
        annot['onsets_Hz'],
        annot['offsets_Hz'],
    )

    # identity check: the attribute must be the np.nan object itself
    syl_with_error = all_syls[nan_syl_ind]
    assert syl_with_error.spect is np.nan
def test_make_syls(self, test_data_dir):
    """test make_syls function

    Smoke test: there are no assertions, the test passes as long as
    make_syls runs without raising. For each of four sets of spectrogram
    parameters (one given individually, then the 'tachibana', 'koumura'
    and 'evsonganaly' entries of refs_dict) make_syls is called on a
    .cbin file and then on a .wav file.

    Parameters
    ----------
    test_data_dir : fixture
        root directory of the test data
    """
    cbin = os.path.join(
        test_data_dir,
        os.path.normpath('cbins/gy6or6/032412/'
                         'gy6or6_baseline_240312_0811.1165.cbin'))
    wav = os.path.join(test_data_dir,
                       os.path.normpath('koumura/Bird0/Wave/0.wav'))

    def make_syls_from_cbin_and_wav(spect_params):
        """run make_syls on the .cbin and then on the .wav file,
        using one Spectrogram built from spect_params for both"""
        spect_maker = hvc.audiofileIO.Spectrogram(**spect_params)

        # .cbin audio, annotated by the accompanying .not.mat file
        raw_audio, samp_freq = hvc.evfuncs.load_cbin(cbin)
        annot_dict = annotation.notmat_to_annot_dict(cbin + '.not.mat')
        hvc.audiofileIO.make_syls(raw_audio,
                                  samp_freq,
                                  spect_maker,
                                  annot_dict['labels'],
                                  annot_dict['onsets_Hz'],
                                  annot_dict['offsets_Hz'],
                                  labels_to_use='iabcdefghjk')

        # .wav audio, annotated via hvc.koumura.load_song_annot;
        # note wavfile.read returns (rate, data), the reverse of load_cbin
        samp_freq, raw_audio = wavfile.read(wav)
        annot_dict = hvc.koumura.load_song_annot(wav)
        hvc.audiofileIO.make_syls(raw_audio,
                                  samp_freq,
                                  spect_maker,
                                  annot_dict['labels'],
                                  annot_dict['onsets_Hz'],
                                  annot_dict['offsets_Hz'],
                                  labels_to_use='0123456')

    # test that make_syls works with spect params given individually
    make_syls_from_cbin_and_wav({
        'nperseg': 512,
        'noverlap': 480,
        'freq_cutoffs': [1000, 8000]
    })

    # test that make_syls works with each set of reference spect params
    for ref in ('tachibana', 'koumura', 'evsonganaly'):
        make_syls_from_cbin_and_wav(
            hvc.parse.ref_spect_params.refs_dict[ref])