def test_Collection___init__(test_obs, rwc_obs, rwc_root):
    """Check Collection construction with and without ``strict=True``.

    A default construction from ``test_obs`` must succeed, while the same
    input with ``strict=True`` must raise ``MissingDataException``. A
    single RWC observation must raise in strict mode with an empty
    ``audio_root`` and succeed when ``audio_root`` is ``rwc_root``.
    """
    lenient = model.Collection(test_obs)
    assert lenient is not None

    # Strict construction from the generic fixture is expected to fail.
    with pytest.raises(model.MissingDataException):
        model.Collection(test_obs, strict=True)

    rwc_observation = model.Observation(**rwc_obs)

    # Same observation, but an empty audio root: strict mode must reject it.
    with pytest.raises(model.MissingDataException):
        model.Collection([rwc_observation], audio_root='', strict=True)

    # With the real audio root the strict constructor must go through.
    strict_collection = model.Collection(
        [rwc_observation], audio_root=rwc_root, strict=True)
    assert strict_collection is not None
def test_Collection_validate(test_obs, rwc_obs):
    """Check that ``validate`` passes on the fixture and fails on bad data.

    The collection built from ``test_obs`` must validate when file checks
    are disabled; after appending an observation whose ``duration`` is a
    non-numeric string, validation must report failure.
    """
    collection = model.Collection(test_obs)
    passes_initially = collection.validate(verbose=True, check_files=False)
    assert passes_initially

    # Corrupt the duration field before adding the record to the collection.
    rwc_obs['duration'] = 'abcdef'
    collection.append(rwc_obs)

    passes_after_append = collection.validate(verbose=True)
    assert not passes_after_append
def test_audio_to_observations(uiowa_root, onset_root, workspace):
    """Run ``SC.audio_to_observations`` on one UIowa tuba recording.

    Verifies that:
      * the input audio and onsets CSV exist,
      * one observation is returned per row of the onsets CSV,
      * the observations form a valid ``Collection`` rooted at the
        output directory,
      * every observation carries the requested instrument label, and
      * observation indexes are unique.
    """
    audio_file = os.path.join(
        uiowa_root,
        "theremin.music.uiowa.edu/sound files/MIS/Brass/tuba"
        "/Tuba.ff.C3C4.aiff")
    assert os.path.exists(audio_file)

    index = "uiowa78fae0a0"
    onsets_file = os.path.join(onset_root, 'uiowa', "{}.csv".format(index))
    assert os.path.exists(onsets_file)

    output_dir = os.path.join(workspace, 'notes_tmp')
    utils.create_directory(output_dir)

    fext = 'flac'
    observations = SC.audio_to_observations(
        index, audio_file, onsets_file, output_dir, file_ext=fext,
        instrument='Tuba', dataset='uiowa')

    onset_df = pd.read_csv(onsets_file)
    assert len(observations) == len(onset_df)

    coll = model.Collection(observations, output_dir)
    assert coll.validate(verbose=True)

    # The original loop evaluated this comparison without asserting it,
    # so a wrong instrument label could never fail the test.
    for obs in coll.values():
        assert obs.instrument == 'Tuba'

    # Every observation must have its own distinct index.
    assert len({obs.index for obs in observations}) == len(observations)
def test_Collection_to_read_json(test_obs, workspace):
    """Round-trip a Collection through ``to_json`` / ``read_json``.

    ``to_json()`` without a path must return something other than
    ``None``; writing to a file and reading it back must yield a
    collection equal to the original.
    """
    original = model.Collection(test_obs)
    target_path = os.path.join(workspace, "dummy_collection.json")

    # No path given: the call is expected to hand back the serialized data.
    serialized = original.to_json()
    assert serialized is not None

    # Path given: write to disk, then load it again for comparison.
    original.to_json(target_path)
    reloaded = original.read_json(target_path)
    assert original == reloaded
def test_partition_collection(test_bigobs):
    """Check the train/valid/test assignment from ``partition_collection``.

    Verifies that:
      * every observation receives a partition label,
      * the 'test' partition holds exactly the observations whose
        ``dataset`` is the held-out dataset,
      * the remaining observations are split between 'train' and 'valid'
        in roughly the requested proportion, and
      * no ``source_index`` appears in both 'train' and 'valid'.
    """
    dset = model.Collection(test_bigobs)
    dset_df = dset.to_dataframe()
    split = 0.5
    # Single source of truth for the held-out dataset; the original kept an
    # unused 'philharmonia' local while actually testing against 'rwc'.
    test_set = 'rwc'

    partition_df = model.partition_collection(
        dset, test_set=test_set, train_val_split=split)
    assert len(partition_df) == len(dset)

    test_index = partition_df[partition_df['partition'] == 'test'].index
    not_test_df = partition_df[partition_df['partition'] != 'test']

    # Make sure that the 'test' indices were all allocated to the held-out
    # dataset's points...
    assert np.all(dset_df.loc[test_index]['dataset'] == test_set)
    # ...and vice versa.
    assert np.all(dset_df.loc[not_test_df.index]['dataset'] != test_set)

    # Use the row count as the denominator: DataFrame.size is rows * columns
    # and would skew the fractions if the frame had more than one column.
    n_not_test = float(len(not_test_df))

    train_index = not_test_df[not_test_df['partition'] == 'train'].index
    valid_index = not_test_df[not_test_df['partition'] == 'valid'].index

    train_percent = len(train_index) / n_not_test
    assert 0 < train_percent < 1.
    np.testing.assert_approx_equal(train_percent, split, 1)

    # The validation share is the complement of the training share.
    valid_percent = len(valid_index) / n_not_test
    assert 0 < valid_percent < 1.
    np.testing.assert_approx_equal(valid_percent, 1.0 - split, 1)

    # Make sure that there is no source_index cross contamination.
    train_sources = set(dset_df.loc[train_index].source_index.values)
    valid_sources = set(dset_df.loc[valid_index].source_index.values)
    assert len(valid_sources.intersection(train_sources)) == 0
def test_Collection_view(test_obs):
    """Check that a view filtered on ``dataset == "rwc"`` holds only rwc rows."""
    collection = model.Collection(test_obs)
    filtered = collection.view(column='dataset', filter_value="rwc")
    filtered_df = filtered.to_dataframe()

    # The only dataset label left in the view must be "rwc".
    remaining_datasets = set(filtered_df["dataset"].unique())
    assert remaining_datasets == {"rwc"}
def test_Collection_to_dataframe(test_obs):
    """Check the size and first index label of the exported dataframe."""
    frame = model.Collection(test_obs).to_dataframe()

    # One row per input observation.
    assert len(frame) == len(test_obs)

    # The frame's first index label must be the first observation's 'index'.
    first_label = frame.index[0]
    assert first_label == test_obs[0]['index']
def test_Collection_to_builtin(test_obs):
    """Check that ``to_builtin`` gives back data equal to the input fixture."""
    collection = model.Collection(test_obs)
    recovered = collection.to_builtin()
    assert recovered == test_obs
def test_Collection_append(test_obs, rwc_obs):
    """Check that ``append`` grows the collection by exactly one item."""
    collection = model.Collection(test_obs)
    assert len(collection) == len(test_obs)

    collection.append(rwc_obs)
    assert len(collection) == len(test_obs) + 1
def test_Collection_items(test_obs):
    """Check that ``items`` yields one entry per observation, in input order.

    The first element of each entry must match the ``'index'`` value of the
    corresponding input observation.
    """
    collection = model.Collection(test_obs)
    pairs = collection.items()
    assert len(pairs) == len(test_obs)

    observed_keys = [pair[0] for pair in pairs]
    expected_keys = [record['index'] for record in test_obs]
    assert observed_keys == expected_keys
def test_Collection___eq__(test_obs):
    """Check Collection equality against a twin, raw input and ``None``."""
    first = model.Collection(test_obs)
    second = model.Collection(test_obs)

    # Two collections built from the same observations compare equal.
    assert first == second

    # A collection is not equal to the raw data it was built from.
    assert first != test_obs

    assert first is not None
def test_Collection___len__(test_obs):
    """Check that ``len`` of a Collection equals the number of inputs."""
    collection = model.Collection(test_obs)
    expected_count = len(test_obs)
    assert len(collection) == expected_count