def test_dataset_download(self, tmp_path, gmb_schema):
    """Check that a ``Dataset`` built in ``DOWNLOAD_ONLY`` mode fetches and unpacks its files.

    After construction the data directory is expected to hold exactly two entries, and the
    unarchived sub-directory exactly the three known files. A follow-up ``download(check=True)``
    on the same dataset is expected to raise ``RuntimeError`` with a fixed message.
    """
    target_dir = tmp_path / 'gmb'
    dataset = Dataset(gmb_schema, data_dir=target_dir, mode=Dataset.InitializationMode.DOWNLOAD_ONLY)

    # Expected top-level entries: 'groningen_meaning_bank_modified' and '.nourish.dataset'
    top_level_entries = list(target_dir.iterdir())
    assert len(top_level_entries) == 2

    expected_names = ['gmb_subset_full.txt', 'LICENSE.txt', 'README.txt']
    extracted_dir = target_dir / 'groningen_meaning_bank_modified'
    assert extracted_dir.is_dir()

    # Same count plus every name being expected means the directory holds exactly these files
    extracted_entries = list(extracted_dir.iterdir())
    assert len(extracted_entries) == len(expected_names)
    for entry in extracted_entries:
        assert entry.name in expected_names

    # A checked download on an already-downloaded dataset should error
    expected_message = (
        'Dataset.download() was previously called. To overwrite existing data files, rerun '
        'Dataset.download() with ``check`` set to ``False``.'
    )
    with pytest.raises(RuntimeError) as exc_info:
        dataset.download(check=True)
    assert str(exc_info.value) == expected_message
def test_is_downloaded(self, tmp_path, gmb_schema):
    """Exercise ``is_downloaded`` on a dataset backed by a ``.tar.gz`` archive.

    Covers three states: before download (``False``), after download (``True``), and after the
    dataset's file-list file has been overwritten with non-JSON text (``JSONDecodeError``).
    """
    missing_dir = tmp_path / 'non-existing-dir'
    # Sanity check: the data directory must be absent before the dataset is created
    assert not missing_dir.exists()

    dataset = Dataset(gmb_schema, data_dir=missing_dir, mode=Dataset.InitializationMode.LAZY)
    before_download = dataset.is_downloaded()
    assert before_download is False

    dataset.download()
    after_download = dataset.is_downloaded()
    assert after_download is True

    # JSON decoding error: overwrite the file-list file with text that is not valid JSON
    dataset._file_list_file.write_text("nonsense\n", encoding='utf-8')
    # The exception value is deliberately not inspected; we are only interested in ensuring
    # that the file isn't decodable
    with pytest.raises(JSONDecodeError):
        dataset.is_downloaded()
def test_unloaded_access_to_data(self, tmp_path, gmb_schema):
    """Check that reading ``Dataset.data`` before any load raises ``RuntimeError``.

    The same error message is expected both before and after ``download()`` has been called,
    since in neither case has ``load()`` been invoked.
    """
    expected_message = (
        'Data has not been downloaded and/or loaded yet. Call Dataset.download() to download '
        'data, call Dataset.load() to load data.'
    )
    lazy_dataset = Dataset(gmb_schema, data_dir=tmp_path, mode=Dataset.InitializationMode.LAZY)

    # Nothing downloaded, nothing loaded
    with pytest.raises(RuntimeError) as exc_info:
        lazy_dataset.data
    assert str(exc_info.value) == expected_message

    # Same after downloading: the data is still not loaded
    lazy_dataset.download()
    with pytest.raises(RuntimeError) as exc_info:
        lazy_dataset.data
    assert str(exc_info.value) == expected_message
def test_deleting_data_dir(self, tmp_path, gmb_schema):
    """Check ``Dataset.delete()`` on both a missing and a populated data directory.

    Deleting when the directory does not exist must not raise; deleting after a download must
    remove the directory.
    """
    # The tmp_sub_dir fixture is deliberately not used here: the data directory has to be
    # non-existing at the beginning of the test.
    target_dir = tmp_path / 'data-dir'
    lazy_dataset = Dataset(gmb_schema, data_dir=target_dir, mode=Dataset.InitializationMode.LAZY)

    # Sanity check: the directory doesn't exist yet
    assert not target_dir.exists()
    # Deleting a non-existing directory should not raise
    lazy_dataset.delete()
    # Sanity check: the directory still doesn't exist
    assert not target_dir.exists()

    lazy_dataset.download()
    # Sanity check: files are in place
    assert lazy_dataset.is_downloaded()
    assert os.listdir(target_dir) != []

    # Delete the populated directory
    lazy_dataset.delete()
    assert not target_dir.exists()