def test_is_downloaded(self, tmp_path, gmb_schema):
    """Exercise ``Dataset.is_downloaded()`` before and after a download, and against a tampered file list.

    Expectations checked here:

    * a lazily initialized dataset whose data directory does not exist reports ``False``;
    * after ``download()`` it reports ``True``;
    * every tampered variant of the stored file list makes it report ``False``;
    * a file list that is not valid JSON makes it raise ``JSONDecodeError``.
    """
    missing_dir = tmp_path / 'non-existing-dir'
    # Sanity check: the data directory must not exist before the dataset is created.
    assert not missing_dir.exists()

    gmb = Dataset(gmb_schema, data_dir=missing_dir, mode=Dataset.InitializationMode.LAZY)
    assert gmb.is_downloaded() is False
    gmb.download()
    assert gmb.is_downloaded() is True

    # Keep the untouched file list around; each tampered variant is derived from a fresh copy of it.
    list_path = gmb._file_list_file
    with open(list_path, mode='r') as reader:
        pristine = json.load(reader)

    # README entry whose recorded size is off by 100.
    readme_key = 'groningen_meaning_bank_modified/README.txt'
    resized_readme = copy.deepcopy(pristine[readme_key])
    resized_readme['size'] += 100

    tamperings = (
        # Can't find a file
        {'non-existing-file': {'type': int(tarfile.REGTYPE)}},
        # File type incorrect
        {'groningen_meaning_bank_modified': {'type': int(tarfile.REGTYPE)}},
        {'groningen_meaning_bank_modified/LICENSE.txt': {'type': int(tarfile.DIRTYPE)}},
        {readme_key: {'type': int(tarfile.SYMTYPE)}},
        # Size incorrect
        {readme_key: resized_readme},
    )
    for overrides in tamperings:
        # Overlay one wrong entry on a copy of the real list, persist it, and expect a mismatch.
        tampered = copy.deepcopy(pristine)
        tampered.update(overrides)
        with open(list_path, mode='w') as writer:
            json.dump(tampered, writer)
        assert gmb.is_downloaded() is False

    # JSON decoding error: only the exception type matters here, not its message, because all we want to
    # establish is that the file list cannot be decoded.
    list_path.write_text("nonsense\n", encoding='utf-8')
    with pytest.raises(JSONDecodeError):
        gmb.is_downloaded()
def test_deleting_data_dir(self, tmp_path, gmb_schema):
    """Exercise ``Dataset.delete()`` on both a missing and a populated data directory.

    Deleting when the directory does not exist must not raise; deleting after a download must remove the
    directory.
    """
    # The tmp_sub_dir fixture is deliberately not used: the data directory has to be absent when the test
    # starts.
    target_dir = tmp_path / 'data-dir'
    gmb = Dataset(gmb_schema, data_dir=target_dir, mode=Dataset.InitializationMode.LAZY)

    # Sanity check: nothing on disk yet.
    assert not target_dir.exists()
    # Deleting a directory that is not there must not raise.
    gmb.delete()
    # Sanity check: still nothing on disk.
    assert not target_dir.exists()

    gmb.download()
    # Sanity check: files are in place.
    assert gmb.is_downloaded()
    assert os.listdir(target_dir)

    # Delete the directory and confirm it is gone.
    gmb.delete()
    assert not target_dir.exists()