コード例 #1
    def test_data_dir(self, tmp_path, gmb_schema):
        """Test ``Dataset._data_dir``.

        Two situations are covered:

        * a ``data_dir`` that does not exist yet is recorded as given;
        * a ``data_dir`` that points at an existing regular file (``setup.py`` in the current working directory) is
          rejected with ``NotADirectoryError`` and a message naming the absolute path.
        """
        # Automatic creation
        dataset = Dataset(gmb_schema, data_dir=tmp_path / 'data_dir', mode=Dataset.InitializationMode.LAZY)
        assert dataset._data_dir == tmp_path / 'data_dir'

        # Non-directory present
        dataset = Dataset(gmb_schema, data_dir='setup.py', mode=Dataset.InitializationMode.LAZY)
        with pytest.raises(NotADirectoryError) as e:
            # NOTE(review): the body of this ``with`` was missing, which made the block unparsable. The dataset is
            # built lazily, so the error is presumably triggered by the first operation that needs the directory;
            # confirm that ``download()`` is the intended call.
            dataset.download()
        assert str(e.value) == f'"{pathlib.Path.cwd()/"setup.py"}" exists and is not a directory.'
コード例 #2
    def test_unloaded_access_to_data(self, tmp_path, gmb_schema):
        """Test access to ``Dataset.data`` when no data has been loaded.

        Reading ``Dataset.data`` must raise ``RuntimeError`` with the same message both on a freshly constructed lazy
        dataset and after the data has been downloaded but not yet loaded.
        """
        expected_message = ('Data has not been downloaded and/or loaded yet. Call Dataset.download() to download '
                            'data, call Dataset.load() to load data.')

        dataset = Dataset(gmb_schema, data_dir=tmp_path, mode=Dataset.InitializationMode.LAZY)
        with pytest.raises(RuntimeError) as e:
            # Attribute access alone is what is expected to raise; the value is intentionally discarded.
            dataset.data
        assert str(e.value) == expected_message

        # Same after downloading
        dataset.download()
        with pytest.raises(RuntimeError) as e:
            dataset.data
        assert str(e.value) == expected_message
コード例 #3
    def test_download_false(self, tmp_path, gmb_schema):
        """Test that ``load_dataset(..., download=False)`` works for a previously downloaded dataset.

        A ``Dataset`` is first built in ``DOWNLOAD_AND_LOAD`` mode under ``tmp_path / 'gmb' / '1.0.2'``; the data
        returned by ``load_dataset`` without downloading must then equal that dataset's data.
        """
        downloaded = Dataset(
            gmb_schema,
            data_dir=tmp_path.joinpath('gmb', '1.0.2'),
            mode=Dataset.InitializationMode.DOWNLOAD_AND_LOAD,
        )

        reloaded = load_dataset('gmb', version='1.0.2', download=False)

        assert downloaded.data == reloaded
コード例 #4
    def test_invalid_sha512(self, tmp_path, gmb_schema):
        """Test that ``Dataset`` rejects a download whose checksum does not match.

        The schema's ``sha512sum`` is overwritten with a bogus value, so constructing the dataset in
        ``DOWNLOAD_ONLY`` mode is expected to raise ``IOError``.
        """
        gmb_schema['sha512sum'] = 'invalid hash example'
        download_only = Dataset.InitializationMode.DOWNLOAD_ONLY

        with pytest.raises(IOError) as exc_info:
            Dataset(gmb_schema, data_dir=tmp_path, mode=download_only)

        message = str(exc_info.value)
        # The substring below is compared verbatim against the raised error's text.
        assert 'the file may by corrupted' in message
コード例 #5
ファイル: test_high_level.py プロジェクト: SSaishruthi/pydax
    def test_default_dataset_schema_name(self, tmp_path, gmb_schema):
        """Test the default schemata name.

        The dataset is downloaded and loaded under ``tmp_path / 'default' / 'gmb' / '1.0.2'``; the ``name`` key is
        then dropped from the loaded datasets schemata before ``load_dataset`` is called with ``download=False``.
        The two results must be equal.
        """
        gmb = Dataset(
            gmb_schema,
            data_dir=tmp_path.joinpath('default', 'gmb', '1.0.2'),
            mode=Dataset.InitializationMode.DOWNLOAD_AND_LOAD,
        )

        # Remove the "name" key
        datasets_schemata = _get_schemata().schemata['datasets']
        datasets_schemata._schema.pop('name')

        reloaded = load_dataset('gmb', version='1.0.2', download=False)
        assert gmb.data == reloaded
コード例 #6
    def test_invalid_tarball(self, tmp_path, gmb_schema, schema_file_https_url, schema_file_relative_dir):
        """Test that ``Dataset`` reports a download that is not a valid tar archive.

        The schema is pointed at ``datasets.yaml`` (a non-archive file) and its checksum is set to that file's real
        SHA-512, so the checksum matches and the failure is expected at unarchiving time as ``tarfile.ReadError``.
        """
        # The fixture object itself is modified in place; no copy is taken.
        gmb_schema['download_url'] = schema_file_https_url + '/datasets.yaml'
        yaml_bytes = (schema_file_relative_dir / 'datasets.yaml').read_bytes()
        gmb_schema['sha512sum'] = hashlib.sha512(yaml_bytes).hexdigest()

        with pytest.raises(tarfile.ReadError) as exc_info:
            Dataset(gmb_schema, data_dir=tmp_path, mode=Dataset.InitializationMode.DOWNLOAD_ONLY)

        message = str(exc_info.value)
        assert 'Failed to unarchive' in message
コード例 #7
    def test_deleting_data_dir(self, tmp_path, gmb_schema):
        """Test ``Dataset.delete()``.

        Checks that deleting is a no-op when the data directory was never created, and that after a download the
        populated data directory is removed by ``delete()``.
        """
        # Note we don't use tmp_sub_dir fixture because we want data_dir to be non-existing at the beginning of the
        # test.
        data_dir = tmp_path / 'data-dir'
        dataset = Dataset(gmb_schema, data_dir=data_dir, mode=Dataset.InitializationMode.LAZY)
        assert not data_dir.exists()  # sanity check: data_dir doesn't exist
        dataset.delete()  # no exception should be raised here
        assert not data_dir.exists()  # sanity check: data_dir still doesn't exist

        # Populate the directory; without this step the checks below cannot hold, because data_dir was just
        # asserted to be absent.
        dataset.download()
        # Sanity check: Files are in place
        assert dataset.is_downloaded()
        assert len(os.listdir(data_dir)) > 0
        # Delete the dir
        dataset.delete()
        assert not data_dir.exists()
コード例 #8
    def test_download_data_dir_is_not_a_dir(self, gmb_schema):
        """Test downloading when ``data_dir`` exists and is not a directory.

        ``./setup.py`` is used as the data directory; constructing the dataset in ``DOWNLOAD_ONLY`` mode must raise
        a platform-dependent exception whose text contains the absolute path of that file.
        """
        # These are raised by pathlib.Path.mkdir
        # Also see https://bugs.python.org/issue42872
        if os.name == 'nt':
            expected_error = FileExistsError
        else:
            expected_error = NotADirectoryError

        with pytest.raises(expected_error) as exc_info:
            Dataset(gmb_schema, data_dir='./setup.py', mode=Dataset.InitializationMode.DOWNLOAD_ONLY)

        # This error message may be generated by pathlib.Path.mkdir() (as in DirectoryLock.lock()). We only make sure
        # the path is in the string.
        # On Windows, backslashes in the error message are doubled, e.g.
        #   "[WinError 183] Cannot create a file when that file already exists: 'D:\\\\a\\\\pydax\\\\pydax\\\\setup.py'"
        # so the expected path gets the same doubling before the containment check.
        escaped_path = str(pathlib.Path.cwd() / "setup.py").replace('\\', '\\\\')
        assert escaped_path in str(exc_info.value)
コード例 #9
    def test_constructor_download_and_load(self, tmp_path, wikitext103_schema):
        "Test the full power of Dataset.__init__() (mode being ``InitializationMode.DOWNLOAD_AND_LOAD``)."

        # Constructing in DOWNLOAD_AND_LOAD mode is expected to make ``dataset.data`` usable right away; the
        # assertions below read the 'train', 'valid' and 'test' entries and hash their encoded text with SHA-512.
        dataset = Dataset(wikitext103_schema, data_dir=tmp_path, mode=Dataset.InitializationMode.DOWNLOAD_AND_LOAD)

        # NOTE(review): each assertion below is cut off after ``==`` -- the expected hex digest and the closing
        # parenthesis are missing from this copy, so the block does not parse as is. Restore the right-hand sides
        # from the upstream test before running.
        assert (hashlib.sha512(dataset.data['train'].encode()).hexdigest() ==

        assert (hashlib.sha512(dataset.data['valid'].encode()).hexdigest() ==

        assert (hashlib.sha512(dataset.data['test'].encode()).hexdigest() ==
コード例 #10
ファイル: conftest.py プロジェクト: CODAIT/pydax
def downloaded_tensorflow_speech_commands_dataset(
        tensorflow_speech_commands_schema) -> Dataset:
    with TemporaryDirectory() as tmp_data_dir:
        yield Dataset(tensorflow_speech_commands_schema,