예제 #1
0
    def test_download_true(self, tmp_path, downloaded_gmb_dataset):
        "Test to see the function downloads and loads properly when download=True."

        init(DATADIR=tmp_path)
        # Load the reference copy via the fixture, then through the high-level API.
        expected_data = downloaded_gmb_dataset.load()
        actual_data = load_dataset('gmb', version='1.0.2', download=True)
        assert expected_data == actual_data
예제 #2
0
    def test_loading_undownloaded(self, tmp_path):
        "Test loading before ``Dataset.download()`` has been called."

        init(DATADIR=tmp_path)
        with pytest.raises(RuntimeError) as excinfo:
            load_dataset('wikitext103', version='1.0.1', download=False)
        expected_hint = 'Did you forget to download the dataset (by specifying `download=True`)?'
        assert expected_hint in str(excinfo.value)
예제 #3
0
    def test_version_param(self, tmp_path):
        "Test to see the version parameter is being handled properly."

        init(DATADIR=tmp_path)

        # A non-str version must raise TypeError.
        with pytest.raises(TypeError) as excinfo:
            load_dataset('gmb', version=1.0)
        assert str(excinfo.value) == 'The version parameter must be supplied a str.'

        # Empty and unknown version strings both raise KeyError with the same help text.
        name = 'gmb'
        for version in ('', 'fake_version'):
            with pytest.raises(KeyError) as excinfo:
                load_dataset('gmb', version=version)
            assert str(excinfo.value) == (
                f'\'"{version}" is not a valid PyDAX version for the dataset "{name}". '
                'You can view all valid datasets and their versions by running the function '
                'pydax.list_all_datasets().\'')

        # If no version specified, make sure latest version grabbed
        all_datasets = list_all_datasets()
        latest_version = str(sorted(version_parser(v) for v in all_datasets[name])[-1])
        assert load_dataset('gmb') == load_dataset('gmb', version=latest_version)
예제 #4
0
    def test_export_schema_collections(self, schema_file_absolute_dir,
                                       schema_file_https_url):
        "Test high-level export_schema_collections function."

        # Exported object must be a distinct object, not the internal singleton.
        assert export_schema_collections() is not _get_schema_collections()

        def dumped_datasets_schema(collections):
            # Deterministic JSON serialization of the 'datasets' schema for comparison.
            return json.dumps(collections.schema_collections['datasets'].export_schema(),
                              sort_keys=True, indent=2, default=str)

        # The two returned schemata should equal
        assert (dumped_datasets_schema(export_schema_collections()) ==
                dumped_datasets_schema(_get_schema_collections()))

        # Different from https url used by pydax_initialization autouse fixture
        new_urls = {
            'DATASET_SCHEMA_FILE_URL': schema_file_absolute_dir / 'datasets.yaml',
            'LICENSE_SCHEMA_FILE_URL': schema_file_absolute_dir / 'licenses.yaml'
        }
        init(update_only=True, **new_urls)

        # 'formats' was not overridden and keeps the fixture's https URL ...
        assert (export_schema_collections().schema_collections['formats'].retrieved_url_or_path ==
                f'{schema_file_https_url}/formats.yaml')
        # ... while the overridden collections report the new local paths.
        assert (export_schema_collections().schema_collections['datasets'].retrieved_url_or_path ==
                new_urls['DATASET_SCHEMA_FILE_URL'])
        assert (export_schema_collections().schema_collections['licenses'].retrieved_url_or_path ==
                new_urls['LICENSE_SCHEMA_FILE_URL'])
예제 #5
0
    def test_load_schema_collections(self, loaded_schema_collections,
                                     schema_file_absolute_dir):
        "Test high-level load_schema_collections function."

        def fixture_path(name):
            # URL/path from which the fixture's collection was retrieved.
            return loaded_schema_collections.schema_collections[name].retrieved_url_or_path

        def current_path(name):
            # URL/path currently recorded by the global schema collections.
            return _get_schema_collections().schema_collections[name].retrieved_url_or_path

        init(update_only=False,
             DATASET_SCHEMA_FILE_URL=fixture_path('datasets'),
             FORMAT_SCHEMA_FILE_URL=fixture_path('formats'),
             LICENSE_SCHEMA_FILE_URL=fixture_path('licenses'))
        load_schema_collections(force_reload=True)
        for name in ('datasets', 'formats', 'licenses'):
            assert current_path(name) == fixture_path(name)

        init(
            update_only=True,
            # Different from the previous relative path used in loaded_schemata
            DATASET_SCHEMA_FILE_URL=schema_file_absolute_dir / 'datasets.yaml')
        load_schema_collections(force_reload=False)
        # Untouched collections keep their previous sources; 'datasets' picks up the new one.
        for name in ('formats', 'licenses'):
            assert current_path(name) == fixture_path(name)
        assert current_path('datasets') == schema_file_absolute_dir / 'datasets.yaml'
예제 #6
0
    def test_loading_undownloaded(self, tmp_path):
        "Test loading before ``Dataset.download()`` has been called."

        init(DATADIR=tmp_path)
        with pytest.raises(FileNotFoundError) as excinfo:
            load_dataset('wikitext103', version='1.0.1', download=False)
        expected_msg = 'Failed to load the dataset because some files are not found.'
        assert expected_msg in str(excinfo.value)
예제 #7
0
    def test_custom_relative_data_dir(self, chdir_tmp_path, tmp_sub_dir,
                                      tmp_relative_sub_dir):
        "Test using a custom relative data directory."

        init(DATADIR=tmp_relative_sub_dir)
        configured_dir = get_config().DATADIR
        # The relative path must have been resolved against the cwd to an absolute path.
        assert configured_dir == tmp_sub_dir
        assert configured_dir.is_absolute()
예제 #8
0
 def test_secure_connections_succeed_load_schema_collections(self, dataset_schema_url_or_path):
     "Test secure connections that should succeed for :func:`pydax.load_schema_collections`."
     # We use '/' instead of os.path.sep because URLs only accept / not \ as separators, but Windows path accepts
     # both. This is not an issue for the purpose of this test.
     # Override only the dataset schema source; the other schema URLs keep their prior configuration.
     init(update_only=True, DATASET_SCHEMA_FILE_URL=dataset_schema_url_or_path)
     # With TLS verification enabled, retrieval from the (trusted) source must succeed without raising.
     load_schema_collections(force_reload=True, tls_verification=True)
     # The recorded retrieval source for 'datasets' must be exactly what was configured above.
     assert (export_schema_collections().schema_collections['datasets'].retrieved_url_or_path ==
             dataset_schema_url_or_path)
예제 #9
0
    def test_non_path_data_dir(self):
        "Test exception when a nonpath is passed as DATADIR."

        with pytest.raises(ValidationError) as excinfo:
            init(DATADIR=10)

        expected_pattern = (r'1 validation error for Config\s+DATADIR\s+value'
                            r' is not a valid path \(type=type_error.path\)')
        assert re.search(expected_pattern, str(excinfo.value))
예제 #10
0
    def test_download_false(self, tmp_path, gmb_schema):
        "Test to see the function loads properly when download=False and dataset was previously downloaded."

        init(DATADIR=tmp_path)
        # First download the dataset directly through the Dataset class ...
        gmb = Dataset(gmb_schema,
                      data_dir=tmp_path / 'gmb' / '1.0.2',
                      mode=Dataset.InitializationMode.DOWNLOAD_AND_LOAD)
        # ... then load_dataset(download=False) must find and load the same files.
        assert gmb.data == load_dataset('gmb', version='1.0.2', download=False)
예제 #11
0
    def test_custom_data_dir(self, tmp_path, wikitext103_schema):
        "Test to make sure Dataset constructor uses new global data dir if one was supplied earlier to pydax.init."

        init(DATADIR=tmp_path)
        configured_dir = get_config().DATADIR
        assert configured_dir == tmp_path
        assert isinstance(configured_dir, pathlib.Path)

        wikitext = Dataset(wikitext103_schema, data_dir=tmp_path, mode=Dataset.InitializationMode.LAZY)
        # The dataset keeps the supplied directory as a pathlib.Path.
        assert wikitext._data_dir == tmp_path
        assert isinstance(wikitext._data_dir, pathlib.Path)
예제 #12
0
    def test_default_dataset_schema_name(self, tmp_path, gmb_schema):
        "Test the default schemata name."

        init(DATADIR=tmp_path)
        gmb = Dataset(gmb_schema,
                      data_dir=tmp_path / 'default' / 'gmb' / '1.0.2',
                      mode=Dataset.InitializationMode.DOWNLOAD_AND_LOAD)
        # Remove the "name" key so load_dataset must fall back to the default schema name.
        _get_schemata().schemata['datasets']._schema.pop('name')
        assert gmb.data == load_dataset('gmb', version='1.0.2', download=False)
예제 #13
0
    def test_loading_undownloaded(self, tmp_path):
        "Test loading before ``Dataset.download()`` has been called."

        init(DATADIR=tmp_path)
        with pytest.raises(RuntimeError) as excinfo:
            load_dataset('wikitext103', version='1.0.1', download=False)
        expected_hint = ('Did you forget to download the dataset '
                         '(by calling this function with `download=True` for at least once)?')
        assert expected_hint in str(excinfo.value)
예제 #14
0
    def test_subdatasets_param(self, tmp_path):
        "Test to see subdatasets parameter is being handled properly."

        init(DATADIR=tmp_path)

        # A non-iterable subdatasets value surfaces Python's own TypeError.
        with pytest.raises(TypeError) as excinfo:
            load_dataset('wikitext103', version='1.0.1', download=True, subdatasets=123)
        assert str(excinfo.value) == '\'int\' object is not iterable'

        # Only the requested subdatasets should be present in the result.
        subdatasets = ['train']
        loaded = load_dataset('wikitext103', version='1.0.1', download=True, subdatasets=subdatasets)
        assert list(loaded.keys()) == subdatasets
예제 #15
0
    def test_insecure_connections_load_schemata(self,
                                                remote_dataset_schema_url,
                                                untrust_self_signed_cert):
        "Test insecure connections that should fail when ``tls_verification=True`` for ``load_schemata``."
        init(update_only=True, DATASET_SCHEMA_URL=remote_dataset_schema_url)

        # Verified load against an untrusted self-signed cert must fail.
        with pytest.raises(InsecureConnectionError) as excinfo:
            load_schemata(force_reload=True, tls_verification=True)
        assert remote_dataset_schema_url in str(excinfo.value)

        # Insecure load succeeds, no exception raised
        load_schemata(force_reload=True, tls_verification=False)
        retrieved = export_schemata().schemata['datasets'].retrieved_url_or_path
        assert retrieved == remote_dataset_schema_url
예제 #16
0
    def test_name_param(self, tmp_path):
        "Test to see the name parameter is being handled properly."

        init(DATADIR=tmp_path)

        # A non-str name must raise TypeError.
        with pytest.raises(TypeError) as excinfo:
            load_dataset(123)
        assert str(excinfo.value) == 'The name parameter must be supplied a str.'

        # An unknown dataset name must raise KeyError with the help text.
        name = 'fake_dataset'
        with pytest.raises(KeyError) as excinfo:
            load_dataset(name)
        expected = (f'\'"{name}" is not a valid PyDAX dataset. You can view all valid datasets and their '
                    'versions by running the function pydax.list_all_datasets().\'')
        assert str(excinfo.value) == expected
예제 #17
0
File: conftest.py  Project: djalova/pydax
def pydax_initialization(schema_file_https_url, schema_localized_url):
    """Create the default initialization used for all tests. This is mainly for having a uniform initialization for all
    tests as well as avoiding using the actual default schema file URLs so as to decouple the two lines of development
    (default schema files and this library). It also replaces all download URLs with localized URLs."""

    init(update_only=False,
         DATASET_SCHEMA_URL=f'{schema_file_https_url}/datasets.yaml',
         FORMAT_SCHEMA_URL=f'{schema_file_https_url}/formats.yaml',
         LICENSE_SCHEMA_URL=f'{schema_file_https_url}/licenses.yaml')

    # Use local dataset locations by default in our tests
    datasets = _get_schemata().schemata['datasets']._schema['datasets']
    for name in datasets:
        for version in list(datasets[name]):
            datasets[name][version] = schema_localized_url(name, version)
예제 #18
0
File: test_config.py  Project: CODAIT/pydax
    def test_default_schema_url_content(self):
        """Test the content of the remote URLs a bit. We only assert them not being None here just in case the server
        returns zero-length files."""

        init(update_only=False)

        # We only assert that we have retrieved some non-empty files in this test. This is because we want to decouple
        # the maintenance of schema files in production with the library development. These files likely would change
        # more regularly than the library. For this reason, we also verify the default schema URLs are also valid https
        # links in ``test_default_schema_url_https``.

        # This test is in `test_config.py` not in `test_schema_retrieval.py` because this test is more about the content
        # of the default schema URLs than the retrieving functionality.
        for default_url in (Config.DATASET_SCHEMA_FILE_URL,
                            Config.FORMAT_SCHEMA_FILE_URL,
                            Config.LICENSE_SCHEMA_FILE_URL):
            assert len(retrieve_schema_file(default_url)) > 0
예제 #19
0
    def test_custom_configs(self):
        "Test custom configs."

        init(update_only=False)  # set back everything to default
        assert dataclasses.asdict(get_config()) == dataclasses.asdict(Config())

        new_urls = {
            'DATASET_SCHEMA_FILE_URL': 'some/local/file',
            'FORMAT_SCHEMA_FILE_URL': 'file://c:/some/other/local/file',
            'LICENSE_SCHEMA_FILE_URL': 'http://some/remote/file'
        }
        init(update_only=True, **new_urls)

        # Each overridden attribute takes its new value; DATADIR stays at the default.
        for attr, expected in new_urls.items():
            assert getattr(get_config(), attr) == expected
        assert get_config().DATADIR == Config.DATADIR
예제 #20
0
File: demo.py  Project: birkelbach/OpenDAX
# Demo script for the OpenDAX pydax bindings (Python 2 syntax: bare `print` statements).
import pydax
import time

# Register this client with the DAX server under the name "PyDAX".
pydax.init("PyDAX")

# Member definitions for a compound data type: (member name, base type, count).
new_type = (("Mem1", "BOOL", 10),
            ("Mem2", "BOOL", 1),
            ("Mem3", "BOOL", 3))

# Create the compound data type on the server.
x = pydax.cdt_create("PyDAX_Type", new_type)
#print hex(x)

# Create plain tags: (tag name, type name, count).
pydax.add("PyBYTE", "BYTE", 10)
pydax.add("PySINT", "INT", 10)  # NOTE(review): name says SINT but type is "INT" — confirm intended
pydax.add("PyINT", "INT", 10)
pydax.add("PyINT", "INT", 10)  # NOTE(review): duplicate of the previous add — confirm intended
pydax.add("PyBOOL", "BOOL", 10)

# Tag of the compound type created above.
pydax.add("PyCDTTAG", "PyDAX_TYPE", 1)  # NOTE(review): case differs from "PyDAX_Type" created above — confirm

# Poll the compound tag roughly once per second.
for n in range(1000):
  print pydax.read("PyCDTTAG", 0)
  time.sleep(1)
#print pydax.get("PyDAXTAG")
#print pydax.get(0)
#print pydax.get(1)
#print pydax.get(2)

#print pydax.cdt_get("PyDAX_Type")

#print pydax.read("PyBYTE", 0)
예제 #21
0
# Demo script for the OpenDAX pydax bindings (Python 2 syntax: bare `print` statements).
import pydax
import time

# Register this client with the DAX server under the name "PyDAX".
pydax.init("PyDAX")

# Compound-type member definitions: (member name, base type, count).
new_type = (("Mem1", "BOOL", 10), ("Mem2", "BOOL", 1), ("Mem3", "BOOL", 3))

# Create the compound data type on the server.
x = pydax.cdt_create("PyDAX_Type", new_type)
#print hex(x)

# Create plain tags: (tag name, type name, count).
pydax.add("PyBYTE", "BYTE", 10)
pydax.add("PySINT", "INT", 10)  # NOTE(review): name says SINT but type is "INT" — confirm intended
pydax.add("PyINT", "INT", 10)
pydax.add("PyINT", "INT", 10)  # NOTE(review): duplicate of the previous add — confirm intended
pydax.add("PyBOOL", "BOOL", 10)

# Tag of the compound type created above.
pydax.add("PyCDTTAG", "PyDAX_TYPE", 1)  # NOTE(review): case differs from "PyDAX_Type" created above — confirm

# Poll the compound tag roughly once per second.
for n in range(1000):
    print pydax.read("PyCDTTAG", 0)
    time.sleep(1)
#print pydax.get("PyDAXTAG")
#print pydax.get(0)
#print pydax.get(1)
#print pydax.get(2)

#print pydax.cdt_get("PyDAX_Type")

#print pydax.read("PyBYTE", 0)
#print pydax.read("PyBOOL[0]", 1)
#print pydax.read("PyBOOL", 0)
예제 #22
0
    def test_custom_symlink_data_dir(self, tmp_symlink_dir):
        "Test using a custom symlink data directory. The symlink should not be resolved."

        init(DATADIR=tmp_symlink_dir)
        # The symlink path itself must be stored, not its resolved target.
        configured_dir = get_config().DATADIR
        assert configured_dir == tmp_symlink_dir
예제 #23
0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# This scripts tests all files in CODAIT/dax-schemata. It shouldn't report any error.

import yaml

import pydax

# Point PyDAX at the local schema files being validated.
pydax.init(DATASET_SCHEMA_URL='datasets.yaml',
           FORMAT_SCHEMA_URL='formats.yaml',
           LICENSE_SCHEMA_URL='licenses.yaml')

with open('datasets.yaml') as f:
    datasets = yaml.safe_load(f)

listed = pydax.list_all_datasets()

# Datasets name are the same from the schema files. This helps ensure that PyDAX doesn't miss any dataset during the
# test.
assert frozenset(datasets['datasets']) == frozenset(listed)
# Sanity check. In case of all tests being skipped because of a minor error such as in formatting.
assert len(listed) > 0

for name, versions in listed.items():
    # Versions must be the same from the schema files. This helps ensure that PyDAX doesn't miss any dataset during the
    # test.
    assert frozenset(datasets['datasets'][name]) == frozenset(versions)