예제 #1
0
def test_find_builder_dir_legacy_ds(mock_fs: testing.MockFs):
  """A version dir is only detected once it contains `features.json`."""
  # Only an unrelated file exists in the version dir: nothing is found.
  mock_fs.add_file('path/to/ds0/1.0.0/temp.txt')
  dir_without_features = _find_builder_dir('ds0')
  assert dir_without_features is None

  # Once the feature config is added, the same dir becomes discoverable.
  mock_fs.add_file('path/to/ds0/1.0.0/features.json')
  dir_with_features = _find_builder_dir('ds0')
  assert dir_with_features == 'path/to/ds0/1.0.0'
예제 #2
0
def test_get_version_str(mock_fs: testing.MockFs):
  """Resolves versions via `_get_version_str` and `_find_builder_dir`."""
  for features_path in (
      'path/to/ds/1.0.0/features.json',
      'path/to/ds/1.0.1/features.json',
      'path/to/ds/1.1.0/features.json',
      'path/to/ds/2.0.1/features.json',
  ):
    mock_fs.add_file(features_path)

  resolve_version = functools.partial(
      read_only_builder._get_version_str,  # pylint: disable=protected-access
      'path/to/ds/',
  )

  matching_versions = (
      # requested_version is None -> Returns last version
      (None, '2.0.1'),
      # Returns highest matching version
      ('1.*.*', '1.1.0'),
      ('*.*.*', '2.0.1'),
      ('1.0.0', '1.0.0'),
  )
  for requested, expected_version in matching_versions:
    assert resolve_version(requested_version=requested) == expected_version

  # No matching version found
  for requested in ('1.3.*', '2.3.5'):
    assert resolve_version(requested_version=requested) is None

  # The same requests, expressed as `name[:version]` dataset names.
  matching_dirs = (
      ('ds', 'path/to/ds/2.0.1'),
      ('ds:*.*.*', 'path/to/ds/2.0.1'),
      ('ds:1.*.*', 'path/to/ds/1.1.0'),
      ('ds:1.0.0', 'path/to/ds/1.0.0'),
  )
  for ds_name, expected_dir in matching_dirs:
    assert _find_builder_dir(ds_name) == expected_dir

  for ds_name in ('ds:1.3.*', 'ds:2.3.5'):
    assert _find_builder_dir(ds_name) is None
예제 #3
0
def test_find_builder_dir_multi_versions(mock_fs: testing.MockFs):
  """The default version is chosen by numeric order (10.0.0 beats 9.9.9)."""
  for features_path in (
      'path/to/ds0/1.0.0/features.json',
      'path/to/ds0/9.9.9/features.json',
      'path/to/ds0/10.0.0/features.json',
  ):
    mock_fs.add_file(features_path)

  # No version requested: the numerically highest one is expected.
  latest_dir = _find_builder_dir('ds0')
  assert latest_dir == 'path/to/ds0/10.0.0'

  # A version that exists on disk is returned as-is.
  pinned_dir = _find_builder_dir('ds0:9.9.9')
  assert pinned_dir == 'path/to/ds0/9.9.9'

  # A version that was never added yields no directory.
  missing_dir = _find_builder_dir('ds0:9.9.0')
  assert missing_dir is None
예제 #4
0
def test_get_version_str_empty_builder_dir(mock_fs: testing.MockFs):
  """Checks the error reported for a builder dir with no version folder."""
  # Single source of truth for the directory: it is used for the file that is
  # created, for the call under test and for the expected error message.
  builder_dir = 'path/to/ds/'
  error_msg = (f'The builder directory {builder_dir} doesn\'t contain any '
               'versions.')

  # `features.json` is placed directly inside the builder dir, so there is no
  # version sub-directory below it.
  mock_fs.add_file(f'{builder_dir}features.json')
  get_version_str = functools.partial(
      read_only_builder._get_version_str,  # pylint: disable=protected-access
      builder_dir,
  )

  # `_assert_raises` is defined elsewhere in this file; presumably it checks
  # that the body raises with `error_msg` -- confirm against its definition.
  with _assert_raises(error_msg):
    assert get_version_str() is None
예제 #5
0
def test_find_builder_config_code(mock_fs: testing.MockFs):
  """With dataset code available, the first builder config is the default."""

  class MyDataset(testing.DummyMnist):  # pylint: disable=unused-variable
    """Dummy dataset declaring two builder configs at version 2.0.0."""
    BUILDER_CONFIGS = [
        dataset_builder.BuilderConfig(  # pylint: disable=g-complex-comprehension
            name=name,
            version='2.0.0',
            description=f'{name} description'
        )
        for name in ('default_config', 'other_config')
    ]

  for features_path in (
      'path/to/my_dataset/default_config/0.0.1/features.json',
      'path/to/my_dataset/default_config/1.0.0/features.json',
      'path/to/my_dataset/other_config/1.0.0/features.json',
      'path/to/my_dataset/old_config/0.8.0/features.json',
      'path/to/my_dataset/old_config/1.0.0/features.json',
      'path/to/my_dataset/broken_config/features.json',
      'path/to/my_dataset/0.0.1/features.json',
  ):
    mock_fs.add_file(features_path)

  resolvable = (
      # The default config name comes from the code. The version found on
      # disk is used even though the code declares a more recent one.
      ('my_dataset', 'path/to/my_dataset/default_config/1.0.0'),
      # Explicit version, implicit (default) config.
      ('my_dataset:0.0.1', 'path/to/my_dataset/default_config/0.0.1'),
      # Explicit config: the last detected version is loaded.
      ('my_dataset/other_config', 'path/to/my_dataset/other_config/1.0.0'),
      ('my_dataset/old_config', 'path/to/my_dataset/old_config/1.0.0'),
      # Explicit config and explicit version.
      ('my_dataset/old_config:0.8.0', 'path/to/my_dataset/old_config/0.8.0'),
  )
  for ds_name, expected_dir in resolvable:
    assert _find_builder_dir(ds_name) == expected_dir

  # A config dir without a version folder, or one that does not exist at all,
  # resolves to nothing.
  for ds_name in ('my_dataset/broken_config', 'my_dataset/unknown_config'):
    assert _find_builder_dir(ds_name) is None
예제 #6
0
def test_builder_from_directories_splits(mock_fs: testing.MockFs):
    """Builds one builder from two dirs and checks the combined split infos."""

    def make_split(name: str, shard_lengths: Sequence[int]) -> proto.SplitInfo:
        """Returns a `SplitInfo` proto with the given name and shard lengths."""
        return proto.SplitInfo(name=name, shard_lengths=shard_lengths)

    def make_info(splits):
        """Returns a `DatasetInfo` proto with one text feature and `splits`."""
        text = proto.feature_pb2.Feature(
            python_class_name=
            'tensorflow_datasets.core.features.text_feature.Text',
            text=proto.feature_pb2.TextFeature())
        features_dict = proto.feature_pb2.FeaturesDict(
            features={'text': text})
        top_level = proto.feature_pb2.Feature(
            python_class_name=
            'tensorflow_datasets.core.features.features_dict.FeaturesDict',
            features_dict=features_dict)
        return proto.dataset_info_pb2.DatasetInfo(
            name='ds_name',
            version='1.0.0',
            file_format='tfrecord',
            splits=splits,
            features=top_level,
        )

    # Directory `a` has both splits, directory `b` only has `train`.
    info_by_dir = {
        '/path/dataset/a': make_info(
            [make_split('train', [4, 5]), make_split('test', [3])]),
        '/path/dataset/b': make_info([make_split('train', [3, 7])]),
    }
    for directory, info_proto in info_by_dir.items():
        mock_fs.add_file(
            path=f'{directory}/dataset_info.json',
            content=json_format.MessageToJson(info_proto, sort_keys=True),
        )

    builder = read_only_builder.builder_from_directories(list(info_by_dir))

    assert isinstance(builder, read_only_builder.ReadOnlyBuilder)
    for split_name in ('train', 'test'):
        assert isinstance(builder.info.splits[split_name],
                          splits_lib.MultiSplitInfo)

    # `test` only exists in one directory, `train` is present in both.
    expected_test = ('MultiSplitInfo(name=\'test\', '
                     'split_infos=[<SplitInfo num_examples=3, num_shards=1>])')
    expected_train = ('MultiSplitInfo(name=\'train\', split_infos=['
                      '<SplitInfo num_examples=9, num_shards=2>, '
                      '<SplitInfo num_examples=10, num_shards=2>])')
    assert str(builder.info.splits['test']) == expected_test
    assert str(builder.info.splits['train']) == expected_train
예제 #7
0
def test_find_builder_dir_with_multiple_data_dir(mock_fs: testing.MockFs):
  """Looks a dataset up across the dirs returned by `list_data_dirs`."""
  find_dir = read_only_builder._find_builder_dir  # pylint: disable=protected-access

  mock_fs.add_file('path/to/ds0/1.0.0/features.json')

  # Before `list_data_dirs` is patched, the dataset is not found.
  assert find_dir('ds0') is None

  searched_dirs = [constants.DATA_DIR, 'path/to']
  patched_dirs = mock.patch.object(
      constants, 'list_data_dirs', return_value=searched_dirs)
  with patched_dirs:
    # With `path/to` among the searched dirs, the dataset is located.
    assert find_dir('ds0') == 'path/to/ds0/1.0.0'

    # Same dataset and version in a second data dir: the lookup must fail.
    mock_fs.add_file(
        os.path.join(constants.DATA_DIR, 'ds0/1.0.0/features.json'))
    with pytest.raises(ValueError, match='detected in multiple locations'):
      find_dir('ds0')
예제 #8
0
def test_get_version_str(mock_fs: testing.MockFs):
  """Checks version resolution and the error context when nothing matches."""
  # Defined once and reused for the call under test and the expected message.
  builder_dir = 'path/to/ds/'

  mock_fs.add_file('path/to/ds/1.0.0/features.json')
  mock_fs.add_file('path/to/ds/1.0.1/features.json')
  mock_fs.add_file('path/to/ds/1.1.0/features.json')
  mock_fs.add_file('path/to/ds/2.0.1/features.json')

  get_version_str = functools.partial(
      read_only_builder._get_version_str,  # pylint: disable=protected-access
      builder_dir,
  )

  with error_utils.reraise_with_context(registered.DatasetNotFoundError):
    # requested_version is None -> Returns last version
    assert get_version_str(requested_version=None) == '2.0.1'
    # Returns highest matching version
    assert get_version_str(requested_version='1.*.*') == '1.1.0'
    assert get_version_str(requested_version='*.*.*') == '2.0.1'
    assert get_version_str(requested_version='1.0.0') == '1.0.0'
    # No matching version found
    assert get_version_str(requested_version='1.3.*') is None
    assert get_version_str(requested_version='2.3.5') is None

    assert _find_builder_dir('ds') == 'path/to/ds/2.0.1'
    assert _find_builder_dir('ds:*.*.*') == 'path/to/ds/2.0.1'
    assert _find_builder_dir('ds:1.*.*') == 'path/to/ds/1.1.0'
    assert _find_builder_dir('ds:1.0.0') == 'path/to/ds/1.0.0'
    assert _find_builder_dir('ds:1.3.*') is None
    assert _find_builder_dir('ds:2.3.5') is None

  # No matching version found, updated error context.
  requested_version = '1.3.*'
  error_msg = (f'No version matching the requested {requested_version} was '
               f'found in the builder directory: {builder_dir}.')

  # `_assert_raises` is defined elsewhere in this file; presumably it checks
  # that the body raises with `error_msg` -- confirm against its definition.
  with _assert_raises(error_msg):
    assert get_version_str(requested_version=requested_version) is None
예제 #9
0
def test_find_builder_dir_bad_version_dir_name(mock_fs: testing.MockFs):
  """Folders whose names are not well-formed versions are skipped."""
  # None of these folder names is a valid `x.y.z` version.
  for features_path in (
      'path/to/ds0/9.9./features.json',
      'path/to/ds0/1.0.o/features.json',
      'path/to/ds0/other/features.json',
  ):
    mock_fs.add_file(features_path)
  only_bad_dirs = _find_builder_dir('ds0')
  assert only_bad_dirs is None

  # A single well-formed version dir is enough for the lookup to succeed.
  mock_fs.add_file('path/to/ds0/1.1.0/features.json')
  with_valid_dir = _find_builder_dir('ds0')
  assert with_valid_dir == 'path/to/ds0/1.1.0'
예제 #10
0
def test_find_builder_config_no_code(mock_fs: testing.MockFs):
  """Without dataset code, a config is only used when named explicitly."""
  # `ds0` has a config folder as well as a config-less version folder.
  for features_path in (
      'path/to/ds0/config/1.0.0/features.json',
      'path/to/ds0/1.1.0/features.json',
  ):
    mock_fs.add_file(features_path)

  # No config in the name: the config-less version dir is returned.
  implicit_ds0 = _find_builder_dir('ds0')
  assert implicit_ds0 == 'path/to/ds0/1.1.0'
  # Config named explicitly: its version dir is returned.
  explicit_ds0 = _find_builder_dir('ds0/config')
  assert explicit_ds0 == 'path/to/ds0/config/1.0.0'

  # `ds1` only exists under a config folder.
  mock_fs.add_file('path/to/ds1/config/1.0.0/features.json')
  implicit_ds1 = _find_builder_dir('ds1')
  assert implicit_ds1 is None
  explicit_ds1 = _find_builder_dir('ds1/config')
  assert explicit_ds1 == 'path/to/ds1/config/1.0.0'
예제 #11
0
def test_find_builder_wrong_dir(mock_fs: testing.MockFs):
  """A dataset is not found when searched under another `data_dir`."""
  mock_fs.add_file('path/to/ds0/1.1.0/features.json')

  # Lookup using whatever `data_dir` the `_find_builder_dir` helper defaults
  # to (presumably `path/to` -- confirm against the helper's definition).
  default_lookup = _find_builder_dir('ds0')
  assert default_lookup == 'path/to/ds0/1.1.0'

  # Lookup under a directory that does not contain the dataset.
  other_lookup = _find_builder_dir('ds0', data_dir='path/to/other/dir')
  assert other_lookup is None