Ejemplo n.º 1
0
def dummy_register():
    """Yield a `DataDirRegister` configured from a temporary TOML file.

    Three dataset directories are prepared inside a temporary directory and
    referenced from a `[Namespaces]` table:

    * `kaggle`: a list of two data dirs (`kaggle` with `Ds0`, `kaggle2`
      with `Ds1`).
    * `mlds`: a single data dir (with `Ds0`).
    * `other`: a hard-coded path that this function never creates.

    This is a generator: the temporary directory (and therefore the TOML
    file and the prepared datasets) only exists until the generator is
    resumed or closed after the `yield`.

    Yields:
        The `register_path.DataDirRegister` built from the TOML file.
    """
    with tempfile.TemporaryDirectory() as raw_tmp_dir:
        root = pathlib.Path(raw_tmp_dir)
        kaggle_dir = root / 'kaggle'
        kaggle2_dir = root / 'kaggle2'
        mlds_dir = root / 'mlds'

        # Namespace `kaggle` spans two data dirs, one dataset in each.
        Ds0(data_dir=kaggle_dir).download_and_prepare()
        Ds1(data_dir=kaggle2_dir).download_and_prepare()
        # Namespace `mlds` has a single data dir.
        Ds0(data_dir=mlds_dir).download_and_prepare()
        # Namespace `other` intentionally has nothing prepared for it.

        toml_text = textwrap.dedent(f"""
        [Namespaces]
        kaggle=[
            '{os.fspath(kaggle_dir)}',
            '{os.fspath(kaggle2_dir)}',
        ]
        mlds='{os.fspath(mlds_dir)}'
        other='/tmp/path/to/non-existing-path'
        """)

        config_file = root / 'dummy-community-datasets.toml'
        config_file.write_text(toml_text)
        yield register_path.DataDirRegister(path=config_file)
Ejemplo n.º 2
0
def _load_register_for_paths(
    namespace: str,
    paths: List[epath.Path],
) -> List[register_base.BaseRegister]:
  """Returns a list of registers for the given paths.

  Each path is classified by its string form: paths starting with `github`
  are treated as code paths, all others as data paths. A namespace may
  contain only one of the two kinds.

  Args:
    namespace: Name of the namespace the paths belong to. Only used as the
      key of the data-dir register.
    paths: Paths configured for the namespace.

  Returns:
    A list with a `DataDirRegister` when data paths were given, or a
    `PackageRegister` when code paths were given. Empty if `paths` is empty.

  Raises:
    RuntimeError: If `paths` mixes code paths and data paths.
  """
  code_paths = []
  data_paths = []
  for path in paths:
    if os.fspath(path).startswith('github'):
      code_paths.append(path)
    else:
      data_paths.append(path)

  if code_paths and data_paths:
    # The closing parenthesis after `{data_paths}` was missing, which left
    # the error message unbalanced.
    raise RuntimeError(f'Both a path containing code ({code_paths}) and '
                       f'a path containing data ({data_paths}) are specified. '
                       'This is not supported')

  registers = []
  if data_paths:
    register = register_path.DataDirRegister(
        namespace_to_data_dirs={namespace: data_paths})
    registers.append(register)
  if code_paths:
    # The package register is built from the shared community index path,
    # not from the individual code paths.
    registers.append(
        register_package.PackageRegister(
            path=gcs_utils.GCS_COMMUNITY_INDEX_PATH))
  return registers
Ejemplo n.º 3
0
def test_data_dir_register():
    """A register built from a namespace mapping exposes that namespace."""
    data_dirs_by_namespace = {'ns1': [epath.Path('/path/ns1')]}
    register = register_path.DataDirRegister(
        namespace_to_data_dirs=data_dirs_by_namespace,
    )
    expected_namespaces = {'ns1'}
    assert expected_namespaces == register.namespaces