def download_gcs_folder(
    gcs_folder: epath.Path,
    local_folder: epath.PathLike,
    max_simultaneous_downloads: int = 25,
) -> None:
  """Downloads prepared GCS folder to local folder."""
  if _is_gcs_disabled:
    raise AssertionError('Cannot download from GCS when _is_gcs_disabled')

  # Filter out the diffs folder if present
  paths_to_dl = [p for p in gcs_folder.iterdir() if p.name != 'diffs']

  with tqdm_utils.async_tqdm(
      total=len(paths_to_dl), desc='Dl Completed...', unit=' file'
  ) as pbar:

    def _copy(gcs_path_: epath.Path):
      # Copy 'gs://tfds-data/datasets/ds/1.0.0/file' -> `local_dir/file`
      tf.io.gfile.copy(
          os.fspath(gcs_path_),
          os.path.join(local_folder, gcs_path_.name),
      )
      pbar.update(1)

    with concurrent.futures.ThreadPoolExecutor(
        max_workers=max_simultaneous_downloads
    ) as executor:
      futures = [executor.submit(_copy, path) for path in paths_to_dl]
      for future in concurrent.futures.as_completed(futures):
        future.result()
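
# Usage sketch (not part of the library; the bucket path and local directory
# below are assumptions for illustration): mirror one prepared dataset version
# from the TFDS GCS bucket into a local data dir, downloading up to 25 files
# concurrently.
def _example_download_gcs_folder(local_dir: str = '/tmp/tfds/mnist/3.0.1') -> None:
  epath.Path(local_dir).mkdir(parents=True, exist_ok=True)
  download_gcs_folder(
      gcs_folder=epath.Path('gs://tfds-data/datasets/mnist/3.0.1'),
      local_folder=local_dir,
  )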

def _compute_dir_hash(path: epath.Path) -> str:
  """Computes the checksum of the given directory deterministically."""
  all_files = sorted(path.iterdir())

  if any(f.is_dir() for f in all_files):
    raise ValueError('Installed package should only contain files.')

  # Concatenate the filenames and file contents to create the directory hash
  all_checksums = [f.name for f in all_files]
  all_checksums += [checksums.compute_url_info(f).checksum for f in all_files]
  return hashlib.sha256(''.join(all_checksums).encode()).hexdigest()
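
# Minimal sketch (not part of the library): writes two files into a temporary
# directory and hashes it twice to illustrate that the result is deterministic
# and depends only on the file names and contents.
def _example_compute_dir_hash() -> None:
  import tempfile

  with tempfile.TemporaryDirectory() as tmp_dir:
    pkg_dir = epath.Path(tmp_dir)
    (pkg_dir / 'my_dataset.py').write_text('# dataset code')
    (pkg_dir / 'labels.txt').write_text('cat\ndog')
    assert _compute_dir_hash(pkg_dir) == _compute_dir_hash(pkg_dir)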

def _maybe_iterdir(path: epath.Path) -> Iterator[epath.Path]:
  """Same as `path.iterdir()`, but doesn't fail if the path does not exist."""
  # Use try/except rather than `.exists()` to avoid an extra RPC call
  # per namespace.
  try:
    for f in path.iterdir():
      yield f
  except (
      FileNotFoundError,
      tf.errors.NotFoundError,
      tf.errors.PermissionDeniedError,
  ):
    pass

def list_ds_packages_for_namespace(
    namespace: str,
    path: epath.Path,
) -> List[DatasetPackage]:
  """Returns the dataset packages found in a specific directory.

  Directories that contain code should have the following structure:

  ```
  <path>/
      <dataset0>/
          <dataset0>.py
      <dataset1>/
          <dataset1>.py
      ...
  ```

  Additional files or folders which are not detected as datasets will be
  ignored (e.g. `__init__.py`).

  Args:
    namespace: Namespace of the datasets.
    path: The directory path containing the datasets.

  Returns:
    ds_packages: The dataset packages found in the directory (sorted for
      determinism).

  Raises:
    FileNotFoundError: If the path cannot be reached.
  """
  if not path.exists():
    # Should be fault-tolerant in the future
    raise FileNotFoundError(f'Could not find datasets at {path}')

  all_packages = []
  # Sort the directory entries so the returned packages are deterministic.
  for ds_path in sorted(path.iterdir()):
    source = get_dataset_source(ds_path)
    if source:
      pkg = DatasetPackage(
          name=naming.DatasetName(namespace=namespace, name=ds_path.name),
          source=source,
      )
      all_packages.append(pkg)
  return all_packages
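
# Usage sketch (the namespace and path are hypothetical): list every dataset
# package found under a community namespace directory.
def _example_list_ds_packages() -> List[DatasetPackage]:
  return list_ds_packages_for_namespace(
      namespace='my_namespace',
      path=epath.Path('/path/to/my_namespace/datasets'),
  )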

def _extract_split_files(data_dir: epath.Path) -> _SplitFilesDict:
  """Extracts the example files in `data_dir`, grouped by split."""
  files = sorted(data_dir.iterdir())
  file_infos = [
      naming.FilenameInfo.from_str(f.name)
      for f in files
      if naming.FilenameInfo.is_valid(f.name)
  ]
  if not file_infos:
    raise ValueError(
        f'No example files detected in {data_dir}. Make sure to follow the '
        'pattern: '
        '`<dataset_name>-<split_name>.<file-extension>-xxxxxx-of-yyyyyy`'
    )

  split_files = collections.defaultdict(list)
  for file_info in file_infos:
    split_files[file_info.split].append(file_info)
  return split_files
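
# Usage sketch (file names are hypothetical): for a prepared folder containing
#   mnist-train.tfrecord-00000-of-00002
#   mnist-train.tfrecord-00001-of-00002
#   mnist-test.tfrecord-00000-of-00001
# the call below returns a dict with a 'train' entry holding two
# `FilenameInfo`s and a 'test' entry holding one.
def _example_extract_split_files(data_dir: str) -> _SplitFilesDict:
  return _extract_split_files(epath.Path(data_dir))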

def get_dataset_source(
    ds_path: epath.Path,
) -> Optional[dataset_sources_lib.DatasetSource]:
  """Returns a `DatasetSource` instance if the given path corresponds to a dataset.

  To determine whether the given path contains a dataset, a simple heuristic
  is used that checks whether the path has the following structure:

  ```
  <ds_name>/
      <ds_name>.py
  ```

  If so, all `.py`, `.txt`, `.tsv`, `.json` files will be added to the package.

  Args:
    ds_path: Path of the dataset module

  Returns:
    A `DatasetSource` instance if the path matches the expected file structure.
  """
  filter_list = {'__init__.py'}
  suffixes_list = ('.txt', '.tsv', '.py', '.json')

  def is_interesting_file(fname: str) -> bool:
    return fname.endswith(suffixes_list) and fname not in filter_list

  if not ds_path.is_dir():
    return None
  all_filenames = set(f.name for f in ds_path.iterdir())
  if f'{ds_path.name}.py' not in all_filenames:
    return None

  return dataset_sources_lib.DatasetSource(
      root_path=ds_path,
      filenames=sorted(
          [fname for fname in all_filenames if is_interesting_file(fname)]
      ),
  )
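
# Usage sketch (the directory layout is hypothetical): a folder such as
#   my_dataset/
#     my_dataset.py
#     labels.txt
#     __init__.py
# is detected as a dataset; `my_dataset.py` and `labels.txt` are packaged,
# while `__init__.py` is filtered out.
def _example_get_dataset_source(
    ds_dir: str = '/path/to/my_dataset',
) -> Optional[dataset_sources_lib.DatasetSource]:
  return get_dataset_source(epath.Path(ds_dir))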