Beispiel #1
0
def build_catalog(
    datasets: Optional[List[str]] = None,
    *,
    catalog_dir: Optional[tfds.core.utils.PathLike] = None,
    doc_util_paths: Optional[doc_utils.DocUtilPaths] = None,
    toc_relative_path: str = '/datasets/catalog/',
) -> None:
    """Write one markdown page per dataset, then the table of content.

    Args:
      datasets: Names of the datasets to document (all if not set).
      catalog_dir: Directory in which the catalog files are written.
      doc_util_paths: Additional paths (visualization, nightly info,...)
        forwarded to the documentation builders. A default `DocUtilPaths()`
        is used when not set.
      toc_relative_path: Relative path of the catalog directory, used to
        generate the table of content relative links.
    """
    output_dir = tfds.core.as_path(catalog_dir)
    util_paths = doc_util_paths or doc_utils.DocUtilPaths()

    # Group the generated documentations by section while writing each page.
    docs_by_section = collections.defaultdict(list)
    builder_docs = document_datasets.iter_documentation_builders(
        datasets, doc_util_paths=util_paths)
    for doc in builder_docs:
        page_path = output_dir / f'{doc.name}.md'
        page_path.write_text(doc.content)
        docs_by_section[doc.section].append(doc)

    _save_table_of_content(
        catalog_dir=output_dir,
        section_to_builder_docs=docs_by_section,
        toc_relative_path=toc_relative_path,
    )
def test_build_catalog(tmp_path: pathlib.Path):
  """Checks that build_catalog writes the dataset pages and the index files."""
  util_paths = doc_utils.DocUtilPaths(
      fig_base_path=None,
      df_base_path=None,
      nightly_path=None,
  )
  build_catalog.build_catalog(
      datasets=['mnist', 'coco'],
      catalog_dir=tmp_path,
      doc_util_paths=util_paths,
  )

  expected_files = ['_toc.yaml', 'coco.md', 'mnist.md', 'overview.md']
  generated_files = sorted(path.name for path in tmp_path.iterdir())
  assert generated_files == expected_files

  # Both the table of content and the overview must mention every dataset.
  for index_name in ('_toc.yaml', 'overview.md'):
    index_text = tmp_path.joinpath(index_name).read_text()
    assert 'coco' in index_text
    assert 'mnist' in index_text
def test_document_datasets():
    """Checks that the documented names match the requested datasets."""
    util_paths = doc_utils.DocUtilPaths(
        fig_base_path=None,
        df_base_path=None,
        nightly_path=None,
    )
    # Builder with and without config
    requested = ['mnist', 'coco']
    builder_docs = document_datasets.iter_documentation_builders(
        datasets=requested,
        doc_util_paths=util_paths,
    )
    documented_names = {doc.name for doc in builder_docs}
    assert documented_names == {'mnist', 'coco'}
def build_catalog(
    datasets: Optional[List[str]] = None,
    *,
    catalog_dir: Optional[tfds.core.utils.PathLike] = None,
    doc_util_paths: Optional[doc_utils.DocUtilPaths] = None,
    toc_relative_path: str = '/datasets/catalog/',
    index_template: Optional[tfds.core.utils.PathLike] = None,
    index_filename: str = 'overview.md',
    dataset_types: Optional[List[tfds.core.visibility.DatasetType]] = None,
) -> None:
  """Document all datasets, including the table of content.

  Args:
    datasets: Lists of dataset to document (all if not set)
    catalog_dir: Destination path for the catalog
    doc_util_paths: Additional path for visualization, nightly info,... A
      default `DocUtilPaths()` is used when not set.
    toc_relative_path: Relative path of the catalog directory, used to generate
      the table of content relative links.
    index_template: Template for the index page. Defaults to
      `scripts/documentation/templates/catalog_overview.md` inside TFDS.
    index_filename: Name of the catalog index file.
    dataset_types: Restrict the generation to the given dataset types. Default
      to all open source non-community datasets
  """
  # Only enum members belong in this list: the bare `DatasetType` class is not
  # a dataset type and must not be handed to `set_availables`.
  dataset_types = dataset_types or [
      tfds.core.visibility.DatasetType.TFDS_PUBLIC,
  ]
  tfds.core.visibility.set_availables(dataset_types)

  catalog_dir = tfds.core.as_path(catalog_dir)
  index_template = index_template or tfds.core.tfds_path(
      'scripts/documentation/templates/catalog_overview.md')
  index_template = tfds.core.as_path(index_template)

  # Iterate over the builder documentations
  section_to_builder_docs = collections.defaultdict(list)
  for builder_doc in document_datasets.iter_documentation_builders(
      datasets, doc_util_paths=doc_util_paths or doc_utils.DocUtilPaths()):
    # Write the builder documentation
    dataset_file = catalog_dir / f'{builder_doc.filestem}.md'
    dataset_file.write_text(builder_doc.content)
    # Save the category
    section_to_builder_docs[builder_doc.section].append(builder_doc)

  _save_table_of_content(
      catalog_dir=catalog_dir,
      section_to_builder_docs=section_to_builder_docs,
      toc_relative_path=toc_relative_path,
      index_template=index_template,
      index_filename=index_filename,
  )