Ejemplo n.º 1
0
 def test_document_datasets(self):
   """Smoke test: generating docs for a couple of datasets must not raise."""
   dataset_names = ['mnist', 'cifar10']
   document_datasets.dataset_docs_str(datasets=dataset_names)
Ejemplo n.º 2
0
def build_catalog(
    datasets: Optional[List[str]] = None,
    catalog_dir: Optional[str] = None,
    toc_relative_path: str = '/datasets/catalog/',
) -> None:
    """Document all datasets, including the table of contents.

    Writes the following files into `catalog_dir`:
      * `<dataset_name>.md`: one page per documented dataset.
      * `overview.md`: the overview page, with the per-section tables of
        contents substituted for its `{toc}` placeholder.
      * `_toc.yaml`: the sidebar structure (overview entry + one entry per
        section).

    Args:
        datasets: Lists of dataset to document (all if not set).
        catalog_dir: Destination path for the catalog. Although the parameter
            defaults to `None`, a value is required: no fallback location is
            defined in this function.
        toc_relative_path: Relative path of the catalog directory, used to
            generate the table of contents relative links.

    Raises:
        ValueError: If `catalog_dir` is `None`.
    """
    # Fail fast: without this check the function would first build all the
    # documentation and only then crash with an opaque `TypeError` inside
    # `os.path.join`.
    if catalog_dir is None:
        raise ValueError(
            'catalog_dir must be set to the destination directory of the '
            'catalog.'
        )

    # Build datasets doc
    print('Build datasets overview...')
    overview_doc, datasets_dict = document_datasets.dataset_docs_str(datasets)

    # For _toc.yaml
    toc_dictionary = {
        'toc': [{
            'title': 'Overview',
            'path': os.path.join(toc_relative_path, 'overview'),
        }]
    }

    # Rendered per-section tables of contents, injected into overview.md.
    section_tocs = []

    nightly_util = document_datasets.NightlyDocUtil()

    print('Build Sections')
    # Sections are processed in sorted order so the output is deterministic.
    for section, datasets_in_section in sorted(datasets_dict.items()):
        print('Section %s...' % section)
        section_str = section.replace('_', ' ').capitalize()
        sec_paths = []  # Sidebar entries (for _toc.yaml).
        section_toc = []  # `DatasetItem`s (for the overview page).
        for dataset_name, is_manual, doc in datasets_in_section:
            print('Dataset %s...' % dataset_name)

            doc_filename = dataset_name + '.md'
            sidebar_item = {
                'path': os.path.join(toc_relative_path, dataset_name),
                'title': dataset_name + (' (manual)' if is_manual else ''),
            }
            ds_item = DatasetItem(
                name=dataset_name,
                path=doc_filename,
            )
            # Flag datasets only available in nightly builds in both the
            # sidebar and the overview table of contents.
            if nightly_util.is_builder_nightly(dataset_name):
                sidebar_item['status'] = 'nightly'
                ds_item.is_nightly = True

            sec_paths.append(sidebar_item)
            section_toc.append(ds_item)

            dataset_file = os.path.join(catalog_dir, doc_filename)
            with tf.io.gfile.GFile(dataset_file, 'w') as f:
                f.write(doc)

        section_tocs.append(create_section_toc(section_str, section_toc))
        toc_dictionary['toc'].append({
            'title': section_str,
            'section': sec_paths,
        })

    with tf.io.gfile.GFile(os.path.join(catalog_dir, 'overview.md'), 'w') as f:
        f.write(overview_doc.format(toc='\n'.join(section_tocs)))

    with tf.io.gfile.GFile(os.path.join(catalog_dir, '_toc.yaml'), 'w') as f:
        yaml.dump(toc_dictionary, f, default_flow_style=False)