def test_nightly_doc_util(self):
    """NightlyDocUtil correctly flags nightly-only builders, configs, versions."""
    data_dir = '/tmp/dummy_dir'
    # Fake "nightly" registry: `True` marks entries that exist only at HEAD.
    nightly_dict = {
        'dummy_dataset': {'': {'1.0.0': False}},
        'dummy_new_ds': True,
        'dummy_new_config': {
            'new_config': True,
            'old_config': {
                '2.0.0': True,  # New version, nightly-only
                '1.0.0': False,
            },
        },
    }
    # Patch the loader so NightlyDocUtil reads our fake registry instead of
    # the real nightly file on disk.
    with mock.patch.object(
            doc_utils, '_load_nightly_dict', return_value=nightly_dict):
        ndu = doc_utils.NightlyDocUtil(path='/tmp/some/patched/path')

    base_builder = tfds.testing.DummyDataset(data_dir=data_dir)
    new_builder = DummyNewDs(data_dir=data_dir)
    new_config_builder = DummyNewConfig(data_dir=data_dir, config='new_config')
    new_version_builder = DummyNewConfig(data_dir=data_dir, config='old_config')

    # Only `dummy_new_ds` is a brand-new builder.
    self.assertFalse(ndu.is_builder_nightly(base_builder))
    self.assertTrue(ndu.is_builder_nightly(new_builder))
    self.assertFalse(ndu.is_builder_nightly(new_config_builder))
    self.assertFalse(ndu.is_builder_nightly(new_version_builder))

    # Only `dummy_new_ds/new_config` is a brand-new config.
    self.assertFalse(ndu.is_config_nightly(base_builder))
    self.assertFalse(ndu.is_config_nightly(new_builder))
    self.assertTrue(ndu.is_config_nightly(new_config_builder))
    self.assertFalse(ndu.is_config_nightly(new_version_builder))

    # Only `dummy_new_ds/new_version/2.0.0` is a brand-new version.
    self.assertFalse(ndu.is_version_nightly(base_builder, '1.0.0'))
    self.assertFalse(ndu.is_version_nightly(new_builder, 'x.x.x'))
    self.assertFalse(ndu.is_version_nightly(new_config_builder, 'x.x.x'))
    self.assertFalse(ndu.is_version_nightly(new_version_builder, '1.0.0'))
    self.assertTrue(ndu.is_version_nightly(new_version_builder, '2.0.0'))

    # Only `dummy_dataset` has no nightly change of any kind.
    self.assertFalse(ndu.has_nightly(base_builder))
    self.assertTrue(ndu.has_nightly(new_builder))
    self.assertTrue(ndu.has_nightly(new_config_builder))
    self.assertTrue(ndu.has_nightly(new_version_builder))
def iter_documentation_builders(
    datasets: Optional[List[str]] = None,
    *,
    doc_util_paths: Optional[doc_utils.DocUtilPaths] = None,
) -> Iterator[BuilderDocumentation]:
    """Create dataset documentation string for given datasets.

    Args:
        datasets: List of datasets for which to create documentation. If None,
            then all available datasets will be used.
        doc_util_paths: Additional paths for visualization, dataframes and
            nightly info. If None (or a field is unset), the corresponding
            doc util is skipped.

    Yields:
        builder_documentation: The documentation information for each builder.
    """
    print('Retrieving the list of builders...')
    datasets = datasets or _all_tfds_datasets()

    # Each util is optional: instantiate it only when its path is configured.
    # Guarding on `doc_util_paths` itself keeps the declared `= None` default
    # usable instead of raising AttributeError.
    if doc_util_paths and doc_util_paths.fig_base_path:
        visu_doc_util = doc_utils.VisualizationDocUtil(
            base_path=doc_util_paths.fig_base_path,
            base_url=doc_util_paths.fig_base_url,
        )
    else:
        visu_doc_util = None

    if doc_util_paths and doc_util_paths.df_base_path:
        df_doc_util = doc_utils.DataframeDocUtil(
            base_path=doc_util_paths.df_base_path,
            base_url=doc_util_paths.df_base_url,
        )
    else:
        df_doc_util = None

    # BUG FIX: this branch was gated on `fig_base_path` (copy-paste from the
    # visualization branch above); the nightly util must be gated on its own
    # `nightly_path` field.
    if doc_util_paths and doc_util_paths.nightly_path:
        nightly_doc_util = doc_utils.NightlyDocUtil(
            path=doc_util_paths.nightly_path,
        )
    else:
        nightly_doc_util = None

    document_single_builder_fn = functools.partial(
        _document_single_builder,
        visu_doc_util=visu_doc_util,
        df_doc_util=df_doc_util,
        nightly_doc_util=nightly_doc_util,
    )

    # Fan the per-builder documentation work out to a thread pool (the work is
    # I/O-bound), yielding results as they complete rather than in input order.
    print(f'Document {len(datasets)} builders...')
    with futures.ThreadPoolExecutor(
            max_workers=_WORKER_COUNT_DATASETS) as tpool:
        tasks = [
            tpool.submit(document_single_builder_fn, name) for name in datasets
        ]
        for future in tqdm.tqdm(futures.as_completed(tasks), total=len(tasks)):
            builder_doc = future.result()
            if builder_doc is None:  # Builder was filtered out.
                continue
            tqdm.tqdm.write(
                f'Documentation generated for {builder_doc.name}...')
            yield builder_doc
    print('All builder documentations generated!')