def get_tables_for_datasets(names: List[str]) -> List[str]:
    """Return the schema table names of the given NYCDB datasets.

    The result is flat and ordered: datasets appear in the order given in
    ``names``, and each dataset's tables in the order of its ``schema``
    entry.

    Args:
        names: Dataset names, used as keys into ``nycdb.dataset.datasets()``.

    Returns:
        The ``table_name`` of every schema entry of every requested dataset.

    Raises:
        KeyError: If a name is not a key of ``nycdb.dataset.datasets()``.
    """
    # Nothing requested: return without touching the dataset catalogue.
    if not names:
        return []

    # Look the catalogue up once instead of once per requested name.
    # NOTE(review): assumes datasets() returns the same catalogue on every
    # call within one invocation -- confirm against nycdb.
    catalogue = nycdb.dataset.datasets()

    return [
        entry["table_name"]
        for dataset_name in names
        # list_wrap is applied because "schema" is apparently either a
        # single entry or a list of entries -- TODO confirm.
        for entry in list_wrap(catalogue[dataset_name]["schema"])
    ]
def get_dataset_tables() -> List[TableInfo]:
    """Collect a ``TableInfo`` for every table of every NYCDB dataset.

    For each dataset returned by ``nycdb.dataset.datasets()`` the result
    contains, in this order:

    1. one entry per ``table_name`` found in the dataset's ``schema``;
    2. one entry per name yielded by ``parse_nycdb_created_tables`` for the
       dataset's optional ``sql`` value (an empty list is passed when the
       key is absent).

    Returns:
        All collected ``TableInfo`` records, each tagged with the name of
        the dataset it came from.
    """
    collected: List[TableInfo] = []

    for dataset_name, info in nycdb.dataset.datasets().items():
        # Tables declared directly in the dataset schema.
        collected.extend(
            TableInfo(name=entry['table_name'], dataset=dataset_name)
            for entry in list_wrap(info['schema'])
        )

        # Tables reported for the dataset's SQL entries.
        # NOTE(review): presumably these are tables created by the SQL
        # scripts -- confirm against parse_nycdb_created_tables.
        collected.extend(
            TableInfo(name=created_name, dataset=dataset_name)
            for created_name in parse_nycdb_created_tables(info.get('sql', []))
        )

    return collected
def ensure_dataset(self, name: str, force_refresh: bool = False) -> None:
    """Make sure the tables of NYCDB dataset ``name`` exist in the database.

    Args:
        name: Dataset name, used as a key into ``nycdb.dataset.datasets()``.
        force_refresh: When true, the dataset's tables are dropped and the
            downloaded data is deleted before the existence check.

    Raises:
        KeyError: If ``name`` is not a key of ``nycdb.dataset.datasets()``.
    """
    dataset_info = nycdb.dataset.datasets()[name]

    # Gather the table names declared in the dataset's schema.
    table_names: List[str] = []
    for entry in list_wrap(dataset_info['schema']):
        table_names.append(entry['table_name'])

    count = len(table_names)
    noun = 'tables' if count != 1 else 'table'
    print(f"Ensuring NYCDB dataset '{name}' is loaded with {count} {noun}...")

    if force_refresh:
        self.drop_tables(*table_names)
        self.delete_downloaded_data(*table_names)

    # Guard clause: nothing to do when the tables are already present.
    if self.do_tables_exist(*table_names):
        print(f"Table {name} already exists.")
        return

    print(f"Table {name} not found in the database. Downloading...")
    # The dataset object is obtained separately for each step.
    self.get_nycdb_dataset(name).download_files()
    print(f"Loading {name} into the database...")
    self.get_nycdb_dataset(name).db_import()
def ensure_dataset(self, name: str, force_refresh: bool = False) -> None:
    """Make sure the tables of NYCDB dataset ``name`` exist in the database.

    Missing tables are fetched and loaded through ``self.call_nycdb`` with
    ``--download`` and then ``--load``. When the tables already exist and
    ``self.is_testing`` is false, ``self.call_nycdb('--verify', name)`` is
    run instead.

    Args:
        name: Dataset name, used as a key into ``nycdb.dataset.datasets()``.
        force_refresh: When true, the dataset's tables are dropped and the
            downloaded data is deleted before the existence check.

    Raises:
        KeyError: If ``name`` is not a key of ``nycdb.dataset.datasets()``.
    """
    dataset_info = nycdb.dataset.datasets()[name]

    # Gather the table names declared in the dataset's schema.
    table_names: List[str] = []
    for entry in list_wrap(dataset_info['schema']):
        table_names.append(entry['table_name'])

    count = len(table_names)
    noun = 'tables' if count != 1 else 'table'
    print(f"Ensuring NYCDB dataset '{name}' is loaded with {count} {noun}...")

    if force_refresh:
        self.drop_tables(*table_names)
        self.delete_downloaded_data(*table_names)

    # Missing tables: download and load, then stop.
    if not self.do_tables_exist(*table_names):
        print(f"Table {name} not found in the database. Downloading...")
        self.call_nycdb('--download', name)
        print(f"Loading {name} into the database...")
        self.call_nycdb('--load', name)
        return

    # Tables are present; verification is skipped when is_testing is set.
    if self.is_testing:
        return

    print(f"Table {name} already exists. Verifying row count...")
    self.call_nycdb('--verify', name)