Example #1
0
def get_tables_for_datasets(names: List[str]) -> List[str]:
    """Return the schema table names of the given NYCDB datasets.

    Table names are returned in the order the datasets appear in ``names``
    and, within one dataset, in the order of its ``schema`` entries.

    Args:
        names: Dataset names; each is used as a key into the mapping
            returned by ``nycdb.dataset.datasets()``.

    Returns:
        A new flat list holding the ``table_name`` value of every schema
        entry of every requested dataset. Empty when ``names`` is empty.

    Raises:
        KeyError: Presumably, when a name is not a known dataset or a
            dataset has no ``schema`` key (the lookups are plain
            subscripts).
    """
    if not names:
        # Nothing to look up, so don't load the dataset definitions at all.
        return []

    # The dataset definitions do not depend on the loop variable, so fetch
    # them once instead of once per requested name.
    all_datasets = nycdb.dataset.datasets()

    tables: List[str] = []
    for name in names:
        # list_wrap lets "schema" be either a single entry or a list of them
        # (NOTE(review): inferred from its use here; confirm against helper).
        schema = list_wrap(all_datasets[name]["schema"])
        tables.extend(t["table_name"] for t in schema)

    return tables
Example #2
0
def get_dataset_tables() -> List[TableInfo]:
    """Collect a ``TableInfo`` for every table of every NYCDB dataset.

    For each dataset returned by ``nycdb.dataset.datasets()``, the tables
    named in its ``schema`` come first, followed by the names that
    ``parse_nycdb_created_tables`` extracts from its optional ``sql`` entry
    (presumably tables created by the dataset's SQL scripts).

    Returns:
        One ``TableInfo(name=..., dataset=...)`` per table, grouped by
        dataset in the iteration order of the datasets mapping.
    """
    tables: List[TableInfo] = []
    for dataset_name, info in nycdb.dataset.datasets().items():
        # Tables declared directly in the dataset's schema definition(s).
        tables.extend(
            TableInfo(name=entry['table_name'], dataset=dataset_name)
            for entry in list_wrap(info['schema'])
        )
        # Tables derived from the dataset's "sql" entry; a dataset without
        # one contributes nothing here.
        for created_name in parse_nycdb_created_tables(info.get('sql', [])):
            tables.append(TableInfo(name=created_name, dataset=dataset_name))
    return tables
Example #3
0
    def ensure_dataset(self, name: str, force_refresh: bool = False) -> None:
        """Make sure the NYCDB dataset ``name`` is loaded into the database.

        The dataset's table names are read from its ``schema`` definition.
        If ``do_tables_exist`` reports them missing, the dataset's files are
        downloaded and then imported; otherwise nothing is loaded.

        Args:
            name: Key of the dataset in ``nycdb.dataset.datasets()``.
            force_refresh: When true, ``drop_tables`` and
                ``delete_downloaded_data`` are called for the dataset's
                tables before the existence check.
        """
        schemas = list_wrap(nycdb.dataset.datasets()[name]['schema'])
        tables: List[str] = [entry['table_name'] for entry in schemas]
        tables_str = 'tables' if len(tables) != 1 else 'table'
        print(
            f"Ensuring NYCDB dataset '{name}' is loaded with {len(tables)} {tables_str}..."
        )

        if force_refresh:
            self.drop_tables(*tables)
            self.delete_downloaded_data(*tables)

        if self.do_tables_exist(*tables):
            print(f"Table {name} already exists.")
            return

        print(f"Table {name} not found in the database. Downloading...")
        self.get_nycdb_dataset(name).download_files()
        print(f"Loading {name} into the database...")
        # A second dataset object is requested for the import step rather
        # than reusing the one that performed the download.
        self.get_nycdb_dataset(name).db_import()
Example #4
0
    def ensure_dataset(self, name: str, force_refresh: bool = False) -> None:
        """Make sure the NYCDB dataset ``name`` is loaded into the database.

        The dataset's table names are read from its ``schema`` definition.
        If ``do_tables_exist`` reports them missing, ``call_nycdb`` is run
        with ``--download`` and then ``--load``. If they are present,
        ``call_nycdb`` is run with ``--verify`` unless ``self.is_testing``
        is true.

        Args:
            name: Key of the dataset in ``nycdb.dataset.datasets()``.
            force_refresh: When true, ``drop_tables`` and
                ``delete_downloaded_data`` are called for the dataset's
                tables before the existence check.
        """
        schemas = list_wrap(nycdb.dataset.datasets()[name]['schema'])
        tables: List[str] = [entry['table_name'] for entry in schemas]
        tables_str = 'tables' if len(tables) != 1 else 'table'
        print(f"Ensuring NYCDB dataset '{name}' is loaded with {len(tables)} {tables_str}...")

        if force_refresh:
            self.drop_tables(*tables)
            self.delete_downloaded_data(*tables)

        if self.do_tables_exist(*tables):
            # Tables are already present: only verify, and skip even that
            # when running under test.
            if not self.is_testing:
                print(f"Table {name} already exists. Verifying row count...")
                self.call_nycdb('--verify', name)
            return

        print(f"Table {name} not found in the database. Downloading...")
        self.call_nycdb('--download', name)
        print(f"Loading {name} into the database...")
        self.call_nycdb('--load', name)