def find_schema_differences(
    module_path: str,
    bigquery_client: BigQueryClient,
    global_project: Optional[str],
    global_dataset: Optional[str],
) -> _SchemaDiffs:
    """Compare locally defined tables against their remote counterparts.

    Every table returned by ``find_tables(module_path)`` is looked up through
    ``bigquery_client.get_table``. Tables whose lookup raises ``NotFound`` are
    recorded as ``MissingTable``; tables for which ``check_schemas`` yields at
    least one item are recorded as ``ExistingTable``. Tables with no reported
    differences are left out of the result.

    Args:
        module_path: Path handed to ``find_tables`` to discover local tables.
        bigquery_client: Client used to fetch the remote table definitions.
        global_project: When truthy, used instead of each table's own
            ``project``.
        global_dataset: When truthy, used instead of each table's own
            ``dataset``.

    Returns:
        A mapping from ``"<project>.<dataset>.<full table name>"`` to either
        an ``ExistingTable`` or a ``MissingTable`` entry.

    Raises:
        AssertionError: If neither the global value nor the table itself
            provides a project or a dataset.
    """
    schema_diffs: _SchemaDiffs = {}
    for local_table in find_tables(module_path):
        project = global_project or local_table.project
        assert project, "Project has not been set."
        dataset = global_dataset or local_table.dataset
        assert dataset, "Dataset has not been set."

        table_identifier = f"{project}.{dataset}.{local_table.full_table_name()}"
        try:
            remote_table = bigquery_client.get_table(table_identifier)
            # Materialise the comparison once and reuse it for both the
            # emptiness check and the stored result.
            differences = list(
                check_schemas(local_table.get_schema_fields(), remote_table.schema)
            )
            if differences:
                schema_diffs[table_identifier] = ExistingTable(
                    local_table=local_table,
                    remote_table=remote_table,
                    schema_diffs=differences,
                )
        except NotFound:
            schema_diffs[table_identifier] = MissingTable(local_table=local_table)
    return schema_diffs
def test_table_finder():
    """Check that ``find_tables`` yields exactly the expected table names."""
    here = pathlib.Path(__file__).parent
    fixtures_dir = os.path.join(here, "tables")

    # Collect the names explicitly so duplicates collapse into a set.
    discovered = set()
    for table in find_tables(fixtures_dir, True):
        discovered.add(table.name)

    wanted = {"first_table", "second_table", "second_abstract_table"}
    assert wanted == discovered
def main(
    project: Optional[str],
    dataset: Optional[str],
    module_path: str,
    apply: bool,
    validate: bool,
) -> None:
    """Check, and optionally apply, migrations for every local table.

    For each distinct table returned by ``find_tables(module_path)`` the
    remote table is fetched. A missing table is reported and, when ``apply``
    is set, created. An existing table is checked with ``find_new_columns``
    and, when new columns are found and ``apply`` is set, its schema is
    replaced with the local one.

    Args:
        project: When truthy, used instead of each table's own ``project``.
        dataset: When truthy, used instead of each table's own ``dataset``.
        module_path: Path handed to ``find_tables`` to discover local tables.
        apply: Create missing tables and push schema updates when true.
        validate: Raise instead of printing when a table is missing or has
            new columns.

    Raises:
        AssertionError: If no project or dataset can be resolved for a table.
        Exception: In ``validate`` mode, when a table is missing remotely or
            new columns are found.
    """
    client = create_connection()
    for local_table in set(find_tables(module_path)):
        # Resolve per table without rebinding the ``project`` / ``dataset``
        # arguments; otherwise the first table's values would be reused for
        # every later table whenever no override was passed in.
        table_project = project or local_table.project
        assert table_project, "Project has not been set."
        table_dataset = dataset or local_table.dataset
        assert table_dataset, "Dataset has not been set."

        table_identifier = (
            f"{table_project}.{table_dataset}.{local_table.full_table_name()}"
        )
        print(f"Checking migrations for: {table_identifier}")

        try:
            remote_table = client.get_table(table_identifier)
        except NotFound as not_found:
            table_exists_msg = f"Table does not exist in bq: {table_identifier}"
            if validate:
                raise Exception(table_exists_msg) from not_found
            print(table_exists_msg)
            if apply:
                print("Creating table.")
                table = Table(
                    table_identifier,
                    schema=local_table.get_schema_fields(),
                )
                if local_table.time_partitioning:
                    table.time_partitioning = local_table.time_partitioning
                print(client.create_table(table))
        else:
            new_columns = list(
                find_new_columns(local_table.get_schema_fields(), remote_table.schema)
            )
            if new_columns:
                new_columns_message = f"Found new columns: {new_columns}"
                if validate:
                    raise Exception(new_columns_message)
                print(new_columns_message)
                if apply:
                    print("Applying changes")
                    remote_table.schema = local_table.get_schema_fields()
                    print(client.update_table(remote_table, ["schema"]))
def test_table_schema_fields():
    """Check that every discovered table exposes at least one schema field."""
    here = pathlib.Path(__file__).parent
    fixtures_dir = os.path.join(here, "tables")

    for table in find_tables(fixtures_dir, True):
        fields = table.get_schema_fields()
        assert len(fields) > 0
def test_table_finder_fail_on_missing_implementation():
    """Check that ``find_tables(..., False)`` raises ``NotImplementedError``."""
    with pytest.raises(NotImplementedError):
        here = pathlib.Path(__file__).parent
        fixtures_dir = os.path.join(here, "tables")

        # Consume the result and read each name, in case discovery is lazy.
        seen_names = set()
        for table in find_tables(fixtures_dir, False):
            seen_names.add(table.name)