Esempio n. 1
0
def find_schema_differences(
    module_path: str,
    bigquery_client: BigQueryClient,
    global_project: Optional[str],
    global_dataset: Optional[str],
) -> _SchemaDiffs:
    """Collect schema differences between local tables and BigQuery.

    Every table returned by ``find_tables(module_path)`` is looked up through
    ``bigquery_client``. Tables that do not exist remotely are recorded as
    ``MissingTable``; tables whose schema check yields at least one difference
    are recorded as ``ExistingTable``. Tables without differences are omitted.

    Args:
        module_path: Path handed to ``find_tables`` to discover local tables.
        bigquery_client: Client used to fetch the remote table definitions.
        global_project: When truthy, overrides each table's own ``project``.
        global_dataset: When truthy, overrides each table's own ``dataset``.

    Returns:
        A mapping from ``"project.dataset.table"`` identifiers to the
        ``MissingTable`` / ``ExistingTable`` entry describing the difference.

    Raises:
        AssertionError: If neither the global value nor the table itself
            provides a project or a dataset.
    """
    schema_diffs: _SchemaDiffs = {}
    for local_table in find_tables(module_path):
        project = global_project or local_table.project
        assert project, "Project has not been set."
        dataset = global_dataset or local_table.dataset
        assert dataset, "Dataset has not been set."

        table_identifier = f"{project}.{dataset}.{local_table.full_table_name()}"

        # Only the remote lookup is expected to raise NotFound, so keep the
        # try body limited to that single call.
        try:
            remote_table = bigquery_client.get_table(table_identifier)
        except NotFound:
            schema_diffs[table_identifier] = MissingTable(
                local_table=local_table)
            continue

        # Evaluate the schema comparison once and reuse the result for both
        # the "is there any difference" check and the stored entry.
        differences = list(
            check_schemas(local_table.get_schema_fields(),
                          remote_table.schema))
        if differences:
            schema_diffs[table_identifier] = ExistingTable(
                local_table=local_table,
                remote_table=remote_table,
                schema_diffs=differences,
            )

    return schema_diffs
Esempio n. 2
0
def test_table_finder():
    """Check the names of the tables found under the sibling ``tables`` dir.

    ``find_tables`` is called with ``True`` as its second argument and the
    resulting table names must match the expected set exactly.
    """
    here = pathlib.Path(__file__).parent
    fixtures_dir = os.path.join(here, "tables")

    discovered = set()
    for table in find_tables(fixtures_dir, True):
        discovered.add(table.name)

    wanted = {"first_table", "second_table", "second_abstract_table"}
    assert wanted == discovered
Esempio n. 3
0
def main(
    project: Optional[str],
    dataset: Optional[str],
    module_path: str,
    apply: bool,
    validate: bool,
) -> None:
    """Check, and optionally apply, BigQuery migrations for local tables.

    For every distinct table returned by ``find_tables(module_path)`` the
    remote table is fetched. A missing remote table, or local columns reported
    by ``find_new_columns``, is printed. Because the ``validate`` check runs
    before the ``apply`` check, ``validate`` takes precedence when both are
    set.

    Args:
        project: When truthy, overrides each table's own ``project``.
        dataset: When truthy, overrides each table's own ``dataset``.
        module_path: Path handed to ``find_tables`` to discover local tables.
        apply: When true, create missing tables and push schema updates.
        validate: When true, raise instead of reporting on the first table
            that is missing remotely or has new columns.

    Raises:
        AssertionError: If neither the argument nor the table itself provides
            a project or a dataset.
        Exception: If ``validate`` is true and a table is missing remotely or
            has new columns.
    """
    client = create_connection()
    for local_table in set(find_tables(module_path)):
        # Resolve project/dataset into per-table locals. Rebinding the
        # ``project`` / ``dataset`` parameters here would make the first
        # table's values stick for every subsequent table.
        table_project = project or local_table.project
        assert table_project, "Project has not been set."
        table_dataset = dataset or local_table.dataset
        assert table_dataset, "Dataset has not been set."

        table_identifier = (
            f"{table_project}.{table_dataset}.{local_table.full_table_name()}"
        )
        print(f"Checking migrations for: {table_identifier}")

        try:
            remote_table = client.get_table(table_identifier)
        except NotFound as not_found:
            table_exists_msg = f"Table does not exist in bq: {table_identifier}"
            if validate:
                raise Exception(table_exists_msg) from not_found

            print(table_exists_msg)
            if apply:
                print("Creating table.")
                table = Table(
                    table_identifier,
                    schema=local_table.get_schema_fields(),
                )
                if local_table.time_partitioning:
                    table.time_partitioning = local_table.time_partitioning
                print(client.create_table(table))
        else:
            local_schema = local_table.get_schema_fields()
            new_columns = list(
                find_new_columns(local_schema, remote_table.schema))
            if new_columns:
                new_columns_message = f"Found new columns: {new_columns}"
                if validate:
                    raise Exception(new_columns_message)
                print(new_columns_message)
                if apply:
                    print("Applying changes")
                    remote_table.schema = local_schema
                    print(client.update_table(remote_table, ["schema"]))
Esempio n. 4
0
def test_table_schema_fields():
    """Every table found under ``tables`` must expose at least one field."""
    fixtures_dir = os.path.join(pathlib.Path(__file__).parent, "tables")
    for table in find_tables(fixtures_dir, True):
        field_count = len(table.get_schema_fields())
        assert field_count > 0
Esempio n. 5
0
def test_table_finder_fail_on_missing_implementation():
    """Collecting table names with ``False`` as the second argument must fail.

    Consuming ``find_tables(tables_dir, False)`` and reading each table's
    ``name`` is expected to raise ``NotImplementedError``.
    """
    base_dir = pathlib.Path(__file__).parent
    tables_dir = os.path.join(base_dir, "tables")
    with pytest.raises(NotImplementedError):
        set(table.name for table in find_tables(tables_dir, False))