Ejemplo n.º 1
0
    def __init__(self,
                 config,
                 source_client=None,
                 target_client=None,
                 verbose=False):
        """Set up a ConfigManager that supplies the source and target queries.

        Args:
            config (Dict): The validation config supplied.
            source_client (IbisClient): Client for the source DB. When falsy,
                one is created with clients.get_data_client() from
                self.get_source_connection().
            target_client (IbisClient): Client for the target DB. When falsy,
                one is created with clients.get_data_client() from
                self.get_target_connection().
            verbose (Bool): Stored as ``self.verbose``; intended to make the
                Data Validation client print the queries it runs.

        Raises:
            ValueError: If ``self.validation_type`` is not one of
                consts.CONFIG_TYPES.
        """
        self._state_manager = state_manager.StateManager()
        # The config is stored before any client is built, since the
        # connection getters are called only after this assignment.
        self._config = config

        if not source_client:
            source_client = clients.get_data_client(
                self.get_source_connection())
        self.source_client = source_client

        if not target_client:
            target_client = clients.get_data_client(
                self.get_target_connection())
        self.target_client = target_client

        self.verbose = verbose

        is_known_type = self.validation_type in consts.CONFIG_TYPES
        if not is_known_type:
            raise ValueError(
                f"Unknown Configuration Type: {self.validation_type}")
Ejemplo n.º 2
0
def list_connections():
    """Print the name of every connection reported by the state manager."""
    for name in state_manager.StateManager().list_connections():
        print(f"Connection Name: {name}")
Ejemplo n.º 3
0
def test_cli_store_yaml_then_run_gcs():
    """Store a validation YAML with the GCS env var set, then run it back."""
    # A stored BigQuery connection is needed before building the validation.
    _store_bq_conn()

    # Build the validation from CLI args and persist it as a YAML file.
    arg_parser = cli_tools.configure_arg_parser()
    main.run(arg_parser.parse_args(CLI_STORE_COLUMN_ARGS))

    # Because the env directory variable is set, the YAML is expected to
    # live under that directory rather than a local default.
    base_dir = os.environ[consts.ENV_DIRECTORY_VAR]
    stored_yaml_path = os.path.join(base_dir, "validations/", CLI_CONFIG_FILE)

    # The line count itself carries no meaning; it is a fingerprint of the
    # exact file that should have been written.  A change to the expected
    # value is likely a breaking change and must be assessed.
    raw_yaml = state_manager.StateManager()._read_file(stored_yaml_path)
    line_count = len(raw_yaml.decode("utf-8").splitlines())
    assert line_count == EXPECTED_NUM_YAML_LINES

    # Execute the generated config twice: first through the 'run-config'
    # command, then through the 'configs run' command.
    for cli_args in (CLI_RUN_CONFIG_ARGS, CLI_CONFIGS_RUN_ARGS):
        parsed_args = arg_parser.parse_args(cli_args)
        managers = main.build_config_managers_from_yaml(parsed_args)
        main.run_validations(parsed_args, managers)
def run_raw_query_against_connection(args):
    """Run ``args.query`` on the connection named ``args.conn``.

    Returns:
        Whatever ``fetchall()`` yields on the cursor produced by the
        client's ``raw_sql`` call.
    """
    conn_config = state_manager.StateManager().get_connection_config(args.conn)
    client = clients.get_data_client(conn_config)

    with client.raw_sql(args.query, results=True) as cursor:
        rows = cursor.fetchall()
    return rows
def build_config_managers_from_yaml(args):
    """Build a ConfigManager for every validation in the YAML config file.

    Args:
        args: Parsed CLI arguments; used to locate the config file and to
            read ``verbose``.

    Returns:
        List[ConfigManager]: One manager per entry under the YAML
        validations key, all sharing the same source and target clients.
    """
    managers = []

    yaml_configs = _get_yaml_config_from_file(_get_arg_config_file(args))

    connections = state_manager.StateManager()
    source_conn = connections.get_connection_config(
        yaml_configs[consts.YAML_SOURCE])
    target_conn = connections.get_connection_config(
        yaml_configs[consts.YAML_TARGET])

    # The clients are created once and reused by every validation below.
    source_client = clients.get_data_client(source_conn)
    target_client = clients.get_data_client(target_conn)

    for validation in yaml_configs[consts.YAML_VALIDATIONS]:
        # Each validation entry is updated in place with the file-level
        # connections and result handler before being wrapped.
        validation[consts.CONFIG_SOURCE_CONN] = source_conn
        validation[consts.CONFIG_TARGET_CONN] = target_conn
        validation[consts.CONFIG_RESULT_HANDLER] = yaml_configs[
            consts.YAML_RESULT_HANDLER]
        managers.append(
            ConfigManager(validation,
                          source_client,
                          target_client,
                          verbose=args.verbose))

    return managers
Ejemplo n.º 6
0
def list_validations():
    """Print a header followed by each saved validation name plus '.yaml'."""
    # The names are fetched before the header is printed, so a failure while
    # listing produces no partial output.
    names = state_manager.StateManager().list_validations()

    print("Validation YAMLs found:")
    for name in names:
        print(f"{name}.yaml")
Ejemplo n.º 7
0
def test_create_unknown_filepath(capsys, fs):
    """A StateManager given a not-yet-existing directory stores connections."""
    # The directory is not created beforehand; the state manager is expected
    # to create unknown paths itself.
    base_dir = "create/this/path/"
    mgr = state_manager.StateManager(base_dir)
    mgr.create_connection(TEST_CONN_NAME, TEST_CONN)

    assert mgr.list_connections() == [TEST_CONN_NAME]

    expected_path = base_dir + f"{TEST_CONN_NAME}.connection.json"
    assert mgr._get_connection_path(TEST_CONN_NAME) == expected_path
def find_tables_using_string_matching(args):
    """Return a JSON string of matched source/target tables for validations.

    Args:
        args: Parsed CLI arguments. Reads ``score_cutoff``, ``source_conn``,
            ``target_conn`` and ``allowed_schemas``.

    Returns:
        str: JSON encoding of the table configs produced by
        ``_compare_match_tables``.
    """
    # Fall back to 0.8 only when no cutoff was supplied. An explicit cutoff
    # of 0 is kept rather than being silently replaced by the default, which
    # is what a truthiness test (``args.score_cutoff or 0.8``) would do.
    score_cutoff = 0.8 if args.score_cutoff is None else args.score_cutoff

    mgr = state_manager.StateManager()
    source_client = clients.get_data_client(
        mgr.get_connection_config(args.source_conn))
    target_client = clients.get_data_client(
        mgr.get_connection_config(args.target_conn))

    # NOTE(review): the schema allow-list is applied to the source side only;
    # the target table map is built unfiltered. Confirm this is intentional.
    allowed_schemas = cli_tools.get_arg_list(args.allowed_schemas)
    source_table_map = get_table_map(source_client,
                                     allowed_schemas=allowed_schemas)
    target_table_map = get_table_map(target_client)

    table_configs = _compare_match_tables(source_table_map,
                                          target_table_map,
                                          score_cutoff=score_cutoff)
    return json.dumps(table_configs)
Ejemplo n.º 9
0
def get_validation(validation_name):
    """Return the stored validation config for ``validation_name``."""
    return state_manager.StateManager().get_validation_config(validation_name)
Ejemplo n.º 10
0
def store_validation(validation_file_name, yaml_config):
    """Save ``yaml_config`` as a validation YAML named ``validation_file_name``."""
    state_manager.StateManager().create_validation_yaml(
        validation_file_name, yaml_config)
Ejemplo n.º 11
0
def get_connection(connection_name):
    """Return the stored connection config for ``connection_name``."""
    return state_manager.StateManager().get_connection_config(connection_name)
Ejemplo n.º 12
0
def store_connection(connection_name, conn):
    """Save the connection config ``conn`` under ``connection_name``."""
    state_manager.StateManager().create_connection(connection_name, conn)
Ejemplo n.º 13
0
def test_create_and_list_connection(capsys, fs):
    """A newly created connection is the only one listed."""
    mgr = state_manager.StateManager()
    mgr.create_connection(TEST_CONN_NAME, TEST_CONN)

    assert mgr.list_connections() == [TEST_CONN_NAME]
def build_config_managers_from_args(args):
    """Build one ConfigManager per requested table from parsed CLI arguments.

    Args:
        args: Parsed CLI arguments for a ``validate`` sub-command.

    Returns:
        list: The config managers, in the order of the resolved tables list.

    Raises:
        ValueError: If ``args.validate_cmd`` is not a known validation type.
    """
    # Map the capitalized sub-command name onto its validation type constant.
    validate_cmd = args.validate_cmd.capitalize()
    command_to_type = {
        "Schema": consts.SCHEMA_VALIDATION,
        "Column": consts.COLUMN_VALIDATION,
        "Row": consts.ROW_VALIDATION,
        "Custom-query": consts.CUSTOM_QUERY,
    }
    if validate_cmd not in command_to_type:
        raise ValueError(f"Unknown Validation Type: {validate_cmd}")
    config_type = command_to_type[validate_cmd]
    is_schema = config_type == consts.SCHEMA_VALIDATION

    # The BigQuery-specific handler flag takes precedence over the generic
    # result handler config; with neither set there is no handler.
    handler_spec = args.bq_result_handler or args.result_handler_config
    if handler_spec:
        result_handler_config = cli_tools.get_result_handler(
            handler_spec, args.service_account)
    else:
        result_handler_config = None

    # Filters and threshold are only read for non-schema validations;
    # labels are read for every validation type.
    if is_schema:
        filter_config, threshold = [], 0.0
    else:
        filter_config = (cli_tools.get_filters(args.filters)
                         if args.filters else [])
        threshold = args.threshold if args.threshold else 0.0
    labels = cli_tools.get_labels(args.labels)

    connections = state_manager.StateManager()
    source_client = clients.get_data_client(
        connections.get_connection_config(args.source_conn))
    target_client = clients.get_data_client(
        connections.get_connection_config(args.target_conn))

    output_format = args.format or "table"

    # Random-row options are not read for schema validation.
    if is_schema:
        use_random_rows = None
        random_row_batch_size = None
    else:
        use_random_rows = args.use_random_row
        random_row_batch_size = args.random_row_batch_size

    tables = cli_tools.get_tables_list(
        args.tables_list,
        default_value=[{}],
        is_filesystem=source_client._source_type == "FileSystem")

    managers = []
    for table in tables:
        manager = ConfigManager.build_config_manager(
            config_type,
            args.source_conn,
            args.target_conn,
            table,
            labels,
            threshold,
            output_format,
            use_random_rows=use_random_rows,
            random_row_batch_size=random_row_batch_size,
            source_client=source_client,
            target_client=target_client,
            result_handler_config=result_handler_config,
            filter_config=filter_config,
            verbose=args.verbose,
        )
        if not is_schema:
            manager = build_config_from_args(args, manager)
        elif args.exclusion_columns is not None:
            # Schema validation only: excluded column names are case-folded
            # before being registered on the manager.
            excluded = cli_tools.get_arg_list(args.exclusion_columns)
            manager.append_exclusion_columns(
                [name.casefold() for name in excluded])

        managers.append(manager)

    return managers
Ejemplo n.º 15
0
def test_create_and_list_validation(capsys, fs):
    """A stored validation is listed by its name up to the first dot."""
    mgr = state_manager.StateManager()
    mgr.create_validation_yaml(TEST_VALIDATION_NAME, TEST_VALIDATION_CONFIG)

    expected_name, _, _ = TEST_VALIDATION_NAME.partition(".")
    assert mgr.list_validations() == [expected_name]
Ejemplo n.º 16
0
def test_create_and_get_validation_config(capsys, fs):
    """A stored validation config is read back unchanged."""
    mgr = state_manager.StateManager()
    mgr.create_validation_yaml(TEST_VALIDATION_NAME, TEST_VALIDATION_CONFIG)

    loaded = mgr.get_validation_config(TEST_VALIDATION_NAME)
    assert loaded == TEST_VALIDATION_CONFIG
Ejemplo n.º 17
0
def _remove_spanner_conn():
    """Delete the stored connection file for SPANNER_CONN_NAME."""
    conn_path = state_manager.StateManager()._get_connection_path(
        SPANNER_CONN_NAME)
    os.remove(conn_path)
Ejemplo n.º 18
0
def test_create_and_get_connection_config(capsys, fs):
    """A stored connection config is read back unchanged."""
    mgr = state_manager.StateManager()
    mgr.create_connection(TEST_CONN_NAME, TEST_CONN)

    loaded = mgr.get_connection_config(TEST_CONN_NAME)
    assert loaded == TEST_CONN