def __init__(self, config, source_client=None, target_client=None, verbose=False):
    """Set up a ConfigManager, which supplies the source and target queries.

    Args:
        config (Dict): The validation config supplied.
        source_client (IbisClient): The Ibis client for the source DB. When
            falsy, one is created with clients.get_data_client() from
            self.get_source_connection().
        target_client (IbisClient): The Ibis client for the target DB. When
            falsy, one is created with clients.get_data_client() from
            self.get_target_connection().
        verbose (Bool): If verbose, the Data Validation client will print
            queries run.

    Raises:
        ValueError: If self.validation_type is not in consts.CONFIG_TYPES.
    """
    self._state_manager = state_manager.StateManager()
    self._config = config

    # Build a client only when the caller did not hand one in. The state
    # manager and config are assigned first, before the connection getters
    # are called.
    if not source_client:
        source_client = clients.get_data_client(self.get_source_connection())
    self.source_client = source_client

    if not target_client:
        target_client = clients.get_data_client(self.get_target_connection())
    self.target_client = target_client

    self.verbose = verbose

    # Reject configs whose validation type is not a known one.
    if self.validation_type in consts.CONFIG_TYPES:
        return
    raise ValueError(f"Unknown Configuration Type: {self.validation_type}")
def list_connections():
    """Print the name of every saved connection, one per line."""
    manager = state_manager.StateManager()
    for name in manager.list_connections():
        print(f"Connection Name: {name}")
def test_cli_store_yaml_then_run_gcs():
    """Test storing and retrieving validation YAML when GCS env var is set."""
    # Store BQ Connection
    _store_bq_conn()

    # Build the validation from the CLI args and store it to a YAML file.
    arg_parser = cli_tools.configure_arg_parser()
    main.run(arg_parser.parse_args(CLI_STORE_COLUMN_ARGS))

    # The YAML file is expected under the directory named by the env var.
    stored_yaml_path = os.path.join(
        os.environ[consts.ENV_DIRECTORY_VAR], "validations/", CLI_CONFIG_FILE
    )

    # The number of lines is not significant in itself; it stands in for
    # "exactly the expected file was created". A change to this value is
    # likely a breaking change and must be assessed.
    raw_yaml = state_manager.StateManager()._read_file(stored_yaml_path)
    yaml_lines = raw_yaml.decode("utf-8").splitlines()
    assert len(yaml_lines) == EXPECTED_NUM_YAML_LINES

    # Run the generated config twice: first with the 'run-config' command,
    # then with the 'configs run' command.
    for cli_args in (CLI_RUN_CONFIG_ARGS, CLI_CONFIGS_RUN_ARGS):
        parsed_args = arg_parser.parse_args(cli_args)
        managers = main.build_config_managers_from_yaml(parsed_args)
        main.run_validations(parsed_args, managers)
def run_raw_query_against_connection(args):
    """Run args.query on the connection named args.conn and return all rows.

    Intended for adhoc usage: the stored connection config is looked up by
    name, a data client is built from it, and the raw SQL is executed.
    """
    manager = state_manager.StateManager()
    conn_config = manager.get_connection_config(args.conn)
    data_client = clients.get_data_client(conn_config)

    with data_client.raw_sql(args.query, results=True) as cursor:
        return cursor.fetchall()
def build_config_managers_from_yaml(args):
    """Build the ConfigManager instances described by a validation YAML.

    The source and target connections named in the YAML are resolved once
    and their clients are shared by every ConfigManager created here.

    Returns:
        List[ConfigManager]: One instance per validation entry, ready to be
        executed.
    """
    yaml_configs = _get_yaml_config_from_file(_get_arg_config_file(args))

    manager = state_manager.StateManager()
    source_conn = manager.get_connection_config(yaml_configs[consts.YAML_SOURCE])
    target_conn = manager.get_connection_config(yaml_configs[consts.YAML_TARGET])
    source_client = clients.get_data_client(source_conn)
    target_client = clients.get_data_client(target_conn)

    managers = []
    for validation in yaml_configs[consts.YAML_VALIDATIONS]:
        # Each validation entry is completed in place with the shared
        # connection details and the file-level result handler.
        validation[consts.CONFIG_SOURCE_CONN] = source_conn
        validation[consts.CONFIG_TARGET_CONN] = target_conn
        validation[consts.CONFIG_RESULT_HANDLER] = yaml_configs[
            consts.YAML_RESULT_HANDLER
        ]
        managers.append(
            ConfigManager(
                validation, source_client, target_client, verbose=args.verbose
            )
        )
    return managers
def list_validations():
    """Print a header followed by the file name of each saved validation YAML."""
    names = state_manager.StateManager().list_validations()

    print("Validation YAMLs found:")
    for name in names:
        print(f"{name}.yaml")
def test_create_unknown_filepath(capsys, fs):
    """Check that a connection can be stored under a not-yet-existing path."""
    # Unknown file paths will be created by the state manager
    target_dir = "create/this/path/"
    mgr = state_manager.StateManager(target_dir)
    mgr.create_connection(TEST_CONN_NAME, TEST_CONN)

    assert mgr.list_connections() == [TEST_CONN_NAME]

    # The connection file must live directly under the requested directory.
    actual_path = mgr._get_connection_path(TEST_CONN_NAME)
    expected_path = target_dir + f"{TEST_CONN_NAME}.connection.json"
    assert actual_path == expected_path
def find_tables_using_string_matching(args):
    """Return a JSON string with matched tables for use in validations.

    Table maps are built for the source connection (restricted to
    args.allowed_schemas) and for the target connection, then compared using
    args.score_cutoff, which falls back to 0.8 when it is falsy.
    """
    cutoff = args.score_cutoff if args.score_cutoff else 0.8

    manager = state_manager.StateManager()
    source_config = manager.get_connection_config(args.source_conn)
    source_client = clients.get_data_client(source_config)
    target_config = manager.get_connection_config(args.target_conn)
    target_client = clients.get_data_client(target_config)

    schemas = cli_tools.get_arg_list(args.allowed_schemas)
    source_tables = get_table_map(source_client, allowed_schemas=schemas)
    target_tables = get_table_map(target_client)

    matches = _compare_match_tables(
        source_tables, target_tables, score_cutoff=cutoff
    )
    return json.dumps(matches)
def get_validation(validation_name):
    """Return the stored validation YAML config for the given validation name."""
    manager = state_manager.StateManager()
    validation_config = manager.get_validation_config(validation_name)
    return validation_config
def store_validation(validation_file_name, yaml_config):
    """Save yaml_config as a validation YAML named validation_file_name."""
    state_manager.StateManager().create_validation_yaml(
        validation_file_name, yaml_config
    )
def get_connection(connection_name):
    """Return the stored connection config for the given connection name."""
    manager = state_manager.StateManager()
    connection_config = manager.get_connection_config(connection_name)
    return connection_config
def store_connection(connection_name, conn):
    """Save the connection config conn under the name connection_name."""
    state_manager.StateManager().create_connection(connection_name, conn)
def test_create_and_list_connection(capsys, fs):
    """A created connection is the only one reported by list_connections()."""
    mgr = state_manager.StateManager()
    mgr.create_connection(TEST_CONN_NAME, TEST_CONN)

    listed = mgr.list_connections()
    assert listed == [TEST_CONN_NAME]
def build_config_managers_from_args(args):
    """Return a list of config managers ready to execute.

    One ConfigManager is built per entry of the parsed tables list, all
    sharing the same source/target clients, result handler, labels,
    threshold and output format.

    Args:
        args: Parsed CLI arguments. The attributes read here include
            validate_cmd, bq_result_handler, result_handler_config,
            service_account, source_conn, target_conn, format, tables_list
            and verbose, plus validation-type specific ones (filters,
            threshold, labels, use_random_row, random_row_batch_size,
            exclusion_columns).

    Returns:
        list: The ConfigManager instances, in tables-list order.

    Raises:
        ValueError: If args.validate_cmd does not name a known validation
            type.
    """
    configs = []

    # Map the CLI sub-command onto its validation-type constant.
    # capitalize() upper-cases the first character and lower-cases the rest,
    # so e.g. "custom-query" is compared as "Custom-query".
    validate_cmd = args.validate_cmd.capitalize()
    if validate_cmd == "Schema":
        config_type = consts.SCHEMA_VALIDATION
    elif validate_cmd == "Column":
        config_type = consts.COLUMN_VALIDATION
    elif validate_cmd == "Row":
        config_type = consts.ROW_VALIDATION
    elif validate_cmd == "Custom-query":
        config_type = consts.CUSTOM_QUERY
    else:
        raise ValueError(f"Unknown Validation Type: {validate_cmd}")

    # args.bq_result_handler takes precedence over args.result_handler_config;
    # with neither set, no result handler config is passed on.
    result_handler_config = None
    if args.bq_result_handler:
        result_handler_config = cli_tools.get_result_handler(
            args.bq_result_handler, args.service_account)
    elif args.result_handler_config:
        result_handler_config = cli_tools.get_result_handler(
            args.result_handler_config, args.service_account)

    # Schema validation will not accept filters, labels, or threshold as flags
    filter_config, labels, threshold = [], [], 0.0
    if config_type != consts.SCHEMA_VALIDATION:
        if args.filters:
            filter_config = cli_tools.get_filters(args.filters)
        if args.threshold:
            threshold = args.threshold
        # NOTE(review): the original indentation of this line was lost; it is
        # assumed to belong to the non-schema branch, in line with the
        # comment above - confirm against the upstream file.
        labels = cli_tools.get_labels(args.labels)

    # Resolve both stored connections by name and build their data clients.
    mgr = state_manager.StateManager()
    source_client = clients.get_data_client(
        mgr.get_connection_config(args.source_conn))
    target_client = clients.get_data_client(
        mgr.get_connection_config(args.target_conn))

    # Output format falls back to "table" when args.format is falsy.
    format = args.format if args.format else "table"

    # The random-row arguments are only read for non-schema validations;
    # schema validations pass None for both.
    use_random_rows = (None if config_type == consts.SCHEMA_VALIDATION
                       else args.use_random_row)
    random_row_batch_size = (None if config_type == consts.SCHEMA_VALIDATION
                             else args.random_row_batch_size)

    # The tables list is parsed differently when the source client reports
    # the "FileSystem" source type.
    is_filesystem = source_client._source_type == "FileSystem"
    tables_list = cli_tools.get_tables_list(args.tables_list,
                                            default_value=[{}],
                                            is_filesystem=is_filesystem)

    for table_obj in tables_list:
        config_manager = ConfigManager.build_config_manager(
            config_type,
            args.source_conn,
            args.target_conn,
            table_obj,
            labels,
            threshold,
            format,
            use_random_rows=use_random_rows,
            random_row_batch_size=random_row_batch_size,
            source_client=source_client,
            target_client=target_client,
            result_handler_config=result_handler_config,
            filter_config=filter_config,
            verbose=args.verbose,
        )
        if config_type != consts.SCHEMA_VALIDATION:
            # Non-schema validations are completed from the remaining args.
            config_manager = build_config_from_args(args, config_manager)
        else:
            # Schema validations only take an optional list of excluded
            # columns, which are case-folded before being appended.
            if args.exclusion_columns is not None:
                exclusion_columns = cli_tools.get_arg_list(
                    args.exclusion_columns)
                config_manager.append_exclusion_columns(
                    [col.casefold() for col in exclusion_columns])
        configs.append(config_manager)

    return configs
def test_create_and_list_validation(capsys, fs):
    """A created validation is listed by its name up to the first dot."""
    mgr = state_manager.StateManager()
    mgr.create_validation_yaml(TEST_VALIDATION_NAME, TEST_VALIDATION_CONFIG)

    expected_name = TEST_VALIDATION_NAME.split(".")[0]
    listed = mgr.list_validations()
    assert listed == [expected_name]
def test_create_and_get_validation_config(capsys, fs):
    """A stored validation config is read back unchanged."""
    mgr = state_manager.StateManager()
    mgr.create_validation_yaml(TEST_VALIDATION_NAME, TEST_VALIDATION_CONFIG)

    loaded = mgr.get_validation_config(TEST_VALIDATION_NAME)
    assert loaded == TEST_VALIDATION_CONFIG
def _remove_spanner_conn():
    """Delete the stored connection file for SPANNER_CONN_NAME."""
    manager = state_manager.StateManager()
    conn_path = manager._get_connection_path(SPANNER_CONN_NAME)
    os.remove(conn_path)
def test_create_and_get_connection_config(capsys, fs):
    """A stored connection config is read back unchanged."""
    mgr = state_manager.StateManager()
    mgr.create_connection(TEST_CONN_NAME, TEST_CONN)

    loaded = mgr.get_connection_config(TEST_CONN_NAME)
    assert loaded == TEST_CONN