Ejemplo n.º 1
0
    def __init__(self,
                 config,
                 source_client=None,
                 target_client=None,
                 verbose=False):
        """Set up a ConfigManager which supplies the source and target
            queries to run.

        Args:
            config (Dict): The Validation config supplied
            source_client (IbisClient): The Ibis client for the source DB.
                When falsy, a client is created from the source connection.
            target_client (IbisClient): The Ibis client for the target DB.
                When falsy, a client is created from the target connection.
            verbose (Bool): If verbose, the Data Validation client will print queries run

        Raises:
            ValueError: If the validation type is not one of
                consts.CONFIG_TYPES.
        """
        self._state_manager = state_manager.StateManager()
        # Stored before the connection lookups below; presumably those
        # lookups read from the config -- confirm against the getters.
        self._config = config

        if not source_client:
            source_client = clients.get_data_client(
                self.get_source_connection())
        self.source_client = source_client

        if not target_client:
            target_client = clients.get_data_client(
                self.get_target_connection())
        self.target_client = target_client

        self.verbose = verbose

        # Reject unknown validation types only after the instance is
        # fully populated, as the check reads from the instance itself.
        is_known_type = self.validation_type in consts.CONFIG_TYPES
        if not is_known_type:
            raise ValueError(
                f"Unknown Configuration Type: {self.validation_type}")
def build_config_managers_from_yaml(args):
    """Build one ConfigManager per validation listed in a YAML config file.

    The source and target connections named in the YAML are resolved once,
    and the two resulting clients are shared by every manager returned.

    Args:
        args: Parsed CLI arguments. The config file path is derived from
            them and ``args.verbose`` is forwarded to each manager.

    Returns:
        List[ConfigManager]: Instances ready to be executed.
    """
    yaml_configs = _get_yaml_config_from_file(_get_arg_config_file(args))

    connections = state_manager.StateManager()
    source_conn = connections.get_connection_config(
        yaml_configs[consts.YAML_SOURCE])
    target_conn = connections.get_connection_config(
        yaml_configs[consts.YAML_TARGET])

    source_client = clients.get_data_client(source_conn)
    target_client = clients.get_data_client(target_conn)

    managers = []
    for validation in yaml_configs[consts.YAML_VALIDATIONS]:
        # Each validation entry is completed in place with the file-level
        # connections and result handler before being handed over.
        validation[consts.CONFIG_SOURCE_CONN] = source_conn
        validation[consts.CONFIG_TARGET_CONN] = target_conn
        validation[consts.CONFIG_RESULT_HANDLER] = yaml_configs[
            consts.YAML_RESULT_HANDLER]
        managers.append(
            ConfigManager(validation,
                          source_client,
                          target_client,
                          verbose=args.verbose))

    return managers
def run_raw_query_against_connection(args):
    """Run ``args.query`` as raw SQL on the connection named ``args.conn``.

    Args:
        args: Parsed CLI arguments providing ``conn`` and ``query``.

    Returns:
        The result of ``fetchall()`` on the cursor opened for the query.
    """
    connection_config = state_manager.StateManager().get_connection_config(
        args.conn)
    data_client = clients.get_data_client(connection_config)

    # The cursor is used as a context manager so it is released on exit.
    with data_client.raw_sql(args.query, results=True) as cursor:
        return cursor.fetchall()
def find_tables_using_string_matching(args):
    """Return JSON String with matched tables for use in validations.

    Args:
        args: Parsed CLI arguments providing ``source_conn``,
            ``target_conn``, ``allowed_schemas`` and ``score_cutoff``.

    Returns:
        str: JSON-encoded result of matching the source table map against
            the target table map.
    """
    # Only fall back to the default when no cutoff was supplied. Using
    # ``or`` here would also discard an explicit cutoff of 0.
    # NOTE(review): assumes an unset flag arrives as None -- confirm
    # against the argument parser definition.
    score_cutoff = 0.8 if args.score_cutoff is None else args.score_cutoff

    mgr = state_manager.StateManager()
    source_client = clients.get_data_client(
        mgr.get_connection_config(args.source_conn))
    target_client = clients.get_data_client(
        mgr.get_connection_config(args.target_conn))

    # The schema allow-list is applied to the source side only.
    allowed_schemas = cli_tools.get_arg_list(args.allowed_schemas)
    source_table_map = get_table_map(source_client,
                                     allowed_schemas=allowed_schemas)
    target_table_map = get_table_map(target_client)

    table_configs = _compare_match_tables(source_table_map,
                                          target_table_map,
                                          score_cutoff=score_cutoff)
    return json.dumps(table_configs)
def test_compile(module_under_test, fs):
    """A compiled RandomRowBuilder query yields only the key columns and
    exactly as many rows as the requested batch size."""
    batch_size = 10
    key_columns = ["col_a"]

    _create_table_file(TABLE_FILE_PATH, JSON_DATA)
    data_client = clients.get_data_client(CONN_CONFIG)

    row_builder = module_under_test.RandomRowBuilder(key_columns, batch_size)
    compiled_query = row_builder.compile(data_client, None,
                                         CONN_CONFIG["table_name"])
    result = data_client.execute(compiled_query)

    assert list(result.columns) == key_columns
    assert len(result) == batch_size
def run_connections(args):
    """Dispatch a connection-management command.

    Args:
        args: Parsed CLI arguments. ``args.connect_cmd`` selects the action:
            ``"list"`` lists connections, ``"add"`` builds a connection
            config from the arguments and stores it under
            ``args.connection_name``.

    Raises:
        ValueError: If ``args.connect_cmd`` is neither "list" nor "add".
    """
    if args.connect_cmd == "list":
        cli_tools.list_connections()
        return

    if args.connect_cmd == "add":
        connection_config = cli_tools.get_connection_config_from_args(args)
        # Build a client first so invalid connection details fail before
        # anything is stored; the client itself is not needed.
        clients.get_data_client(connection_config)
        cli_tools.store_connection(args.connection_name, connection_config)
        return

    raise ValueError(
        f"Connections Argument '{args.connect_cmd}' is not supported")
Ejemplo n.º 7
0
def test_random_row_query_builder():
    """The random-row query compiles to the expected SQL and returns ten
    station ids that differ from a fixed reference list."""
    # Presumably the ids a non-random query would return first -- confirm.
    reference_station_ids = [
        4683, 4676, 4675, 4674, 4673,
        4671, 4670, 4666, 4665, 4664,
    ]

    client = clients.get_data_client(BQ_CONN)
    builder = random_row_builder.RandomRowBuilder(["station_id"], 10)
    query = builder.compile(
        client,
        "bigquery-public-data.new_york_citibike",
        "citibike_stations",
    )

    result = client.execute(query)

    assert query.compile() == EXPECTED_RANDOM_ROW_QUERY
    assert len(result["station_id"]) == 10
    assert list(result["station_id"]) != reference_station_ids
def test_get_pandas_data_client(fs):
    """The source connection config resolves to a PandasClient."""
    _create_table_file(SOURCE_TABLE_FILE_PATH, JSON_DATA)

    data_client = clients.get_data_client(SOURCE_CONN_CONFIG)

    assert isinstance(data_client, PandasClient)
def test_get_oracle_data_client():
    """Requesting an Oracle client raises a connection failure whose
    message points at installing cx_Oracle."""
    install_hint = r".*pip install cx_Oracle"

    with pytest.raises(exceptions.DataClientConnectionFailure,
                       match=install_hint):
        clients.get_data_client(ORACLE_CONN_CONFIG)
def build_config_managers_from_args(args):
    """Build one config manager per requested table from CLI arguments.

    Args:
        args: Parsed CLI arguments. ``args.validate_cmd`` selects the
            validation type. Filters, threshold and the random-row options
            are only read for non-schema validations, and
            ``exclusion_columns`` only for schema validations.

    Returns:
        list: Config managers ready to execute, in table-list order.

    Raises:
        ValueError: If ``args.validate_cmd`` is not a known validation type.
    """
    validate_cmd = args.validate_cmd.capitalize()
    if validate_cmd == "Schema":
        config_type = consts.SCHEMA_VALIDATION
    elif validate_cmd == "Column":
        config_type = consts.COLUMN_VALIDATION
    elif validate_cmd == "Row":
        config_type = consts.ROW_VALIDATION
    elif validate_cmd == "Custom-query":
        config_type = consts.CUSTOM_QUERY
    else:
        raise ValueError(f"Unknown Validation Type: {validate_cmd}")
    is_schema_validation = config_type == consts.SCHEMA_VALIDATION

    # The BigQuery shortcut flag wins over the generic handler config.
    handler_arg = args.bq_result_handler or args.result_handler_config
    result_handler_config = None
    if handler_arg:
        result_handler_config = cli_tools.get_result_handler(
            handler_arg, args.service_account)

    # Filters and threshold are only read for non-schema validations;
    # labels are read for every validation type.
    filter_config, threshold = [], 0.0
    if not is_schema_validation:
        filter_config = (cli_tools.get_filters(args.filters)
                         if args.filters else [])
        threshold = args.threshold or 0.0
    labels = cli_tools.get_labels(args.labels)

    connections = state_manager.StateManager()
    source_client = clients.get_data_client(
        connections.get_connection_config(args.source_conn))
    target_client = clients.get_data_client(
        connections.get_connection_config(args.target_conn))

    output_format = args.format or "table"

    # The random-row options are not read at all for schema validations.
    if is_schema_validation:
        use_random_rows = None
        random_row_batch_size = None
    else:
        use_random_rows = args.use_random_row
        random_row_batch_size = args.random_row_batch_size

    tables_list = cli_tools.get_tables_list(
        args.tables_list,
        default_value=[{}],
        is_filesystem=source_client._source_type == "FileSystem")

    managers = []
    for table_obj in tables_list:
        manager = ConfigManager.build_config_manager(
            config_type,
            args.source_conn,
            args.target_conn,
            table_obj,
            labels,
            threshold,
            output_format,
            use_random_rows=use_random_rows,
            random_row_batch_size=random_row_batch_size,
            source_client=source_client,
            target_client=target_client,
            result_handler_config=result_handler_config,
            filter_config=filter_config,
            verbose=args.verbose,
        )
        if not is_schema_validation:
            manager = build_config_from_args(args, manager)
        elif args.exclusion_columns is not None:
            # Excluded column names are case-folded before being recorded.
            excluded = cli_tools.get_arg_list(args.exclusion_columns)
            manager.append_exclusion_columns(
                [name.casefold() for name in excluded])

        managers.append(manager)

    return managers