Esempio n. 1
0
def test_header_case_with_meta(uppercase_meta: bool, uppercase_data: bool,
                               headers_ignore_case: bool):
    """
    Check validation when the data and/or the metadata use capitalised
    column names, with and without the headers-ignore-case option.

    Validation is expected to pass exactly when:
        (uppercase_data == uppercase_meta) OR headers_ignore_case
    i.e. both use the same case, or header case is being ignored.
    """
    data_dir = "tests/data/headers/"

    # Load the metadata and, if requested, upper-case every column name.
    meta = Metadata.from_json(
        os.path.join(data_dir, "meta_data/table1.json"))
    if uppercase_meta:
        for col in meta.columns:
            col["name"] = col["name"].upper()

    # Choose the data file whose header row has the requested case.
    csv_name = "table1_uppercase.csv" if uppercase_data else "table1.csv"
    csv_path = os.path.join(data_dir, csv_name)

    should_be_valid = (uppercase_data == uppercase_meta) or headers_ignore_case

    # Run the validator with the option under test.
    params = {"headers-ignore-case": headers_ignore_case}
    checker = PandasValidator(csv_path, params, meta)
    checker.read_data_and_validate()

    assert should_be_valid == checker.response.result["valid"]
Esempio n. 2
0
def validate_data(config: dict) -> ValidatorResult:
    """
    Validate every matched file of every table listed in ``config``.

    For each table that has matched files, the table metadata is loaded
    (from the table's "metadata" path, defaulting to
    ``meta_data/<table_name>.json``), each file is run through the
    configured validator engine, validation errors are written to the log
    and a per-file response is collected. If at least one file was
    validated, the collected responses are passed to
    ``save_completion_status``.

    Side effect: each table's params dict gets "lint-response" reset to [].

    Args:
        config: Linter configuration. Reads "tables" (mapping of table
            name to table params, each with a "matched_files" list) and the
            optional "validator-engine" (default "pandas") and
            "validator-engine-params" (default {}) keys.

    Returns:
        The ``response`` of the validator used for the last file that was
        validated, or ``None`` when no file was validated at all (no
        tables, or no table had matched files).
    """
    validator_engine = config.get("validator-engine", "pandas")
    validator_params = config.get("validator-engine-params", {})

    all_table_responses = []
    # Validator of the most recently processed file. It stays None when
    # nothing is validated, so the final return cannot hit an unbound name.
    last_validator = None

    for table_name, table_params in config["tables"].items():

        table_params["lint-response"] = []
        matched_files = table_params["matched_files"]

        if not matched_files:
            msg4 = f"SKIPPING {table_name}. No files found."
            log.info(msg4)
            continue

        log.info(f"Linting {table_name}")

        meta_file_path = table_params.get("metadata",
                                          f"meta_data/{table_name}.json")

        meta_obj = Metadata.from_json(meta_file_path)
        meta_obj.set_col_type_category_from_types()
        metadata = meta_obj.to_dict()

        file_count = len(matched_files)
        for file_number, matched_file in enumerate(matched_files, start=1):

            log.info(f"{matched_file} ...file {file_number} "
                     f"of {file_count}")
            validator = get_validator[validator_engine](matched_file,
                                                        table_params,
                                                        metadata,
                                                        **validator_params)

            validator.read_data_and_validate()
            validator.write_validation_errors_to_log()

            table_response = {
                "valid": validator.valid,
                "response": validator.get_response_dict(),
                "original-path": matched_file,
                "table-name": table_name,
            }

            if table_response["valid"]:
                log.info("...file passed.")
            else:
                log.info("...file failed.")

            all_table_responses.append(table_response)
            last_validator = validator

    if all_table_responses:
        save_completion_status(config, all_table_responses)

    # Nothing was validated: return None rather than raising
    # UnboundLocalError on a validator that was never created.
    if last_validator is None:
        return None

    return last_validator.response
Esempio n. 3
0
def test_headers(file_name, expected_result):
    """
    Validate one header test file under three table configurations.

    The file is validated against its metadata (the metadata file is named
    after the part of the file name before the first "." and "_") using,
    in this order:
    - expect-header is False
    - expect-header is True and headers-ignore-case is False
    - expect-header is True and headers-ignore-case is True

    Args:
        file_name: The file name in the dir tests/data/headers/
        expected_result: Expected "valid" outcomes for the three
            configurations listed above, in the same order.
    """
    data_dir = "tests/data/headers/"
    data_path = os.path.join(data_dir, file_name)

    # e.g. "table1_uppercase.csv" -> "table1"
    stem = file_name.split(".")[0]
    table_name = stem.split("_")[0]
    meta = Metadata.from_json(
        os.path.join(data_dir, f"meta_data/{table_name}.json"))

    configs = [
        {"expect-header": False},
        {"expect-header": True, "headers-ignore-case": False},
        {"expect-header": True, "headers-ignore-case": True},
    ]

    def _is_valid(params):
        # Run a fresh validator for this configuration and report validity.
        checker = PandasValidator(data_path, params, meta)
        checker.read_data_and_validate()
        return checker.response.result["valid"]

    outcomes = [_is_valid(params) for params in configs]

    assert expected_result == outcomes