def verify_raw_results(test_section, exec_result, file_format, result_section,
                       type_section='TYPES', update_section=False,
                       replace_filenames=True):
    """
    Accepts a raw exec_result object and verifies it matches the expected results,
    including checking the ERRORS, TYPES, and LABELS test sections.

    The checks run in this order: ERRORS, the type section, LABELS, and finally the
    result rows. If update_section is true, a failed check does not raise; the
    corresponding entry of test_section is overwritten with the actual values and
    verification continues. If update_section is false, the AssertionError raised by
    the failed check propagates to the caller.

    This process includes the parsing/transformation of the raw data results into the
    result format used in the tests.

    Parameters:
      test_section: mapping of section name to section text; mutated in place when
        update_section is true.
      exec_result: result object; its log, column_types, column_labels and query
        attributes are read here and it is passed to parse_result_rows().
      file_format: table format name; 'avro' and 'kudu' get special handling.
      result_section: name of the section holding the expected rows. It can be used
        to check the results in a DML_RESULTS section instead of the regular RESULTS
        section.
      type_section: name of the section holding the expected column types, for tests
        that check the types against an alternative to the default TYPES section.
      update_section: overwrite mismatching sections instead of failing.
      replace_filenames: forwarded to apply_error_match_filter().

    Returns None. Returns early, without checking the rows, when result_section is
    missing or when the expected types contain TIMESTAMP for an avro/kudu table.

    TODO: separate out the handling of sections like ERRORS from checking of query
    results to allow regular RESULTS/ERRORS sections in tests with DML_RESULTS
    (IMPALA-4471).
    """
    if result_section not in test_section:
        assert 'ERRORS' not in test_section, \
            "'ERRORS' section must have accompanying 'RESULTS' section"
        LOG.info("No results found. Skipping verification")
        return
    expected_results = remove_comments(test_section[result_section])

    if 'ERRORS' in test_section:
        expected_errors = split_section_lines(remove_comments(test_section['ERRORS']))
        actual_errors = apply_error_match_filter(exec_result.log.split('\n'),
                                                 replace_filenames)
        try:
            verify_errors(expected_errors, actual_errors)
        except AssertionError:
            if not update_section:
                raise
            test_section['ERRORS'] = join_section_lines(actual_errors)

    if type_section in test_section:
        section = test_section[type_section]
        expected_types = [c.strip().upper()
                          for c in remove_comments(section).rstrip('\n').split(',')]

        # Avro and Kudu represent TIMESTAMP columns as strings, so tests using
        # TIMESTAMP are skipped because results will be wrong.
        if file_format in ('avro', 'kudu') and 'TIMESTAMP' in expected_types:
            LOG.info("TIMESTAMP columns unsupported in %s, skipping verification." %
                     file_format)
            return

        # Avro does not support as many types as Hive, so the Avro test tables may
        # have different column types than we expect (e.g., INT instead of
        # TINYINT). Bypass the type checking by ignoring the actual types of the Avro
        # table.
        if file_format == 'avro':
            LOG.info("Skipping type verification of Avro-format table.")
            actual_types = expected_types
        else:
            actual_types = exec_result.column_types

        try:
            verify_results(expected_types, actual_types, order_matters=True)
        except AssertionError:
            if not update_section:
                raise
            # Write back to the section that was actually checked, which is not
            # necessarily the default 'TYPES' section.
            test_section[type_section] = join_section_lines([', '.join(actual_types)])
    else:
        # This is an insert, so we are comparing the number of rows inserted
        expected_types = ['BIGINT']
        actual_types = ['BIGINT']

    actual_labels = ['DUMMY_LABEL']
    if exec_result and exec_result.column_labels:
        actual_labels = exec_result.column_labels

    if 'LABELS' in test_section:
        assert actual_labels is not None
        # Distinguish between an empty list and a list with an empty string.
        expected_labels = []
        if test_section.get('LABELS'):
            expected_labels = [c.strip().upper()
                               for c in test_section['LABELS'].split(',')]
        try:
            verify_results(expected_labels, actual_labels, order_matters=True)
        except AssertionError:
            if not update_section:
                raise
            test_section['LABELS'] = join_section_lines([', '.join(actual_labels)])

    # Get the verifier if specified. In the absence of an explicit
    # verifier, defaults to verifying equality.
    verifier = test_section.get('VERIFIER')

    order_matters = contains_order_by(exec_result.query)
    if verifier:
        verifier_name = verifier.upper()
        if verifier_name == 'VERIFY_IS_EQUAL':
            # Explicitly annotated that the order matters: do not sort the actual
            # and expected results.
            order_matters = True
        elif verifier_name == 'VERIFY_IS_EQUAL_SORTED':
            # Explicitly annotated that order does not matter: sort the actual and
            # expected results before verification.
            order_matters = False

    if 'MULTI_LINE' in test_section:
        # Each expected row is the text between '[' and ']' and may span several
        # lines; embedded newlines are turned into a literal backslash-n. A list is
        # built (not a lazy map object) so the rows can be traversed more than once.
        expected_results_list = [
            row.replace('\n', '\\n')
            for row in re.findall(r'\[(.*?)\]', expected_results, flags=re.DOTALL)]
    else:
        expected_results_list = split_section_lines(expected_results)

    expected = QueryTestResult(expected_results_list, expected_types, actual_labels,
                               order_matters)
    actual = QueryTestResult(parse_result_rows(exec_result), actual_types,
                             actual_labels, order_matters)
    # %-formatting keeps this an AssertionError even when verifier is None.
    assert verifier in VERIFIER_MAP, "Unknown verifier: %s" % verifier
    try:
        VERIFIER_MAP[verifier](expected, actual)
    except AssertionError:
        if not update_section:
            raise
        test_section[result_section] = join_section_lines(actual.result_list)
def verify_raw_results(test_section, exec_result, file_format, update_section=False,
                       replace_filenames=True, result_section='RESULTS'):
    """
    Accepts a raw exec_result object and verifies it matches the expected results,
    including checking the ERRORS, TYPES, and LABELS test sections.

    The checks run in this order: ERRORS, TYPES, LABELS, and finally the result rows.
    If update_section is true, a failed check does not raise; the corresponding entry
    of test_section is overwritten with the actual values and verification continues.
    If update_section is false, the AssertionError raised by the failed check
    propagates to the caller.

    This process includes the parsing/transformation of the raw data results into the
    result format used in the tests.

    Parameters:
      test_section: mapping of section name to section text; mutated in place when
        update_section is true.
      exec_result: result object; its log, schema and query attributes are read here
        and it is passed to parse_result_rows().
      file_format: table format name; 'avro' and 'kudu' get special handling.
      update_section: overwrite mismatching sections instead of failing.
      replace_filenames: forwarded to apply_error_match_filter().
      result_section: name of the section holding the expected rows. It can be used
        to check the results in a DML_RESULTS section instead of the regular RESULTS
        section.

    Returns None. Returns early, without checking the rows, when result_section is
    missing or when the expected types contain TIMESTAMP for an avro/kudu table.

    TODO: separate out the handling of sections like ERRORS from checking of query
    results to allow regular RESULTS/ERRORS sections in tests with DML_RESULTS
    (IMPALA-4471).
    """
    if result_section not in test_section:
        assert 'ERRORS' not in test_section, \
            "'ERRORS' section must have accompanying 'RESULTS' section"
        LOG.info("No results found. Skipping verification")
        return
    expected_results = remove_comments(test_section[result_section])

    if 'ERRORS' in test_section:
        expected_errors = split_section_lines(remove_comments(test_section['ERRORS']))
        actual_errors = apply_error_match_filter(exec_result.log.split('\n'),
                                                 replace_filenames)
        try:
            verify_errors(expected_errors, actual_errors)
        except AssertionError:
            if not update_section:
                raise
            test_section['ERRORS'] = join_section_lines(actual_errors)

    if 'TYPES' in test_section:
        # Distinguish between an empty list and a list with an empty string.
        expected_types = []
        if test_section.get('TYPES'):
            expected_types = [c.strip().upper()
                              for c in test_section['TYPES'].rstrip('\n').split(',')]

        # Avro and Kudu represent TIMESTAMP columns as strings, so tests using
        # TIMESTAMP are skipped because results will be wrong.
        if file_format in ('avro', 'kudu') and 'TIMESTAMP' in expected_types:
            LOG.info("TIMESTAMP columns unsupported in %s, skipping verification." %
                     file_format)
            return

        # Avro does not support as many types as Hive, so the Avro test tables may
        # have different column types than we expect (e.g., INT instead of
        # TINYINT). Bypass the type checking by ignoring the actual types of the Avro
        # table.
        if file_format == 'avro':
            LOG.info("Skipping type verification of Avro-format table.")
            actual_types = expected_types
        else:
            actual_types = parse_column_types(exec_result.schema)

        try:
            verify_results(expected_types, actual_types, order_matters=True)
        except AssertionError:
            if not update_section:
                raise
            test_section['TYPES'] = join_section_lines([', '.join(actual_types)])
    else:
        # This is an insert, so we are comparing the number of rows inserted
        expected_types = ['BIGINT']
        actual_types = ['BIGINT']

    actual_labels = ['DUMMY_LABEL']
    if exec_result and exec_result.schema:
        actual_labels = parse_column_labels(exec_result.schema)

    if 'LABELS' in test_section:
        assert actual_labels is not None
        # Distinguish between an empty list and a list with an empty string.
        expected_labels = []
        if test_section.get('LABELS'):
            expected_labels = [c.strip().upper()
                               for c in test_section['LABELS'].split(',')]
        try:
            verify_results(expected_labels, actual_labels, order_matters=True)
        except AssertionError:
            if not update_section:
                raise
            test_section['LABELS'] = join_section_lines([', '.join(actual_labels)])

    # Get the verifier if specified. In the absence of an explicit
    # verifier, defaults to verifying equality.
    verifier = test_section.get('VERIFIER')

    order_matters = contains_order_by(exec_result.query)
    if verifier:
        verifier_name = verifier.upper()
        if verifier_name == 'VERIFY_IS_EQUAL':
            # Explicitly annotated that the order matters: do not sort the actual
            # and expected results.
            order_matters = True
        elif verifier_name == 'VERIFY_IS_EQUAL_SORTED':
            # Explicitly annotated that order does not matter: sort the actual and
            # expected results before verification.
            order_matters = False

    if 'MULTI_LINE' in test_section:
        # Each expected row is the text between '[' and ']' and may span several
        # lines; embedded newlines are turned into a literal backslash-n. A list is
        # built (not a lazy map object) so the rows can be traversed more than once.
        expected_results_list = [
            row.replace('\n', '\\n')
            for row in re.findall(r'\[(.*?)\]', expected_results, flags=re.DOTALL)]
    else:
        expected_results_list = split_section_lines(expected_results)

    expected = QueryTestResult(expected_results_list, expected_types, actual_labels,
                               order_matters)
    actual = QueryTestResult(parse_result_rows(exec_result), actual_types,
                             actual_labels, order_matters)
    # %-formatting keeps this an AssertionError even when verifier is None.
    assert verifier in VERIFIER_MAP, "Unknown verifier: %s" % verifier
    try:
        VERIFIER_MAP[verifier](expected, actual)
    except AssertionError:
        if not update_section:
            raise
        test_section[result_section] = join_section_lines(actual.result_list)
def verify_raw_results(test_section, exec_result, file_format, update_section=False):
    """
    Accepts a raw exec_result object and verifies it matches the expected results.

    The ERRORS, TYPES and LABELS sections are checked first (when present), then the
    RESULTS rows. If update_section is true, a failed check does not raise; the
    corresponding entry of test_section is overwritten with the actual values and
    verification continues. If update_section is false, the AssertionError raised by
    the failed check propagates to the caller.

    This process includes the parsing/transformation of the raw data results into the
    result format used in the tests.

    Parameters:
      test_section: mapping of section name to section text; mutated in place when
        update_section is true.
      exec_result: result object; its log, schema and query attributes are read here
        and it is passed to parse_result_rows().
      file_format: table format name; 'avro' gets special handling.
      update_section: overwrite mismatching sections instead of failing.

    Returns None. Returns early, without checking the rows, when there is no RESULTS
    section or when the expected types contain TIMESTAMP for an avro table.
    """
    if 'RESULTS' not in test_section:
        LOG.info("No results found. Skipping verification")
        return
    expected_results = remove_comments(test_section['RESULTS'])

    if 'ERRORS' in test_section:
        expected_errors = split_section_lines(remove_comments(test_section['ERRORS']))
        actual_errors = apply_error_match_filter(exec_result.log.split('\n'))
        try:
            verify_errors(expected_errors, actual_errors)
        except AssertionError:
            if not update_section:
                raise
            test_section['ERRORS'] = join_section_lines(actual_errors)

    if 'TYPES' in test_section:
        # Distinguish between an empty list and a list with an empty string.
        expected_types = []
        if test_section.get('TYPES'):
            expected_types = [c.strip().upper()
                              for c in test_section['TYPES'].rstrip('\n').split(',')]

        # Avro does not support as many types as Hive, so the Avro test tables may
        # have different column types than we expect (e.g., INT instead of
        # TINYINT). We represent TIMESTAMP columns as strings in Avro, so we bail in
        # this case since the results will be wrong. Otherwise we bypass the type
        # checking by ignoring the actual types of the Avro table.
        if file_format == 'avro':
            if 'TIMESTAMP' in expected_types:
                LOG.info("TIMESTAMP columns unsupported in Avro, skipping verification.")
                return
            LOG.info("Skipping type verification of Avro-format table.")
            actual_types = expected_types
        else:
            actual_types = parse_column_types(exec_result.schema)

        try:
            verify_results(expected_types, actual_types, order_matters=True)
        except AssertionError:
            if not update_section:
                raise
            test_section['TYPES'] = join_section_lines([', '.join(actual_types)])
    else:
        # This is an insert, so we are comparing the number of rows inserted
        expected_types = ['BIGINT']
        actual_types = ['BIGINT']

    actual_labels = ['DUMMY_LABEL']
    if exec_result and exec_result.schema:
        actual_labels = parse_column_labels(exec_result.schema)

    if 'LABELS' in test_section:
        assert actual_labels is not None
        # Distinguish between an empty list and a list with an empty string.
        expected_labels = []
        if test_section.get('LABELS'):
            expected_labels = [c.strip().upper()
                               for c in test_section['LABELS'].split(',')]
        try:
            verify_results(expected_labels, actual_labels, order_matters=True)
        except AssertionError:
            if not update_section:
                raise
            test_section['LABELS'] = join_section_lines([', '.join(actual_labels)])

    # Get the verifier if specified. In the absence of an explicit
    # verifier, defaults to verifying equality.
    verifier = test_section.get('VERIFIER')

    order_matters = contains_order_by(exec_result.query)
    if verifier:
        verifier_name = verifier.upper()
        if verifier_name == 'VERIFY_IS_EQUAL':
            # Explicitly annotated that the order matters: do not sort the actual
            # and expected results.
            order_matters = True
        elif verifier_name == 'VERIFY_IS_EQUAL_SORTED':
            # Explicitly annotated that order does not matter: sort the actual and
            # expected results before verification.
            order_matters = False

    if 'MULTI_LINE' in test_section:
        # Each expected row is the text between '[' and ']' and may span several
        # lines; embedded newlines are turned into a literal backslash-n. A list is
        # built (not a lazy map object) so the rows can be traversed more than once.
        expected_results_list = [
            row.replace('\n', '\\n')
            for row in re.findall(r'\[(.*?)\]', expected_results, flags=re.DOTALL)]
    else:
        expected_results_list = split_section_lines(expected_results)

    expected = QueryTestResult(expected_results_list, expected_types, actual_labels,
                               order_matters)
    actual = QueryTestResult(parse_result_rows(exec_result), actual_types,
                             actual_labels, order_matters)
    # %-formatting keeps this an AssertionError even when verifier is None.
    assert verifier in VERIFIER_MAP, "Unknown verifier: %s" % verifier
    try:
        VERIFIER_MAP[verifier](expected, actual)
    except AssertionError:
        if not update_section:
            raise
        test_section['RESULTS'] = join_section_lines(actual.result_list)