def verify_raw_results(test_section, exec_result, file_format, result_section,
                       type_section='TYPES', update_section=False,
                       replace_filenames=True):
  """
  Accepts a raw exec_result object and verifies it matches the expected results,
  including checking the ERRORS, TYPES, and LABELS test sections.
  If update_section is true, updates test_section with the actual results
  if they don't match the expected results. If update_section is false, failed
  verifications result in assertion failures, otherwise they are ignored.

  This process includes the parsing/transformation of the raw data results into the
  result format used in the tests.

  Parameters:
    test_section: dict-like mapping of section name to section text. It is modified
      in place when update_section is true and a verification fails.
    exec_result: query result object; its 'log', 'column_types', 'column_labels' and
      'query' attributes are read here.
    file_format: table format name. 'avro' bypasses type checking, and 'avro'/'kudu'
      skip verification entirely when TIMESTAMP is among the expected types.
    result_section: name of the section holding the expected rows. It can be used to
      make this function check the results in a DML_RESULTS section instead of the
      regular RESULTS section.
    type_section: name of the section holding the expected column types. It can be
      used to check the types against an alternative section from the default TYPES.
      On a mismatch with update_section set, this same section is rewritten.
    update_section: when true, mismatching sections are overwritten with the actual
      values instead of re-raising the AssertionError.
    replace_filenames: passed through to apply_error_match_filter().

  Returns None. Returns early, without verifying anything, when result_section is
  missing from test_section.

  TODO: separate out the handling of sections like ERRORS from checking of query results
  to allow regular RESULTS/ERRORS sections in tests with DML_RESULTS (IMPALA-4471).
  """
  if result_section not in test_section:
    assert 'ERRORS' not in test_section, \
        "'ERRORS' section must have accompanying 'RESULTS' section"
    LOG.info("No results found. Skipping verification")
    return
  expected_results = remove_comments(test_section[result_section])

  if 'ERRORS' in test_section:
    expected_errors = split_section_lines(remove_comments(test_section['ERRORS']))
    actual_errors = apply_error_match_filter(exec_result.log.split('\n'),
                                             replace_filenames)
    try:
      verify_errors(expected_errors, actual_errors)
    except AssertionError:
      if not update_section:
        raise
      test_section['ERRORS'] = join_section_lines(actual_errors)

  if type_section in test_section:
    # Note: split(',') always yields at least one element, so expected_types is never
    # an empty list here.
    section = test_section[type_section]
    expected_types = [c.strip().upper()
                      for c in remove_comments(section).rstrip('\n').split(',')]

    # Avro and Kudu represent TIMESTAMP columns as strings, so tests using TIMESTAMP are
    # skipped because results will be wrong.
    if file_format in ('avro', 'kudu') and 'TIMESTAMP' in expected_types:
      LOG.info("TIMESTAMP columns unsupported in %s, skipping verification." %
               file_format)
      return

    # Avro does not support as many types as Hive, so the Avro test tables may
    # have different column types than we expect (e.g., INT instead of
    # TINYINT). Bypass the type checking by ignoring the actual types of the Avro
    # table.
    if file_format == 'avro':
      LOG.info("Skipping type verification of Avro-format table.")
      actual_types = expected_types
    else:
      actual_types = exec_result.column_types

    try:
      verify_results(expected_types, actual_types, order_matters=True)
    except AssertionError:
      if not update_section:
        raise
      # Write back to the section that was actually checked, which is not necessarily
      # the default 'TYPES' section.
      test_section[type_section] = join_section_lines([', '.join(actual_types)])
  else:
    # This is an insert, so we are comparing the number of rows inserted
    expected_types = ['BIGINT']
    actual_types = ['BIGINT']

  actual_labels = ['DUMMY_LABEL']
  if exec_result and exec_result.column_labels:
    actual_labels = exec_result.column_labels

  if 'LABELS' in test_section:
    assert actual_labels is not None
    # Distinguish between an empty list and a list with an empty string.
    expected_labels = []
    if test_section.get('LABELS'):
      expected_labels = [c.strip().upper() for c in test_section['LABELS'].split(',')]
    try:
      verify_results(expected_labels, actual_labels, order_matters=True)
    except AssertionError:
      if not update_section:
        raise
      test_section['LABELS'] = join_section_lines([', '.join(actual_labels)])

  # Get the verifier if specified. In the absence of an explicit
  # verifier, defaults to verifying equality.
  verifier = test_section.get('VERIFIER')

  order_matters = contains_order_by(exec_result.query)
  if verifier:
    verifier_name = verifier.upper()
    if verifier_name == 'VERIFY_IS_EQUAL':
      # The test section is explicitly annotated to specify the order matters, so do
      # not sort the actual and expected results.
      order_matters = True
    elif verifier_name == 'VERIFY_IS_EQUAL_SORTED':
      # The test section is explicitly annotated to specify order does not matter, so
      # sort the actual and expected results before verification.
      order_matters = False

  if 'MULTI_LINE' in test_section:
    # Each expected row is enclosed in [...] and may span lines; embedded newlines are
    # replaced by a literal backslash-n. Built as a real list (not a lazy map object)
    # so it can be traversed more than once.
    expected_results_list = [
        row.replace('\n', '\\n')
        for row in re.findall(r'\[(.*?)\]', expected_results, flags=re.DOTALL)]
  else:
    expected_results_list = split_section_lines(expected_results)

  expected = QueryTestResult(expected_results_list, expected_types,
      actual_labels, order_matters)
  actual = QueryTestResult(parse_result_rows(exec_result), actual_types,
      actual_labels, order_matters)
  # str() keeps the message well-formed even when verifier is None.
  assert verifier in VERIFIER_MAP, "Unknown verifier: " + str(verifier)
  try:
    VERIFIER_MAP[verifier](expected, actual)
  except AssertionError:
    if not update_section:
      raise
    test_section[result_section] = join_section_lines(actual.result_list)
def verify_raw_results(test_section, exec_result, file_format, update_section=False,
                       replace_filenames=True, result_section='RESULTS'):
  """
  Accepts a raw exec_result object and verifies it matches the expected results,
  including checking the ERRORS, TYPES, and LABELS test sections.
  If update_section is true, updates test_section with the actual results
  if they don't match the expected results. If update_section is false, failed
  verifications result in assertion failures, otherwise they are ignored.

  This process includes the parsing/transformation of the raw data results into the
  result format used in the tests.

  Parameters:
    test_section: dict-like mapping of section name to section text. It is modified
      in place when update_section is true and a verification fails.
    exec_result: query result object; its 'log', 'schema' and 'query' attributes are
      read here.
    file_format: table format name. 'avro' bypasses type checking, and 'avro'/'kudu'
      skip verification entirely when TIMESTAMP is among the expected types.
    update_section: when true, mismatching sections are overwritten with the actual
      values instead of re-raising the AssertionError.
    replace_filenames: passed through to apply_error_match_filter().
    result_section: name of the section holding the expected rows. It can be used to
      make this function check the results in a DML_RESULTS section instead of the
      regular RESULTS section.

  Returns None. Returns early, without verifying anything, when result_section is
  missing from test_section.

  TODO: separate out the handling of sections like ERRORS from checking of query results
  to allow regular RESULTS/ERRORS sections in tests with DML_RESULTS (IMPALA-4471).
  """
  if result_section not in test_section:
    assert 'ERRORS' not in test_section, \
        "'ERRORS' section must have accompanying 'RESULTS' section"
    LOG.info("No results found. Skipping verification")
    return
  expected_results = remove_comments(test_section[result_section])

  if 'ERRORS' in test_section:
    expected_errors = split_section_lines(remove_comments(test_section['ERRORS']))
    actual_errors = apply_error_match_filter(exec_result.log.split('\n'),
                                             replace_filenames)
    try:
      verify_errors(expected_errors, actual_errors)
    except AssertionError:
      if not update_section:
        raise
      test_section['ERRORS'] = join_section_lines(actual_errors)

  if 'TYPES' in test_section:
    # Distinguish between an empty list and a list with an empty string.
    expected_types = []
    if test_section.get('TYPES'):
      expected_types = [c.strip().upper()
                        for c in test_section['TYPES'].rstrip('\n').split(',')]

    # Avro and Kudu represent TIMESTAMP columns as strings, so tests using TIMESTAMP are
    # skipped because results will be wrong.
    if file_format in ('avro', 'kudu') and 'TIMESTAMP' in expected_types:
      LOG.info("TIMESTAMP columns unsupported in %s, skipping verification." %
               file_format)
      return

    # Avro does not support as many types as Hive, so the Avro test tables may
    # have different column types than we expect (e.g., INT instead of
    # TINYINT). Bypass the type checking by ignoring the actual types of the Avro
    # table.
    if file_format == 'avro':
      LOG.info("Skipping type verification of Avro-format table.")
      actual_types = expected_types
    else:
      actual_types = parse_column_types(exec_result.schema)

    try:
      verify_results(expected_types, actual_types, order_matters=True)
    except AssertionError:
      if not update_section:
        raise
      test_section['TYPES'] = join_section_lines([', '.join(actual_types)])
  else:
    # This is an insert, so we are comparing the number of rows inserted
    expected_types = ['BIGINT']
    actual_types = ['BIGINT']

  actual_labels = ['DUMMY_LABEL']
  if exec_result and exec_result.schema:
    actual_labels = parse_column_labels(exec_result.schema)

  if 'LABELS' in test_section:
    assert actual_labels is not None
    # Distinguish between an empty list and a list with an empty string.
    expected_labels = []
    if test_section.get('LABELS'):
      expected_labels = [c.strip().upper() for c in test_section['LABELS'].split(',')]
    try:
      verify_results(expected_labels, actual_labels, order_matters=True)
    except AssertionError:
      if not update_section:
        raise
      test_section['LABELS'] = join_section_lines([', '.join(actual_labels)])

  # Get the verifier if specified. In the absence of an explicit
  # verifier, defaults to verifying equality.
  verifier = test_section.get('VERIFIER')

  order_matters = contains_order_by(exec_result.query)
  if verifier:
    verifier_name = verifier.upper()
    if verifier_name == 'VERIFY_IS_EQUAL':
      # The test section is explicitly annotated to specify the order matters, so do
      # not sort the actual and expected results.
      order_matters = True
    elif verifier_name == 'VERIFY_IS_EQUAL_SORTED':
      # The test section is explicitly annotated to specify order does not matter, so
      # sort the actual and expected results before verification.
      order_matters = False

  if 'MULTI_LINE' in test_section:
    # Each expected row is enclosed in [...] and may span lines; embedded newlines are
    # replaced by a literal backslash-n. Built as a real list (not a lazy map object)
    # so it can be traversed more than once.
    expected_results_list = [
        row.replace('\n', '\\n')
        for row in re.findall(r'\[(.*?)\]', expected_results, flags=re.DOTALL)]
  else:
    expected_results_list = split_section_lines(expected_results)

  expected = QueryTestResult(expected_results_list, expected_types,
      actual_labels, order_matters)
  actual = QueryTestResult(parse_result_rows(exec_result), actual_types,
      actual_labels, order_matters)
  # str() keeps the message well-formed even when verifier is None.
  assert verifier in VERIFIER_MAP, "Unknown verifier: " + str(verifier)
  try:
    VERIFIER_MAP[verifier](expected, actual)
  except AssertionError:
    if not update_section:
      raise
    test_section[result_section] = join_section_lines(actual.result_list)
def verify_raw_results(test_section, exec_result, file_format, update_section=False):
  """
  Accepts a raw exec_result object and verifies it matches the expected results.
  If update_section is true, updates test_section with the actual results
  if they don't match the expected results. If update_section is false, failed
  verifications result in assertion failures, otherwise they are ignored.

  This process includes the parsing/transformation of the raw data results into the
  result format used in the tests.

  Parameters:
    test_section: dict-like mapping of section name to section text. It is modified
      in place when update_section is true and a verification fails.
    exec_result: query result object; its 'log', 'schema' and 'query' attributes are
      read here.
    file_format: table format name. For 'avro', type checking is bypassed and
      verification is skipped entirely when TIMESTAMP is among the expected types.
    update_section: when true, mismatching sections are overwritten with the actual
      values instead of re-raising the AssertionError.

  Returns None. Returns early, without verifying anything, when there is no RESULTS
  section in test_section.
  """
  if 'RESULTS' not in test_section:
    LOG.info("No results found. Skipping verification")
    return
  expected_results = remove_comments(test_section['RESULTS'])

  if 'ERRORS' in test_section:
    expected_errors = split_section_lines(remove_comments(test_section['ERRORS']))
    actual_errors = apply_error_match_filter(exec_result.log.split('\n'))
    try:
      verify_errors(expected_errors, actual_errors)
    except AssertionError:
      if not update_section:
        raise
      test_section['ERRORS'] = join_section_lines(actual_errors)

  if 'TYPES' in test_section:
    # Distinguish between an empty list and a list with an empty string.
    expected_types = []
    if test_section.get('TYPES'):
      expected_types = [c.strip().upper()
                        for c in test_section['TYPES'].rstrip('\n').split(',')]

    # Avro does not support as many types as Hive, so the Avro test tables may
    # have different column types than we expect (e.g., INT instead of
    # TINYINT). We represent TIMESTAMP columns as strings in Avro, so we bail in
    # this case since the results will be wrong. Otherwise we bypass the type
    # checking by ignoring the actual types of the Avro table.
    if file_format == 'avro':
      if 'TIMESTAMP' in expected_types:
        LOG.info("TIMESTAMP columns unsupported in Avro, skipping verification.")
        return
      LOG.info("Skipping type verification of Avro-format table.")
      actual_types = expected_types
    else:
      actual_types = parse_column_types(exec_result.schema)

    try:
      verify_results(expected_types, actual_types, order_matters=True)
    except AssertionError:
      if not update_section:
        raise
      test_section['TYPES'] = join_section_lines([', '.join(actual_types)])
  else:
    # This is an insert, so we are comparing the number of rows inserted
    expected_types = ['BIGINT']
    actual_types = ['BIGINT']

  actual_labels = ['DUMMY_LABEL']
  if exec_result and exec_result.schema:
    actual_labels = parse_column_labels(exec_result.schema)

  if 'LABELS' in test_section:
    assert actual_labels is not None
    # Distinguish between an empty list and a list with an empty string.
    expected_labels = []
    if test_section.get('LABELS'):
      expected_labels = [c.strip().upper() for c in test_section['LABELS'].split(',')]
    try:
      verify_results(expected_labels, actual_labels, order_matters=True)
    except AssertionError:
      if not update_section:
        raise
      test_section['LABELS'] = join_section_lines([', '.join(actual_labels)])

  # Get the verifier if specified. In the absence of an explicit
  # verifier, defaults to verifying equality.
  verifier = test_section.get('VERIFIER')

  order_matters = contains_order_by(exec_result.query)
  if verifier:
    verifier_name = verifier.upper()
    if verifier_name == 'VERIFY_IS_EQUAL':
      # The test section is explicitly annotated to specify the order matters, so do
      # not sort the actual and expected results.
      order_matters = True
    elif verifier_name == 'VERIFY_IS_EQUAL_SORTED':
      # The test section is explicitly annotated to specify order does not matter, so
      # sort the actual and expected results before verification.
      order_matters = False

  if 'MULTI_LINE' in test_section:
    # Each expected row is enclosed in [...] and may span lines; embedded newlines are
    # replaced by a literal backslash-n. Built as a real list (not a lazy map object)
    # so it can be traversed more than once.
    expected_results_list = [
        row.replace('\n', '\\n')
        for row in re.findall(r'\[(.*?)\]', expected_results, flags=re.DOTALL)]
  else:
    expected_results_list = split_section_lines(expected_results)

  expected = QueryTestResult(expected_results_list, expected_types,
      actual_labels, order_matters)
  actual = QueryTestResult(parse_result_rows(exec_result), actual_types,
      actual_labels, order_matters)
  # str() keeps the message well-formed even when verifier is None.
  assert verifier in VERIFIER_MAP, "Unknown verifier: " + str(verifier)
  try:
    VERIFIER_MAP[verifier](expected, actual)
  except AssertionError:
    if not update_section:
      raise
    test_section['RESULTS'] = join_section_lines(actual.result_list)