def test_failpoints(self, vector):
    query = vector.get_value('query')
    action = vector.get_value('action')
    location = vector.get_value('location')
    vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')

    try:
      plan_node_ids = self.__parse_plan_nodes_from_explain(query, vector)
    except ImpalaBeeswaxException as e:
      if "MT_DOP not supported" in str(e):
        pytest.xfail(reason="MT_DOP not supported.")
      else:
        raise e

    for node_id in plan_node_ids:
      debug_action = '%d:%s:%s' % (node_id, location, FAILPOINT_ACTION_MAP[action])
      # IMPALA-7046: add jitter to backend startup to exercise various failure paths.
      debug_action += '|COORD_BEFORE_EXEC_RPC:JITTER@100@0.3'

      LOG.info('Current debug action: SET DEBUG_ACTION=%s' % debug_action)
      vector.get_value('exec_option')['debug_action'] = debug_action

      if action == 'CANCEL':
        self.__execute_cancel_action(query, vector)
      elif action == 'FAIL' or action == 'MEM_LIMIT_EXCEEDED':
        self.__execute_fail_action(query, vector)
      else:
        assert 0, 'Unknown action: %s' % action

    # We should be able to execute the same query successfully when no failures are
    # injected.
    del vector.get_value('exec_option')['debug_action']
    self.execute_query(query, vector.get_value('exec_option'))
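The debug action built above follows the pattern '<plan node id>:<exec phase>:<action>', and several actions can be chained into one DEBUG_ACTION string with '|' (the COORD_BEFORE_EXEC_RPC jitter chained on above is a global action with no node id prefix). A minimal sketch of composing such a string; the helper name is illustrative and not part of the Impala test suite:

def build_debug_action(node_id, location, action, global_actions=None):
  # Per-node failpoint, e.g. '1:PREPARE:FAIL'.
  parts = ['%d:%s:%s' % (node_id, location, action)]
  # Global actions such as 'COORD_BEFORE_EXEC_RPC:JITTER@100@0.3' are chained with '|'.
  parts.extend(global_actions or [])
  return '|'.join(parts)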
    def corrupt_file(self, path, rng):
        """ Corrupt the file at 'path' in the local file system in a randomised way using the
    random number generator 'rng'. Rewrites the file in-place.
    Logs a message to describe how the file was corrupted, so the error is reproducible.
    """
        with open(path, "rb") as f:
            data = bytearray(f.read())

        num_corruptions = rng.randint(0, int(math.log(len(data))))
        for _ in xrange(num_corruptions):
            flip_offset = rng.randint(0, len(data) - 1)
            flip_val = rng.randint(0, 255)
            LOG.info(
                "corrupt file: Flip byte in {0} at {1} from {2} to {3}".format(
                    path, flip_offset, data[flip_offset], flip_val))
            data[flip_offset] = flip_val

        if rng.random() < 0.4:
            truncation = rng.randint(0, len(data))
            LOG.info("corrupt file: Truncate {0} to {1}".format(
                path, truncation))
            data = data[:truncation]

        with open(path, "wb") as f:
            f.write(data)
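corrupt_file takes a caller-supplied random.Random so a corruption can be replayed. A hedged sketch of the seeding convention, mirroring the run_fuzz_test examples further down this page ('suite' and the local path are placeholders):

import os
import random
import time

# Seed from SCANNER_FUZZ_SEED if set, otherwise from the current time, so the exact
# corruption can be reproduced later by exporting the logged seed.
rng = random.Random()
random_seed = os.environ.get("SCANNER_FUZZ_SEED") or time.time()
rng.seed(long(random_seed))  # Python 2, matching the surrounding examples
# suite.corrupt_file('/tmp/copied_data_file.parq', rng)  # 'suite' is a test suite instance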
Example #3
    def test_failpoints(self, vector):
        query = vector.get_value('query')
        action = vector.get_value('action')
        location = vector.get_value('location')
        vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')

        try:
            plan_node_ids = self.__parse_plan_nodes_from_explain(query, vector)
        except ImpalaBeeswaxException as e:
            if "MT_DOP not supported" in str(e):
                pytest.xfail(reason="MT_DOP not supported.")
            else:
                raise e

        for node_id in plan_node_ids:
            debug_action = '%d:%s:%s' % (node_id, location,
                                         FAILPOINT_ACTION_MAP[action])
            # IMPALA-7046: add jitter to backend startup to exercise various failure paths.
            debug_action += '|COORD_BEFORE_EXEC_RPC:JITTER@100@0.3'

            LOG.info('Current debug action: SET DEBUG_ACTION=%s' %
                     debug_action)
            vector.get_value('exec_option')['debug_action'] = debug_action

            if action == 'CANCEL':
                self.__execute_cancel_action(query, vector)
            elif action == 'FAIL' or action == 'MEM_LIMIT_EXCEEDED':
                self.__execute_fail_action(query, vector)
            else:
                assert 0, 'Unknown action: %s' % action

        # We should be able to execute the same query successfully when no failures are
        # injected.
        del vector.get_value('exec_option')['debug_action']
        self.execute_query(query, vector.get_value('exec_option'))
Example #4
    def test_failpoints(self, vector):
        query = vector.get_value('query')
        action = vector.get_value('action')
        location = vector.get_value('location')
        vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')

        plan_node_ids = self.__parse_plan_nodes_from_explain(query, vector)
        for node_id in plan_node_ids:
            debug_action = '%d:%s:%s' % (node_id, location,
                                         FAILPOINT_ACTION_MAP[action])
            # IMPALA-7046: add jitter to backend startup to exercise various failure paths.
            debug_action += '|COORD_BEFORE_EXEC_RPC:JITTER@100@0.3'

            LOG.info('Current debug action: SET DEBUG_ACTION=%s' %
                     debug_action)
            vector.get_value('exec_option')['debug_action'] = debug_action

            if action == 'CANCEL':
                self.__execute_cancel_action(query, vector)
            elif action == 'FAIL' or action == 'MEM_LIMIT_EXCEEDED':
                self.__execute_fail_action(query, vector)
            else:
                assert 0, 'Unknown action: %s' % action

        # We should be able to execute the same query successfully when no failures are
        # injected.
        del vector.get_value('exec_option')['debug_action']
        self.execute_query(query, vector.get_value('exec_option'))

        # Detect any hung fragments left from this test.
        for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
            verifier = MetricVerifier(impalad.service)
            verifier.wait_for_metric("impala-server.num-fragments-in-flight",
                                     0)
Example #5
    def test_failpoints(self, vector):
        query = vector.get_value('query')
        action = vector.get_value('action')
        location = vector.get_value('location')
        vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')

        if action == "CANCEL" and location == "PREPARE":
            pytest.xfail(reason="IMPALA-5202 leads to a hang.")

        try:
            plan_node_ids = self.__parse_plan_nodes_from_explain(query, vector)
        except ImpalaBeeswaxException as e:
            if "MT_DOP not supported" in str(e):
                pytest.xfail(reason="MT_DOP not supported.")
            else:
                raise e

        for node_id in plan_node_ids:
            debug_action = '%d:%s:%s' % (node_id, location,
                                         FAILPOINT_ACTION_MAP[action])
            LOG.info('Current debug action: SET DEBUG_ACTION=%s' %
                     debug_action)
            vector.get_value('exec_option')['debug_action'] = debug_action

            if action == 'CANCEL':
                self.__execute_cancel_action(query, vector)
            elif action == 'FAIL' or action == 'MEM_LIMIT_EXCEEDED':
                self.__execute_fail_action(query, vector)
            else:
                assert 0, 'Unknown action: %s' % action

        # We should be able to execute the same query successfully when no failures are
        # injected.
        del vector.get_value('exec_option')['debug_action']
        self.execute_query(query, vector.get_value('exec_option'))
Example #6
    def test_strings_utf8(self, vector, unique_database):
        # Create table
        table_name = "ice_str_utf8"
        qualified_table_name = "%s.%s" % (unique_database, table_name)
        query = 'create table %s (a string) stored as iceberg' % qualified_table_name
        self.client.execute(query)

        # Inserted string data should have UTF8 annotation regardless of query options.
        query = 'insert into %s values ("impala")' % qualified_table_name
        self.execute_query(query, {'parquet_annotate_strings_utf8': False})

        # Copy the created file to the local filesystem and parse metadata
        local_file = '/tmp/iceberg_utf8_test_%s.parq' % random.randint(
            0, 10000)
        LOG.info("test_strings_utf8 local file name: " + local_file)
        hdfs_file = get_fs_path('/test-warehouse/%s.db/%s/data/*.parq' %
                                (unique_database, table_name))
        check_call(['hadoop', 'fs', '-copyToLocal', hdfs_file, local_file])
        metadata = get_parquet_metadata(local_file)

        # Extract SchemaElements corresponding to the table column
        a_schema_element = metadata.schema[1]
        assert a_schema_element.name == 'a'

        # Check that the schema uses the UTF8 annotation
        assert a_schema_element.converted_type == ConvertedType.UTF8

        os.remove(local_file)
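The metadata.schema[1] indexing above relies on get_parquet_metadata returning the file's flattened schema, with the message root at index 0 and the column SchemaElements after it. A quick way to inspect that layout for any Parquet file (the path below is a placeholder):

# Sketch only: dump the flattened Parquet schema; index 0 is the root element and the
# table columns follow in depth-first order.
metadata = get_parquet_metadata('/tmp/some_file.parq')
for idx, elem in enumerate(metadata.schema):
    LOG.info("schema[%d]: name=%s converted_type=%s", idx, elem.name, elem.converted_type)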
Example #7
 def __execute_fail_action(self, query, vector):
   try:
     self.execute_query(query, vector.get_value('exec_option'),
                        table_format=vector.get_value('table_format'))
     assert False, 'Expected Failure'
   except ImpalaBeeswaxException as e:
     LOG.debug(e)
Example #8
    def corrupt_file(self, path, rng):
        """ Corrupt the file at 'path' in the local file system in a randomised way using the
    random number generator 'rng'. Rewrites the file in-place.
    Logs a message to describe how the file was corrupted, so the error is reproducible.
    """
        with open(path, "rb") as f:
            data = bytearray(f.read())

        num_corruptions = rng.randint(0, int(math.log(len(data))))
        for _ in xrange(num_corruptions):
            flip_offset = rng.randint(0, len(data) - 1)
            flip_val = rng.randint(0, 255)
            LOG.info(
                "corrupt file: Flip byte in {0} at {1} from {2} to {3}".format(
                    path, flip_offset, data[flip_offset], flip_val))
            data[flip_offset] = flip_val

        if rng.random() < 0.4:  # delete random part of the file
            beg = rng.randint(0, len(data) - 1)
            end = rng.randint(beg, len(data))
            LOG.info("corrupt file: Remove range [{0}, {1}) in {2}".format(
                beg, end, path))
            with open(path, "wb") as f:
                f.write(data[:beg])
                f.write(data[end:])
        else:
            with open(path, "wb") as f:
                f.write(data)
Example #9
 def __execute_fail_action(self, query, vector):
   try:
     self.execute_query(query, vector.get_value('exec_option'),
                        table_format=vector.get_value('table_format'))
     assert False, 'Expected Failure'
   except ImpalaBeeswaxException as e:
     LOG.debug(e)
     # IMPALA-5197: None of the debug actions should trigger corrupted file message
     assert 'Corrupt Parquet file' not in str(e)
 def __execute_cancel_action(self, query, vector):
   LOG.info('Starting async query execution')
   handle = self.execute_query_async(query, vector.get_value('exec_option'),
                                     table_format=vector.get_value('table_format'))
   LOG.info('Sleeping')
   sleep(3)
   cancel_result = self.client.cancel(handle)
   self.client.close_query(handle)
   assert cancel_result.status_code == 0,\
       'Unexpected status code from cancel request: %s' % cancel_result
Example #12
 def __execute_cancel_action(self, query, vector):
   LOG.info('Starting async query execution')
   handle = self.execute_query_async(query, vector.get_value('exec_option'),
                                     table_format=vector.get_value('table_format'))
   LOG.info('Sleeping')
   sleep(3)
   cancel_result = self.client.cancel(handle)
   self.client.close_query(handle)
   assert cancel_result.status_code == 0,\
       'Unexpected status code from cancel request: %s' % cancel_result
Example #13
    def test_create_alter_bulk_partition(self, vector, unique_database):
        # Change the scale depending on the exploration strategy: with 50 partitions this
        # test runs for a few minutes; with 10 partitions it takes ~50s for two configurations.
        num_parts = 50 if self.exploration_strategy() == 'exhaustive' else 10
        fq_tbl_name = unique_database + ".part_test_tbl"
        self.client.execute(
            "create table {0}(i int) partitioned by(j int, s string) "
            "location '{1}/{0}'".format(fq_tbl_name, WAREHOUSE))

        # Add some partitions (first batch of two)
        for i in xrange(num_parts / 5):
            start = time.time()
            self.client.execute(
                "alter table {0} add partition(j={1}, s='{1}')".format(
                    fq_tbl_name, i))
            LOG.info('ADD PARTITION #%d exec time: %s' %
                     (i, time.time() - start))

        # Modify one of the partitions
        self.client.execute("alter table {0} partition(j=1, s='1')"
                            " set fileformat parquetfile".format(fq_tbl_name))

        # Alter one partition to a non-existent location twice (IMPALA-741)
        self.filesystem_client.delete_file_dir("tmp/dont_exist1/",
                                               recursive=True)
        self.filesystem_client.delete_file_dir("tmp/dont_exist2/",
                                               recursive=True)

        self.execute_query_expect_success(
            self.client,
            "alter table {0} partition(j=1,s='1') set location '{1}/tmp/dont_exist1'"
            .format(fq_tbl_name, WAREHOUSE))
        self.execute_query_expect_success(
            self.client,
            "alter table {0} partition(j=1,s='1') set location '{1}/tmp/dont_exist2'"
            .format(fq_tbl_name, WAREHOUSE))

        # Add some more partitions
        for i in xrange(num_parts / 5, num_parts):
            start = time.time()
            self.client.execute(
                "alter table {0} add partition(j={1},s='{1}')".format(
                    fq_tbl_name, i))
            LOG.info('ADD PARTITION #%d exec time: %s' %
                     (i, time.time() - start))

        # Insert data and verify it shows up.
        self.client.execute(
            "insert into table {0} partition(j=1, s='1') select 1".format(
                fq_tbl_name))
        assert '1' == self.execute_scalar(
            "select count(*) from {0}".format(fq_tbl_name))
Example #14
  def test_create_alter_bulk_partition(self, vector):
    TBL_NAME = 'foo_part'
    # Change the scale depending on the exploration strategy: with 50 partitions this
    # takes a few minutes to run; with 10 partitions it takes ~50s for two configurations.
    num_parts = 50 if self.exploration_strategy() == 'exhaustive' else 10
    self.client.execute("use default")
    self.client.execute("drop table if exists {0}".format(TBL_NAME))
    self.client.execute("""create table {0}(i int) partitioned by(j int, s string)
         location '{1}/{0}'""".format(TBL_NAME, WAREHOUSE))

    # Add some partitions (first batch of two)
    for i in xrange(num_parts / 5):
      start = time.time()
      self.client.execute("alter table {0} add partition(j={1}, s='{1}')".format(TBL_NAME,
                                                                                 i))
      LOG.info('ADD PARTITION #%d exec time: %s' % (i, time.time() - start))

    # Modify one of the partitions
    self.client.execute("alter table %s partition(j=1, s='1')"
        " set fileformat parquetfile" % TBL_NAME)

    # Alter one partition to a non-existent location twice (IMPALA-741)
    self.filesystem_client.delete_file_dir("tmp/dont_exist1/", recursive=True)
    self.filesystem_client.delete_file_dir("tmp/dont_exist2/", recursive=True)

    self.execute_query_expect_success(self.client,
        "alter table {0} partition(j=1,s='1') set location '{1}/tmp/dont_exist1'"
        .format(TBL_NAME, WAREHOUSE))
    self.execute_query_expect_success(self.client,
        "alter table {0} partition(j=1,s='1') set location '{1}/tmp/dont_exist2'"
        .format(TBL_NAME, WAREHOUSE))

    # Add some more partitions
    for i in xrange(num_parts / 5, num_parts):
      start = time.time()
      self.client.execute("alter table {0} add partition(j={1},s='{1}')".format(TBL_NAME,
                                                                                i))
      LOG.info('ADD PARTITION #%d exec time: %s' % (i, time.time() - start))

    # Insert data and verify it shows up.
    self.client.execute("insert into table {0} partition(j=1, s='1') select 1"
      .format(TBL_NAME))
    assert '1' == self.execute_scalar("select count(*) from {0}".format(TBL_NAME))
    self.client.execute("drop table {0}".format(TBL_NAME))
Example #15
def execute_query_expect_debug_action_failure(impala_test_suite, query,
                                              vector):
    """Executes the given query with the configured debug_action and asserts that the
  query fails. Removes the debug_action from the exec options, re-runs the query, and
  asserts that it succeeds."""
    assert 'debug_action' in vector.get_value('exec_option')
    # Run the query with the given debug_action and assert that the query fails.
    # execute_query_expect_failure either returns the client exception thrown when executing
    # the query, or the result of the query if it failed but the client did not throw an
    # exception. Either way, log the result.
    LOG.debug(
        ImpalaTestSuite.execute_query_expect_failure(
            impala_test_suite.client, query, vector.get_value('exec_option')))

    # Assert that the query can be run without the debug_action.
    del vector.get_value('exec_option')['debug_action']
    result = impala_test_suite.execute_query(query,
                                             vector.get_value('exec_option'))
    assert result.success, "Failed to run {0} without debug action".format(
        query)
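A hedged sketch of how a test might call this helper; the debug action label and query are illustrative, and the caller has to put debug_action into the vector's exec options first because of the assert at the top of the function:

    # Sketch only: 'self' is an ImpalaTestSuite subclass.
    def test_some_failure(self, vector):
        vector.get_value('exec_option')['debug_action'] = 'SOME_DEBUG_ACTION_LABEL:FAIL'
        execute_query_expect_debug_action_failure(
            self, "select count(*) from functional.alltypes", vector)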
    def get_schema_elements():
      # Copy the created file to the local filesystem and parse metadata
      local_file = '/tmp/utf8_test_%s.parq' % random.randint(0, 10000)
      LOG.info("test_annotate_utf8_option local file name: " + local_file)
      hdfs_file = get_fs_path('/test-warehouse/%s.db/%s/*.parq'
          % (unique_database, TABLE_NAME))
      check_call(['hadoop', 'fs', '-copyToLocal', hdfs_file, local_file])
      metadata = get_parquet_metadata(local_file)

      # Extract SchemaElements corresponding to the table columns
      a_schema_element = metadata.schema[1]
      assert a_schema_element.name == 'a'
      b_schema_element = metadata.schema[2]
      assert b_schema_element.name == 'b'
      c_schema_element = metadata.schema[3]
      assert c_schema_element.name == 'c'
      d_schema_element = metadata.schema[4]
      assert d_schema_element.name == 'd'

      os.remove(local_file)
      return a_schema_element, b_schema_element, c_schema_element, d_schema_element
  def corrupt_file(self, path, rng):
    """ Corrupt the file at 'path' in the local file system in a randomised way using the
    random number generator 'rng'. Rewrites the file in-place.
    Logs a message to describe how the file was corrupted, so the error is reproducible.
    """
    with open(path, "rb") as f:
      data = bytearray(f.read())

    if rng.random() < 0.5:
      flip_offset = rng.randint(0, len(data) - 1)
      flip_val = rng.randint(0, 255)
      LOG.info("corrupt_file: Flip byte in %s at %d from %d to %d", path, flip_offset,
          data[flip_offset], flip_val)
      data[flip_offset] = flip_val
    else:
      truncation = rng.randint(0, len(data))
      LOG.info("corrupt_file: Truncate %s to %d", path, truncation)
      data = data[:truncation]

    with open(path, "wb") as f:
      f.write(data)
Example #18
    def corrupt_file(self, path, rng):
        """ Corrupt the file at 'path' in the local file system in a randomised way using the
    random number generator 'rng'. Rewrites the file in-place.
    Logs a message to describe how the file was corrupted, so the error is reproducible.
    """
        with open(path, "rb") as f:
            data = bytearray(f.read())

        if rng.random() < 0.5:
            flip_offset = rng.randint(0, len(data) - 1)
            flip_val = rng.randint(0, 255)
            LOG.info("corrupt_file: Flip byte in %s at %d from %d to %d", path,
                     flip_offset, data[flip_offset], flip_val)
            data[flip_offset] = flip_val
        else:
            truncation = rng.randint(0, len(data))
            LOG.info("corrupt_file: Truncate %s to %d", path, truncation)
            data = data[:truncation]

        with open(path, "wb") as f:
            f.write(data)
Example #19
    def get_schema_elements():
      # Copy the created file to the local filesystem and parse metadata
      local_file = '/tmp/utf8_test_%s.parq' % random.randint(0, 10000)
      LOG.info("test_annotate_utf8_option local file name: " + local_file)
      hdfs_file = get_fs_path('/test-warehouse/%s.db/%s/*.parq'
          % (unique_database, TABLE_NAME))
      check_call(['hadoop', 'fs', '-copyToLocal', hdfs_file, local_file])
      metadata = get_parquet_metadata(local_file)

      # Extract SchemaElements corresponding to the table columns
      a_schema_element = metadata.schema[1]
      assert a_schema_element.name == 'a'
      b_schema_element = metadata.schema[2]
      assert b_schema_element.name == 'b'
      c_schema_element = metadata.schema[3]
      assert c_schema_element.name == 'c'
      d_schema_element = metadata.schema[4]
      assert d_schema_element.name == 'd'

      os.remove(local_file)
      return a_schema_element, b_schema_element, c_schema_element, d_schema_element
def get_num_cache_requests():
  """Returns the number of outstanding cache requests. Due to race conditions in the
    way cache requests are added/dropped/reported (see IMPALA-3040), this function tries
    to return a stable result by making several attempts to stabilize it within a
    reasonable timeout."""
  def get_num_cache_requests_util():
    rc, stdout, stderr = exec_process("hdfs cacheadmin -listDirectives -stats")
    assert rc == 0, 'Error executing hdfs cacheadmin: %s %s' % (stdout, stderr)
    return len(stdout.split('\n'))

  # IMPALA-3040: This can take time, especially under slow builds like ASAN.
  wait_time_in_sec = build_flavor_timeout(5, slow_build_timeout=20)
  num_stabilization_attempts = 0
  max_num_stabilization_attempts = 10
  new_requests = None
  num_requests = None
  LOG.info("{0} Entered get_num_cache_requests()".format(time.time()))
  while num_stabilization_attempts < max_num_stabilization_attempts:
    new_requests = get_num_cache_requests_util()
    if new_requests == num_requests: break
    LOG.info("{0} Waiting to stabilise: num_requests={1} new_requests={2}".format(
        time.time(), num_requests, new_requests))
    num_requests = new_requests
    num_stabilization_attempts = num_stabilization_attempts + 1
    time.sleep(wait_time_in_sec)
  LOG.info("{0} Final num requests: {1}".format(time.time(), num_requests))
  return num_requests
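A hedged sketch of the baseline-and-compare pattern an HDFS caching test could build on this helper (the intervening steps are placeholders):

# Sketch only: record the stabilised directive count before the scenario, then verify
# that dropping the cached table returns the count to the baseline.
baseline = get_num_cache_requests()
# ... create a table with cached partitions, run the scenario under test, drop the table ...
assert get_num_cache_requests() == baseline, "cache directives leaked by the test"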
Example #21
def get_num_cache_requests():
    """Returns the number of outstanding cache requests. Due to race conditions in the
    way cache requests are added/dropped/reported (see IMPALA-3040), this function tries
    to return a stable result by making several attempts to stabilize it within a
    reasonable timeout."""
    def get_num_cache_requests_util():
        rc, stdout, stderr = exec_process(
            "hdfs cacheadmin -listDirectives -stats")
        assert rc == 0, 'Error executing hdfs cacheadmin: %s %s' % (stdout,
                                                                    stderr)
        return len(stdout.split('\n'))

    # IMPALA-3040: This can take time, especially under slow builds like ASAN.
    wait_time_in_sec = specific_build_type_timeout(5, slow_build_timeout=20)
    num_stabilization_attempts = 0
    max_num_stabilization_attempts = 10
    new_requests = None
    num_requests = None
    LOG.info("{0} Entered get_num_cache_requests()".format(time.time()))
    while num_stabilization_attempts < max_num_stabilization_attempts:
        new_requests = get_num_cache_requests_util()
        if new_requests == num_requests: break
        LOG.info("{0} Waiting to stabilise: num_requests={1} new_requests={2}".
                 format(time.time(), num_requests, new_requests))
        num_requests = new_requests
        num_stabilization_attempts = num_stabilization_attempts + 1
        time.sleep(wait_time_in_sec)
    LOG.info("{0} Final num requests: {1}".format(time.time(), num_requests))
    return num_requests
Example #22
  def test_failpoints(self, vector):
    query = QUERY
    node_type, node_ids = vector.get_value('target_node')
    action = vector.get_value('action')
    location = vector.get_value('location')

    for node_id in node_ids:
      debug_action = '%d:%s:%s' % (node_id, location, FAILPOINT_ACTION_MAP[action])
      LOG.info('Current debug action: SET DEBUG_ACTION=%s' % debug_action)
      vector.get_value('exec_option')['debug_action'] = debug_action

      if action == 'CANCEL':
        self.__execute_cancel_action(query, vector)
      elif action == 'FAIL' or action == 'MEM_LIMIT_EXCEEDED':
        self.__execute_fail_action(query, vector)
      else:
        assert 0, 'Unknown action: %s' % action

    # We should be able to execute the same query successfully when no failures are
    # injected.
    del vector.get_value('exec_option')['debug_action']
    self.execute_query(query, vector.get_value('exec_option'))
  def corrupt_file(self, path, rng):
    """ Corrupt the file at 'path' in the local file system in a randomised way using the
    random number generator 'rng'. Rewrites the file in-place.
    Logs a message to describe how the file was corrupted, so the error is reproducible.
    """
    with open(path, "rb") as f:
      data = bytearray(f.read())

    num_corruptions = rng.randint(0, int(math.log(len(data))))
    for _ in xrange(num_corruptions):
      flip_offset = rng.randint(0, len(data) - 1)
      flip_val = rng.randint(0, 255)
      LOG.info("corrupt file: Flip byte in {0} at {1} from {2} to {3}".format(
          path, flip_offset, data[flip_offset], flip_val))
      data[flip_offset] = flip_val

    if rng.random() < 0.4:
      truncation = rng.randint(0, len(data))
      LOG.info("corrupt file: Truncate {0} to {1}".format(path, truncation))
      data = data[:truncation]

    with open(path, "wb") as f:
      f.write(data)
Example #24
def get_num_cache_requests():
    """Returns the number of outstanding cache requests. Due to race conditions in the
    way cache requests are added/dropped/reported (see IMPALA-3040), this function tries
    to return a stable result by making several attempts to stabilize it within a
    reasonable timeout."""
    def get_num_cache_requests_util():
        rc, stdout, stderr = exec_process(
            "hdfs cacheadmin -listDirectives -stats")
        assert rc == 0, 'Error executing hdfs cacheadmin: %s %s' % (stdout,
                                                                    stderr)
        # remove blank new lines from output count
        lines = [line for line in stdout.split('\n') if line.strip()]
        count = None
        for line in lines:
            if line.startswith("Found "):
                # the line should say "Found <int> entries"
                # if we find this line we parse the number of entries
                # from this line.
                count = int(re.search(r'\d+', line).group())
                break
        # if count is available we return it else we just
        # return the total number of lines
        if count is not None:
            return count
        else:
            return len(stdout.split('\n'))

    # IMPALA-3040: This can take time, especially under slow builds like ASAN.
    wait_time_in_sec = build_flavor_timeout(5, slow_build_timeout=20)
    num_stabilization_attempts = 0
    max_num_stabilization_attempts = 10
    num_requests = None
    LOG.info("{0} Entered get_num_cache_requests()".format(time.time()))
    while num_stabilization_attempts < max_num_stabilization_attempts:
        new_requests = get_num_cache_requests_util()
        if new_requests == num_requests: break
        LOG.info("{0} Waiting to stabilise: num_requests={1} new_requests={2}".
                 format(time.time(), num_requests, new_requests))
        num_requests = new_requests
        num_stabilization_attempts = num_stabilization_attempts + 1
        time.sleep(wait_time_in_sec)
    LOG.info("{0} Final num requests: {1}".format(time.time(), num_requests))
    return num_requests
    # They should both succeed.
    threads = [QuerySubmitThread(COORDINATOR_QUERY, self.cluster.impalads[i])
              for i in xrange(2)]
    for t in threads: t.start()
    for t in threads:
      t.join()
      assert t.error is None

    # Create two threads to submit COORDINATOR_QUERY to one coordinator and
    # SYMMETRIC_QUERY to another coordinator. One of the queries should fail because
    # memory would be overcommitted on daemon 0.
    threads = [QuerySubmitThread(COORDINATOR_QUERY, self.cluster.impalads[0]),
               QuerySubmitThread(SYMMETRIC_QUERY, self.cluster.impalads[1])]
    for t in threads: t.start()
    num_errors = 0
    for t in threads:
      t.join()
      if t.error is not None:
        assert "Failed to get minimum memory reservation" in t.error
        LOG.info("Query failed with error: %s", t.error)
        LOG.info(t.query)
        num_errors += 1
    assert num_errors == 1

    # Check that free buffers are released over time. We set the memory maintenance sleep
    # time very low above so this should happen quickly.
    verifiers = [MetricVerifier(i.service) for i in self.cluster.impalads]
    for v in verifiers:
      v.wait_for_metric("buffer-pool.free-buffers", 0, timeout=60)
      v.wait_for_metric("buffer-pool.free-buffer-bytes", 0, timeout=60)
    def run_fuzz_test(self, vector, unique_database, table, num_copies=1):
        """ Do some basic fuzz testing: create a copy of an existing table with randomly
    corrupted files and make sure that we don't crash or behave in an unexpected way.
    'unique_database' is used for the table, so it will be cleaned up automatically.
    If 'num_copies' is set, create that many corrupted copies of each input file.
    SCANNER_FUZZ_SEED can be set in the environment to reproduce the result (assuming that
    input files are the same).
    SCANNER_FUZZ_KEEP_FILES can be set in the environment to keep the generated files.
    """
        # Create and seed a new random number generator for reproducibility.
        rng = random.Random()
        random_seed = os.environ.get("SCANNER_FUZZ_SEED") or time.time()
        LOG.info("Using random seed %d", random_seed)
        rng.seed(long(random_seed))

        table_format = vector.get_value('table_format')
        self.change_database(self.client, table_format)

        tmp_table_dir = tempfile.mkdtemp(prefix="tmp-scanner-fuzz-%s" % table,
                                         dir=os.path.join(
                                             os.environ['IMPALA_HOME'],
                                             "testdata"))

        self.execute_query("create table %s.%s like %s" %
                           (unique_database, table, table))
        fuzz_table_location = get_fs_path("/test-warehouse/{0}.db/{1}".format(
            unique_database, table))

        LOG.info(
            "Generating corrupted version of %s in %s. Local working directory is %s",
            table, unique_database, tmp_table_dir)

        # Find the location of the existing table and get the full table directory structure.
        table_loc = self._get_table_location(table, vector)
        check_call(
            ['hdfs', 'dfs', '-copyToLocal', table_loc + "/*", tmp_table_dir])

        partitions = self.walk_and_corrupt_table_data(tmp_table_dir,
                                                      num_copies, rng)
        for partition in partitions:
            self.execute_query(
                'alter table {0}.{1} add partition ({2})'.format(
                    unique_database, table, ','.join(partition)))

        # Copy all of the local files and directories to hdfs.
        to_copy = [
            "%s/%s" % (tmp_table_dir, file_or_dir)
            for file_or_dir in os.listdir(tmp_table_dir)
        ]
        check_call(['hdfs', 'dfs', '-copyFromLocal'] + to_copy +
                   [fuzz_table_location])

        if "SCANNER_FUZZ_KEEP_FILES" not in os.environ:
            shutil.rmtree(tmp_table_dir)

        # Querying the corrupted files should not DCHECK or crash.
        self.execute_query("refresh %s.%s" % (unique_database, table))
        # Execute a query that tries to read all the columns and rows in the file.
        # Also execute a count(*) that materializes no columns, since different code
        # paths are exercised.
        # Use abort_on_error=0 to ensure we scan all the files.
        queries = [
            'select count(*) from (select distinct * from {0}.{1}) q'.format(
                unique_database, table),
            'select count(*) from {0}.{1} q'.format(unique_database, table)
        ]

        xfail_msgs = []
        for query in queries:
            for batch_size in self.BATCH_SIZES:
                query_options = {
                    'abort_on_error': '0',
                    'batch_size': batch_size
                }
                try:
                    result = self.execute_query(query,
                                                query_options=query_options)
                    LOG.info('\n'.join(result.log))
                except Exception as e:
                    if 'memory limit exceeded' in str(e).lower():
                        # Memory limit error should fail query.
                        continue
                    msg = "Should not throw error when abort_on_error=0: '{0}'".format(
                        e)
                    LOG.error(msg)
                    # Parquet and compressed text can fail the query for some parse errors.
                    # E.g. corrupt Parquet footer (IMPALA-3773) or a corrupt LZO index file
                    # (IMPALA-4013).
                    if table_format.file_format == 'parquet' or \
                        (table_format.file_format == 'text' and
                        table_format.compression_codec != 'none'):
                        xfail_msgs.append(msg)
                    else:
                        raise
        if len(xfail_msgs) != 0:
            pytest.xfail('\n'.join(xfail_msgs))
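A hedged sketch of a typical caller: a scanner test that points run_fuzz_test at one of the standard functional tables inside the per-test database fixture (the table name is illustrative):

    # Sketch only: the corrupted copy lives in 'unique_database', so it is cleaned up
    # automatically when the test finishes.
    def test_fuzz_alltypes(self, vector, unique_database):
        self.run_fuzz_test(vector, unique_database, "alltypes")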
Example #27
    def run_fuzz_test(self,
                      vector,
                      src_db,
                      src_table,
                      fuzz_db,
                      fuzz_table,
                      num_copies=1,
                      custom_queries=None):
        """ Do some basic fuzz testing: create a copy of an existing table with randomly
    corrupted files and make sure that we don't crash or behave in an unexpected way.
    'fuzz_db' is used for the corrupted copy of the table, so a unique per-test database will be cleaned up automatically.
    If 'num_copies' is set, create that many corrupted copies of each input file.
    SCANNER_FUZZ_SEED can be set in the environment to reproduce the result (assuming that
    input files are the same).
    SCANNER_FUZZ_KEEP_FILES can be set in the environment to keep the generated files.
    """
        # Create and seed a new random number generator for reproducibility.
        rng = random.Random()
        random_seed = os.environ.get("SCANNER_FUZZ_SEED") or time.time()
        LOG.info("Using random seed %d", random_seed)
        rng.seed(long(random_seed))

        tmp_table_dir = tempfile.mkdtemp(
            prefix="tmp-scanner-fuzz-%s" % fuzz_table,
            dir=os.path.join(os.environ['IMPALA_HOME'], "testdata"))

        self.execute_query("create table %s.%s like %s.%s" %
                           (fuzz_db, fuzz_table, src_db, src_table))
        fuzz_table_location = get_fs_path("/test-warehouse/{0}.db/{1}".format(
            fuzz_db, fuzz_table))

        LOG.info(
            "Generating corrupted version of %s in %s. Local working directory is %s",
            fuzz_table, fuzz_db, tmp_table_dir)

        # Find the location of the existing table and get the full table directory structure.
        fq_table_name = src_db + "." + src_table
        table_loc = self._get_table_location(fq_table_name, vector)
        check_call(
            ['hdfs', 'dfs', '-copyToLocal', table_loc + "/*", tmp_table_dir])

        partitions = self.walk_and_corrupt_table_data(tmp_table_dir,
                                                      num_copies, rng)
        for partition in partitions:
            self.execute_query(
                'alter table {0}.{1} add partition ({2})'.format(
                    fuzz_db, fuzz_table, ','.join(partition)))

        # Copy all of the local files and directories to hdfs.
        to_copy = [
            "%s/%s" % (tmp_table_dir, file_or_dir)
            for file_or_dir in os.listdir(tmp_table_dir)
        ]
        self.filesystem_client.copy_from_local(to_copy, fuzz_table_location)

        if "SCANNER_FUZZ_KEEP_FILES" not in os.environ:
            shutil.rmtree(tmp_table_dir)

        # Querying the corrupted files should not DCHECK or crash.
        self.execute_query("refresh %s.%s" % (fuzz_db, fuzz_table))
        # Execute a query that tries to read all the columns and rows in the file.
        # Also execute a count(*) that materializes no columns, since different code
        # paths are exercised.
        queries = [
            'select count(*) from (select distinct * from {0}.{1}) q'.format(
                fuzz_db, fuzz_table),
            'select count(*) from {0}.{1} q'.format(fuzz_db, fuzz_table)
        ]
        if custom_queries is not None:
            queries = queries + [
                s.format(fuzz_db, fuzz_table) for s in custom_queries
            ]

        for query, batch_size, disable_codegen in \
            itertools.product(queries, self.BATCH_SIZES, self.DISABLE_CODEGEN_VALUES):
            query_options = copy(vector.get_value('exec_option'))
            query_options['batch_size'] = batch_size
            query_options['disable_codegen'] = disable_codegen
            query_options['disable_codegen_rows_threshold'] = 0
            try:
                result = self.execute_query(query, query_options=query_options)
                LOG.info('\n'.join(result.log))
            except Exception as e:
                if 'memory limit exceeded' in str(e).lower():
                    # Memory limit error should fail query.
                    continue
                msg = "Should not throw error when abort_on_error=0: '{0}'".format(
                    e)
                LOG.error(msg)
                # Parquet and compressed text can fail the query for some parse errors.
                # E.g. corrupt Parquet footer (IMPALA-3773) or a corrupt LZO index file
                # (IMPALA-4013).
                table_format = vector.get_value('table_format')
                if table_format.file_format not in ['parquet', 'orc', 'rc', 'seq'] \
                    and not (table_format.file_format == 'text' and
                    table_format.compression_codec != 'none'):
                    raise
  def run_fuzz_test(self, vector, unique_database, table, num_copies=1):
    """ Do some basic fuzz testing: create a copy of an existing table with randomly
    corrupted files and make sure that we don't crash or behave in an unexpected way.
    'unique_database' is used for the table, so it will be cleaned up automatically.
    If 'num_copies' is set, create that many corrupted copies of each input file.
    SCANNER_FUZZ_SEED can be set in the environment to reproduce the result (assuming that
    input files are the same).
    SCANNER_FUZZ_KEEP_FILES can be set in the environment to keep the generated files.
    """
    # Create and seed a new random number generator for reproducibility.
    rng = random.Random()
    random_seed = os.environ.get("SCANNER_FUZZ_SEED") or time.time()
    LOG.info("Using random seed %d", random_seed)
    rng.seed(long(random_seed))

    table_format = vector.get_value('table_format')
    self.change_database(self.client, table_format)

    tmp_table_dir = tempfile.mkdtemp(prefix="tmp-scanner-fuzz-%s" % table,
        dir=os.path.join(os.environ['IMPALA_HOME'], "testdata"))

    self.execute_query("create table %s.%s like %s" % (unique_database, table, table))
    fuzz_table_location = get_fs_path("/test-warehouse/{0}.db/{1}".format(
        unique_database, table))

    LOG.info("Generating corrupted version of %s in %s. Local working directory is %s",
        table, unique_database, tmp_table_dir)

    # Find the location of the existing table and get the full table directory structure.
    table_loc = self._get_table_location(table, vector)
    check_call(['hdfs', 'dfs', '-copyToLocal', table_loc + "/*", tmp_table_dir])

    partitions = self.walk_and_corrupt_table_data(tmp_table_dir, num_copies, rng)
    for partition in partitions:
      self.execute_query('alter table {0}.{1} add partition ({2})'.format(
          unique_database, table, ','.join(partition)))

    # Copy all of the local files and directories to hdfs.
    to_copy = ["%s/%s" % (tmp_table_dir, file_or_dir)
               for file_or_dir in os.listdir(tmp_table_dir)]
    check_call(['hdfs', 'dfs', '-copyFromLocal'] + to_copy + [fuzz_table_location])

    if "SCANNER_FUZZ_KEEP_FILES" not in os.environ:
      shutil.rmtree(tmp_table_dir)

    # Querying the corrupted files should not DCHECK or crash.
    self.execute_query("refresh %s.%s" % (unique_database, table))
    # Execute a query that tries to read all the columns and rows in the file.
    # Also execute a count(*) that materializes no columns, since different code
    # paths are exercised.
    queries = [
        'select count(*) from (select distinct * from {0}.{1}) q'.format(
            unique_database, table),
        'select count(*) from {0}.{1} q'.format(unique_database, table)]

    for query, batch_size, disable_codegen in \
        itertools.product(queries, self.BATCH_SIZES, self.DISABLE_CODEGEN_VALUES):
      query_options = copy(vector.get_value('exec_option'))
      query_options['batch_size'] = batch_size
      query_options['disable_codegen'] = disable_codegen
      try:
        result = self.execute_query(query, query_options = query_options)
        LOG.info('\n'.join(result.log))
      except Exception as e:
        if 'memory limit exceeded' in str(e).lower():
          # Memory limit error should fail query.
          continue
        msg = "Should not throw error when abort_on_error=0: '{0}'".format(e)
        LOG.error(msg)
        # Parquet and compressed text can fail the query for some parse errors.
        # E.g. corrupt Parquet footer (IMPALA-3773) or a corrupt LZO index file
        # (IMPALA-4013).
        if table_format.file_format != 'parquet' \
            and not (table_format.file_format == 'text' and
            table_format.compression_codec != 'none'):
          raise