Example no. 1
    def test_hive_udfs_missing_jar(self, vector):
        """ IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS"""
        # Copy hive-exec.jar to a temporary file
        jar_path = "tmp/" + get_random_id(5) + ".jar"
        self.hdfs_client.copy('test-warehouse/hive-exec.jar', jar_path)
        drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
        create_fn_stmt = "create function default.pi_missing_jar() returns double \
        location '/%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path

        cluster = ImpalaCluster()
        impalad = cluster.get_any_impalad()
        client = impalad.service.create_beeswax_client()
        # Create and drop functions with sync_ddl to make sure they are reflected
        # in every impalad.
        exec_option = vector.get_value('exec_option')
        exec_option['sync_ddl'] = 1

        self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
        self.execute_query_expect_success(client, create_fn_stmt, exec_option)
        # Delete the udf jar
        self.hdfs_client.delete_file_dir(jar_path)

        different_impalad = cluster.get_different_impalad(impalad)
        client = different_impalad.service.create_beeswax_client()
        # Run a query using the udf from an impalad other than the one
        # we used to create the function. This is to bypass loading from
        # the cache
        try:
            self.execute_query_using_client(client,
                                            "select default.pi_missing_jar()",
                                            vector)
            assert False, "Query expected to fail"
        except ImpalaBeeswaxException as e:
            assert "Failed to get file info" in str(e)
Example no. 2
  def test_hive_udfs_missing_jar(self, vector):
    """ IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS"""
    # Copy hive-exec.jar to a temporary file
    jar_path = "tmp/" + get_random_id(5) + ".jar"
    self.hdfs_client.copy('test-warehouse/hive-exec.jar', jar_path)
    drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
    create_fn_stmt = "create function default.pi_missing_jar() returns double \
        location '/%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path

    cluster = ImpalaCluster()
    impalad = cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    # Create and drop functions with sync_ddl to make sure they are reflected
    # in every impalad.
    exec_option = vector.get_value('exec_option')
    exec_option['sync_ddl'] = 1

    self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
    self.execute_query_expect_success(client, create_fn_stmt, exec_option)
    # Delete the udf jar
    self.hdfs_client.delete_file_dir(jar_path)

    different_impalad = cluster.get_different_impalad(impalad)
    client = different_impalad.service.create_beeswax_client()
    # Run a query using the udf from an impalad other than the one
    # we used to create the function. This is to bypass loading from
    # the cache
    try:
      self.execute_query_using_client(client,
          "select default.pi_missing_jar()", vector)
      assert False, "Query expected to fail"
    except ImpalaBeeswaxException as e:
      assert "Failed to get file info" in str(e)
Example no. 3
 def test_query_profile_encoded_unknown_query_id(self):
   """Test that /query_profile_encoded error message starts with the expected line in
   case of missing query and does not contain any leading whitespace.
   """
   cluster = ImpalaCluster()
   impalad = cluster.get_any_impalad()
   result = impalad.service.read_debug_webpage("query_profile_encoded?query_id=123")
   assert result.startswith("Could not obtain runtime profile: Query id")
Example no. 4
 def test_query_profile_encoded_unknown_query_id(self):
   """Test that /query_profile_encoded error message starts with the expected line in
   case of missing query and does not contain any leading whitespace.
   """
   cluster = ImpalaCluster()
   impalad = cluster.get_any_impalad()
   result = impalad.service.read_debug_webpage("query_profile_encoded?query_id=123")
   assert result.startswith("Could not obtain runtime profile: Query id")
Example no. 5
  def test_create_drop_data_src(self, vector):
    """This will create, run, and drop the same data source repeatedly, exercising
    the lib cache mechanism.
    """
    create_ds_stmt = ("CREATE DATA SOURCE test_data_src "
        "LOCATION '%s/data-sources/test-data-source.jar' "
        "CLASS 'com.cloudera.impala.extdatasource.AllTypesDataSource' "
        "API_VERSION 'V1'" % WAREHOUSE)
    create_tbl_stmt = """CREATE TABLE data_src_tbl (x int)
        PRODUCED BY DATA SOURCE test_data_src('dummy_init_string')"""
    drop_ds_stmt = "drop data source %s test_data_src"
    drop_tbl_stmt = "drop table %s data_src_tbl"
    select_stmt = "select * from data_src_tbl limit 1"
    class_cache_hits_metric = "external-data-source.class-cache.hits"
    class_cache_misses_metric = "external-data-source.class-cache.misses"

    create_stmts = [create_ds_stmt, create_tbl_stmt]
    drop_stmts = [drop_tbl_stmt, drop_ds_stmt]

    # Get the impalad to capture metrics
    impala_cluster = ImpalaCluster()
    impalad = impala_cluster.get_first_impalad()

    # Initial metric values
    class_cache_hits = impalad.service.get_metric_value(class_cache_hits_metric)
    class_cache_misses = impalad.service.get_metric_value(class_cache_misses_metric)
    # Test with 1 node so we can check the metrics on only the coordinator
    vector.get_value('exec_option')['num_nodes'] = 1
    num_iterations = 2
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, num_iterations)

    # Check class cache metrics. Shouldn't have any new cache hits, there should be
    # 2 cache misses for every iteration (jar is loaded by both the FE and BE).
    expected_cache_misses = class_cache_misses + (num_iterations * 2)
    impalad.service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits)
    impalad.service.wait_for_metric_value(class_cache_misses_metric,
        expected_cache_misses)

    # Test with a table that caches the class
    create_tbl_stmt = """CREATE TABLE data_src_tbl (x int)
        PRODUCED BY DATA SOURCE test_data_src('CACHE_CLASS::dummy_init_string')"""
    create_stmts = [create_ds_stmt, create_tbl_stmt]
    # Run once before capturing metrics because the class already may be cached from
    # a previous test run.
    # TODO: Provide a way to clear the cache
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, 1)

    # Capture metric values and run again, should hit the cache.
    class_cache_hits = impalad.service.get_metric_value(class_cache_hits_metric)
    class_cache_misses = impalad.service.get_metric_value(class_cache_misses_metric)
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, 1)
    impalad.service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits + 2)
    impalad.service.wait_for_metric_value(class_cache_misses_metric, class_cache_misses)
Example no. 6
  def test_create_drop_data_src(self, vector):
    """This will create, run, and drop the same data source repeatedly, exercising
    the lib cache mechanism.
    """
    create_ds_stmt = ("CREATE DATA SOURCE test_data_src "
        "LOCATION '%s/data-sources/test-data-source.jar' "
        "CLASS 'com.cloudera.impala.extdatasource.AllTypesDataSource' "
        "API_VERSION 'V1'" % WAREHOUSE)
    create_tbl_stmt = """CREATE TABLE data_src_tbl (x int)
        PRODUCED BY DATA SOURCE test_data_src('dummy_init_string')"""
    drop_ds_stmt = "drop data source %s test_data_src"
    drop_tbl_stmt = "drop table %s data_src_tbl"
    select_stmt = "select * from data_src_tbl limit 1"
    class_cache_hits_metric = "external-data-source.class-cache.hits"
    class_cache_misses_metric = "external-data-source.class-cache.misses"

    create_stmts = [create_ds_stmt, create_tbl_stmt]
    drop_stmts = [drop_tbl_stmt, drop_ds_stmt]

    # Get the impalad to capture metrics
    impala_cluster = ImpalaCluster()
    impalad = impala_cluster.get_first_impalad()

    # Initial metric values
    class_cache_hits = impalad.service.get_metric_value(class_cache_hits_metric)
    class_cache_misses = impalad.service.get_metric_value(class_cache_misses_metric)
    # Test with 1 node so we can check the metrics on only the coordinator
    vector.get_value('exec_option')['num_nodes'] = 1
    num_iterations = 2
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, num_iterations)

    # Check class cache metrics. Shouldn't have any new cache hits, there should be
    # 2 cache misses for every iteration (jar is loaded by both the FE and BE).
    expected_cache_misses = class_cache_misses + (num_iterations * 2)
    impalad.service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits)
    impalad.service.wait_for_metric_value(class_cache_misses_metric,
        expected_cache_misses)

    # Test with a table that caches the class
    create_tbl_stmt = """CREATE TABLE data_src_tbl (x int)
        PRODUCED BY DATA SOURCE test_data_src('CACHE_CLASS::dummy_init_string')"""
    create_stmts = [create_ds_stmt, create_tbl_stmt]
    # Run once before capturing metrics because the class already may be cached from
    # a previous test run.
    # TODO: Provide a way to clear the cache
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, 1)

    # Capture metric values and run again, should hit the cache.
    class_cache_hits = impalad.service.get_metric_value(class_cache_hits_metric)
    class_cache_misses = impalad.service.get_metric_value(class_cache_misses_metric)
    self.create_drop_ddl(vector, "data_src_test", create_stmts, drop_stmts,
        select_stmt, 1)
    impalad.service.wait_for_metric_value(class_cache_hits_metric, class_cache_hits + 2)
    impalad.service.wait_for_metric_value(class_cache_misses_metric, class_cache_misses)
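The metric check in the example above relies on a small piece of arithmetic: each iteration loads the data source jar once in the frontend and once in the backend, so two iterations add four misses. A minimal sketch of that expectation, using hypothetical starting values rather than numbers from a real run:

# Hypothetical starting value; in the test it is read from the impalad metrics.
class_cache_misses = 6
num_iterations = 2
# Each iteration adds two misses: one for the FE class load and one for the BE.
expected_cache_misses = class_cache_misses + (num_iterations * 2)
assert expected_cache_misses == 10
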
Example no. 7
    def test_pull_stats_profile(self, vector, unique_database):
        """Checks that the frontend profile includes metrics when computing
       incremental statistics.
    """
        try:
            client = ImpalaCluster().impalads[0].service.create_beeswax_client()
            create = "create table test like functional.alltypes"
            load = "insert into test partition(year, month) select * from functional.alltypes"
            insert = """insert into test partition(year=2009, month=1) values
                  (29349999, true, 4, 4, 4, 40,4.400000095367432,40.4,
                  "10/21/09","4","2009-10-21 03:24:09.600000000")"""
            stats_all = "compute incremental stats test"
            stats_part = "compute incremental stats test partition (year=2009,month=1)"

            # Checks that profile does not have metrics for incremental stats when
            # the operation is not 'compute incremental stats'.
            self.execute_query_expect_success(client,
                                              "use %s" % unique_database)
            profile = self.execute_query_expect_success(client,
                                                        create).runtime_profile
            assert profile.count("StatsFetch") == 0
            # Checks that incremental stats metrics are present when 'compute incremental
            # stats' is run. Since the table has no stats, expect that no bytes are fetched.
            self.execute_query_expect_success(client, load)
            profile = self.execute_query_expect_success(
                client, stats_all).runtime_profile
            assert profile.count("StatsFetch") > 1
            assert profile.count("StatsFetch.CompressedBytes: 0") == 1
            # Checks that bytes fetched is non-zero since incremental stats are present now
            # and should have been fetched.
            self.execute_query_expect_success(client, insert)
            profile = self.execute_query_expect_success(
                client, stats_part).runtime_profile
            assert profile.count("StatsFetch") > 1
            assert profile.count("StatsFetch.CompressedBytes") == 1
            assert profile.count("StatsFetch.CompressedBytes: 0") == 0
            # Adds a partition, computes stats, and checks that the metrics in the profile
            # reflect the operation.
            alter = "alter table test add partition(year=2011, month=1)"
            insert_new_partition = """
          insert into test partition(year=2011, month=1) values
          (29349999, true, 4, 4, 4, 40,4.400000095367432,40.4,
          "10/21/09","4","2009-10-21 03:24:09.600000000")
          """
            self.execute_query_expect_success(client, alter)
            self.execute_query_expect_success(client, insert_new_partition)
            profile = self.execute_query_expect_success(
                client, stats_all).runtime_profile
            assert profile.count("StatsFetch.TotalPartitions: 25") == 1
            assert profile.count("StatsFetch.NumPartitionsWithStats: 24") == 1
        finally:
            client.close()
Example no. 8
  def test_run_invalidate_refresh(self, vector):
    """Verifies that running concurrent invalidate table/catalog and refresh commands
    don't cause failures with other running workloads and ensures catalog versions
    are strictly increasing."""
    target_db = self.execute_scalar('select current_database()', vector=vector)
    impala_cluster = ImpalaCluster.get_e2e_test_cluster()
    impalad = impala_cluster.impalads[0].service
    catalogd = impala_cluster.catalogd.service

    for i in xrange(NUM_ITERATIONS):
      # Get the catalog versions for the table before running the workload
      before_versions = dict()
      before_versions['catalogd'] =\
          self.get_table_version(catalogd, target_db, 'lineitem')
      before_versions['impalad'] = self.get_table_version(impalad, target_db, 'lineitem')

      self.run_test_case('stress-with-invalidate-refresh', vector)

      # Get the catalog versions for the table after running the workload
      after_versions = dict()
      after_versions['catalogd'] = self.get_table_version(catalogd, target_db, 'lineitem')
      after_versions['impalad'] = self.get_table_version(impalad, target_db, 'lineitem')

      # Catalog versions should be strictly increasing
      assert before_versions['impalad'] < after_versions['impalad']
      assert before_versions['catalogd'] < after_versions['catalogd']
Example no. 9
 def execute(self, query):
   """Executes a query on the coordinator of the local minicluster."""
   cluster = ImpalaCluster.get_e2e_test_cluster()
   if len(cluster.impalads) == 0:
     raise Exception("Coordinator not running")
   client = cluster.get_first_impalad().service.create_hs2_client()
   return client.execute(query)
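A minimal usage sketch for the helper above, assuming it is called from another method of the same class; the query text is a hypothetical placeholder:

# Hypothetical sibling method; "select 1" is a placeholder statement.
def smoke_check(self):
  # Raises if no impalad is running, mirroring the check in execute() above.
  return self.execute("select 1")
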
Example no. 10
    def test_run_invalidate_refresh(self, vector):
        """Verifies that running concurrent invalidate table/catalog and refresh commands
    don't cause failures with other running workloads and ensures catalog versions
    are strictly increasing."""
        target_db = self.execute_scalar('select current_database()',
                                        vector=vector)
        impala_cluster = ImpalaCluster()
        impalad = impala_cluster.impalads[0].service
        catalogd = impala_cluster.catalogd.service

        for i in xrange(NUM_ITERATIONS):
            # Get the catalog versions for the table before running the workload
            before_versions = dict()
            before_versions['catalogd'] =\
                self.get_table_version(catalogd, target_db, 'lineitem')
            before_versions['impalad'] = self.get_table_version(
                impalad, target_db, 'lineitem')

            self.run_test_case('stress-with-invalidate-refresh', vector)

            # Get the catalog versions for the table after running the workload
            after_versions = dict()
            after_versions['catalogd'] = self.get_table_version(
                catalogd, target_db, 'lineitem')
            after_versions['impalad'] = self.get_table_version(
                impalad, target_db, 'lineitem')

            # Catalog versions should be strictly increasing
            assert before_versions['impalad'] < after_versions['impalad']
            assert before_versions['catalogd'] < after_versions['catalogd']
Example no. 11
def wait_for_cluster(timeout_in_seconds=CLUSTER_WAIT_TIMEOUT_IN_SECONDS):
  """Checks if the cluster is "ready"

  A cluster is deemed "ready" if:
    - All backends are registered with the statestore.
    - Each impalad knows about all other impalads.
    - Each coordinator impalad's catalog cache is ready.
  This information is retrieved by querying the statestore debug webpage
  and each individual impalad's metrics webpage.
  """
  impala_cluster = ImpalaCluster()
  # impalad processes may take a while to come up.
  wait_for_impala_process_count(impala_cluster)

  # TODO: fix this for coordinator-only nodes as well.
  expected_num_backends = options.cluster_size
  if options.catalog_init_delays != "":
    for delay in options.catalog_init_delays.split(","):
      if int(delay.strip()) != 0: expected_num_backends -= 1

  for impalad in impala_cluster.impalads:
    impalad.service.wait_for_num_known_live_backends(expected_num_backends,
        timeout=CLUSTER_WAIT_TIMEOUT_IN_SECONDS, interval=2)
    if impalad._get_arg_value("is_coordinator", default="true") == "true" and \
       impalad._get_arg_value("stress_catalog_init_delay_ms", default=0) == 0:
      wait_for_catalog(impalad)
Example no. 12
    def test_jvm_mem_tracking(self, vector):
        service = ImpalaCluster.get_e2e_test_cluster().impalads[0].service
        verifier = MemUsageVerifier(service)
        proc_values = verifier.get_mem_usage_values('Process')
        proc_total = proc_values['total']
        proc_limit = proc_values['limit']
        max_heap_size = verifier.get_mem_usage_values(
            'JVM: max heap size')['total']
        non_heap_committed = verifier.get_mem_usage_values(
            'JVM: non-heap committed')['total']
        MB = 1024 * 1024
        LOG.info(
            "proc_total={0}, max_heap_size={1} non_heap_committed={2}".format(
                proc_total, max_heap_size, non_heap_committed))
        # The max heap size will be lower than -Xmx but should be in the same general range.
        assert max_heap_size >= 900 * MB and max_heap_size <= 1024 * MB
        # The non-heap committed value is hard to predict but should be non-zero.
        assert non_heap_committed > 0
        # Process mem consumption should include both of the above values.
        assert proc_total > max_heap_size + non_heap_committed

        # Make sure that the admittable memory is within 100MB of the process limit
        # minus the heap size (there may be some rounding errors).
        backend_json = json.loads(service.read_debug_webpage('backends?json'))
        admit_limit_human_readable = backend_json['backends'][0][
            'admit_mem_limit']
        admit_limit = parse_mem_value(admit_limit_human_readable)
        LOG.info("proc_limit={0}, admit_limit={1}".format(
            proc_limit, admit_limit))
        assert abs(admit_limit - (proc_limit - max_heap_size)) <= 100 * MB
Example no. 13
def run_stress_workload(queries, database, workload, start_delay,
        kill_frequency, concurrency, iterations, num_impalads):
  """Runs the given set of queries against the the given database. 'concurrency' controls
  how many concurrent streams of the queries are run, and 'iterations' controls how many
  times the workload is run. 'num_impalads' controls the number of impalads to launch.
  The 'kill_frequency' and 'start_delay' are used to configure the impalad killer thread.
  'workload' is purely used for debugging purposes."""

  # Create the global QueryRetryLatch.
  global completed_queries_latch
  completed_queries_latch = QueryRetryLatch(concurrency)

  # Start the Impala cluster and set the coordinator.
  start_impala_cluster(num_impalads)
  cluster = ImpalaCluster()
  impala_coordinator = cluster.impalads[0]

  # Start the 'random impalad killer' thread.
  start_random_impalad_killer(kill_frequency, start_delay, cluster)

  # Run the stress test 'iterations' times.
  for i in xrange(iterations):
    LOG.info("Starting iteration {0} of workload {1}".format(i, workload))
    run_concurrent_workloads(concurrency, impala_coordinator, database,
        queries)

  # Print the total number of queries retried.
  global total_queries_retried_lock
  global total_queries_retried
  total_queries_retried_lock.acquire()
  LOG.info("Total queries retried {0}".format(total_queries_retried))
  total_queries_retried_lock.release()
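A hypothetical invocation of run_stress_workload(); the concrete values below are placeholders chosen for illustration and would normally come from the script's argument parsing:

# All values are illustrative, not defaults taken from the script.
queries = ["select count(*) from tpch.lineitem"]
run_stress_workload(queries, database="tpch", workload="tpch-stress",
    start_delay=10, kill_frequency=30, concurrency=4, iterations=2,
    num_impalads=3)
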
Example no. 14
    def __test_invalid_result_caching(self, sql_stmt):
        """ Tests that invalid requests for query-result caching fail
    using the given sql_stmt."""
        impala_cluster = ImpalaCluster.get_e2e_test_cluster()
        impalad = impala_cluster.impalads[0].service

        execute_statement_req = TCLIService.TExecuteStatementReq()
        execute_statement_req.sessionHandle = self.session_handle
        execute_statement_req.statement = sql_stmt
        execute_statement_req.confOverlay = dict()

        # Test that a malformed result-cache size returns an error.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "bad_number"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(
            execute_statement_resp, TCLIService.TStatusCode.ERROR_STATUS,
            "Invalid value 'bad_number' for 'impala.resultset.cache.size' option"
        )
        self.__verify_num_cached_rows(0)
        assert 0 == impalad.get_num_in_flight_queries()

        # Test that a result-cache size exceeding the per-Impalad maximum returns an error.
        # The default maximum result-cache size is 100000.
        execute_statement_req.confOverlay[
            self.IMPALA_RESULT_CACHING_OPT] = "100001"
        execute_statement_resp = self.hs2_client.ExecuteStatement(
            execute_statement_req)
        HS2TestSuite.check_response(
            execute_statement_resp, TCLIService.TStatusCode.ERROR_STATUS,
            "Requested result-cache size of 100001 exceeds Impala's maximum of 100000"
        )
        self.__verify_num_cached_rows(0)
        assert 0 == impalad.get_num_in_flight_queries()
Example no. 15
  def _verify_describe_view(self, vector, view_name, expected_substr):
    """
    Verify across all impalads that the view 'view_name' has the given substring in its
    expanded SQL.

    If SYNC_DDL is enabled, the verification should complete immediately. Otherwise,
    loops waiting for the expected condition to pass.
    """
    if vector.get_value('exec_option')['sync_ddl']:
      num_attempts = 1
    else:
      num_attempts = 60
    for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
      client = impalad.service.create_beeswax_client()
      try:
        for attempt in itertools.count(1):
          assert attempt <= num_attempts, "ran out of attempts"
          try:
            result = self.execute_query_expect_success(
                client, "describe formatted %s" % view_name)
            exp_line = [l for l in result.data if 'View Expanded' in l][0]
          except ImpalaBeeswaxException as e:
            # In non-SYNC_DDL tests, it's OK to get a "missing view" type error
            # until the metadata propagates.
            exp_line = "Exception: %s" % e
          if expected_substr in exp_line.lower():
            return
          time.sleep(1)
      finally:
        client.close()
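A usage sketch consistent with how this helper is invoked in Example no. 20 below; the view name here is a hypothetical placeholder:

# The expected substring is compared against a lowercased copy of the matching
# line, so it should be written in lower case.
self._verify_describe_view(vector, "some_db.test_describe_view",
                           "select * from functional.alltypesagg")
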
Example no. 16
 def _verify_no_fragments_running(self):
   """Raise an exception if there are fragments running on the cluster after a
   timeout."""
   for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
     verifier = MetricVerifier(impalad.service)
     verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0, timeout=10)
     verifier.wait_for_backend_admission_control_state(timeout=10)
Example no. 17
    def _verify_describe_view(self, vector, view_name, expected_substr):
        """
    Verify across all impalads that the view 'view_name' has the given substring in its
    expanded SQL.

    If SYNC_DDL is enabled, the verification should complete immediately. Otherwise,
    loops waiting for the expected condition to pass.
    """
        if vector.get_value('exec_option')['sync_ddl']:
            num_attempts = 1
        else:
            num_attempts = 60
        for impalad in ImpalaCluster().impalads:
            client = impalad.service.create_beeswax_client()
            try:
                for attempt in itertools.count(1):
                    assert attempt <= num_attempts, "ran out of attempts"
                    try:
                        result = self.execute_query_expect_success(
                            client, "describe formatted %s" % view_name)
                        exp_line = [
                            l for l in result.data if 'View Expanded' in l
                        ][0]
                    except ImpalaBeeswaxException as e:
                        # In non-SYNC_DDL tests, it's OK to get a "missing view" type error
                        # until the metadata propagates.
                        exp_line = "Exception: %s" % e
                    if expected_substr in exp_line.lower():
                        return
                    time.sleep(1)
            finally:
                client.close()
Example no. 18
  def test_jvm_mem_tracking(self, vector):
    service = ImpalaCluster.get_e2e_test_cluster().impalads[0].service
    verifier = MemUsageVerifier(service)
    proc_values = verifier.get_mem_usage_values('Process')
    proc_total = proc_values['total']
    proc_limit = proc_values['limit']
    max_heap_size = verifier.get_mem_usage_values('JVM: max heap size')['total']
    non_heap_committed = verifier.get_mem_usage_values('JVM: non-heap committed')['total']
    MB = 1024 * 1024
    LOG.info("proc_total={0}, max_heap_size={1} non_heap_committed={2}".format(
        proc_total, max_heap_size, non_heap_committed))
    # The max heap size will be lower than -Xmx but should be in the same general range.
    assert max_heap_size >= 900 * MB and max_heap_size <= 1024 * MB
    # The non-heap committed value is hard to predict but should be non-zero.
    assert non_heap_committed > 0
    # Process mem consumption should include both of the above values.
    assert proc_total > max_heap_size + non_heap_committed

    # Make sure that the admittable memory is within 100MB of the process limit
    # minus the heap size (there may be some rounding errors).
    backend_json = json.loads(service.read_debug_webpage('backends?json'))
    admit_limit_human_readable = backend_json['backends'][0]['admit_mem_limit']
    admit_limit = parse_mem_value(admit_limit_human_readable)
    LOG.info("proc_limit={0}, admit_limit={1}".format(proc_limit, admit_limit))
    assert abs(admit_limit - (proc_limit - max_heap_size)) <= 100 * MB
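A worked numeric example of the final assertion above, using made-up values rather than numbers from a real cluster: with a 12 GB process limit and a 1 GB max heap, the admittable memory reported on the /backends page must land within 100 MB of 11 GB:

MB = 1024 * 1024
proc_limit = 12 * 1024 * MB    # hypothetical process memory limit
max_heap_size = 1024 * MB      # hypothetical JVM max heap
admit_limit = 11 * 1024 * MB   # hypothetical value parsed from /backends
assert abs(admit_limit - (proc_limit - max_heap_size)) <= 100 * MB
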
Example no. 19
  def __test_invalid_result_caching(self, sql_stmt):
    """ Tests that invalid requests for query-result caching fail
    using the given sql_stmt."""
    impala_cluster = ImpalaCluster.get_e2e_test_cluster()
    impalad = impala_cluster.impalads[0].service

    execute_statement_req = TCLIService.TExecuteStatementReq()
    execute_statement_req.sessionHandle = self.session_handle
    execute_statement_req.statement = sql_stmt
    execute_statement_req.confOverlay = dict()

    # Test that a malformed result-cache size returns an error.
    execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "bad_number"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp,
        TCLIService.TStatusCode.ERROR_STATUS,
        "Invalid value 'bad_number' for 'impala.resultset.cache.size' option")
    self.__verify_num_cached_rows(0)
    assert 0 == impalad.get_num_in_flight_queries()

    # Test that a result-cache size exceeding the per-Impalad maximum returns an error.
    # The default maximum result-cache size is 100000.
    execute_statement_req.confOverlay[self.IMPALA_RESULT_CACHING_OPT] = "100001"
    execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
    HS2TestSuite.check_response(execute_statement_resp,
        TCLIService.TStatusCode.ERROR_STATUS,
        "Requested result-cache size of 100001 exceeds Impala's maximum of 100000")
    self.__verify_num_cached_rows(0)
    assert 0 == impalad.get_num_in_flight_queries()
Example no. 20
    def test_views_describe(self, vector, unique_database):
        # IMPALA-6896: Tests that altered views can be described by all impalads.
        impala_cluster = ImpalaCluster()
        impalads = impala_cluster.impalads
        view_name = "%s.test_describe_view" % unique_database
        query_opts = vector.get_value('exec_option')
        first_client = impalads[0].service.create_beeswax_client()
        try:
            # Create a view and verify it's visible.
            self.execute_query_expect_success(
                first_client, "create view {0} as "
                "select * from functional.alltypes".format(view_name),
                query_opts)
            self._verify_describe_view(vector, view_name,
                                       "select * from functional.alltypes")

            # Alter the view and verify the alter is visible.
            self.execute_query_expect_success(
                first_client, "alter view {0} as "
                "select * from functional.alltypesagg".format(view_name),
                query_opts)
            self._verify_describe_view(vector, view_name,
                                       "select * from functional.alltypesagg")
        finally:
            first_client.close()
Example no. 21
 def test_low_mem_limit_no_fragments(self, vector):
   self.low_memory_limit_test(vector, 'tpch-q14', self.MIN_MEM_FOR_TPCH['Q14'])
   self.low_memory_limit_test(vector, 'tpch-q18', self.MIN_MEM_FOR_TPCH['Q18'])
   self.low_memory_limit_test(vector, 'tpch-q20', self.MIN_MEM_FOR_TPCH['Q20'])
   for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
     verifier = MetricVerifier(impalad.service)
     verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0)
Example no. 22
    def test_low_mem_limit_low_selectivity_scan(self, cursor, mem_limit,
                                                vector):
        """Tests that the queries specified in this test suite run under the given
    memory limits."""
        exec_options = dict(
            (k, str(v))
            for k, v in vector.get_value('exec_option').iteritems())
        exec_options['mem_limit'] = "{0}m".format(mem_limit)
        for i, q in enumerate(self.QUERIES):
            try:
                cursor.execute(q, configuration=exec_options)
                cursor.fetchall()
            except Exception as e:
                if (mem_limit > self.QUERY_MEM_LIMITS[i]):
                    raise
                assert "Memory limit exceeded" in str(e)

        # IMPALA-4654: Validate the fix for a bug where LimitReached() wasn't respected in
        # the KuduScanner and the limit query above would result in a fragment running an
        # additional minute. This ensures that the num fragments 'in flight' reaches 0 in
        # less time than IMPALA-4654 was reproducing (~60sec) but yet still enough time that
        # this test won't be flaky.
        verifiers = [
            MetricVerifier(i.service) for i in ImpalaCluster().impalads
        ]
        for v in verifiers:
            v.wait_for_metric("impala-server.num-fragments-in-flight",
                              0,
                              timeout=30)
Example no. 23
 def test_insert_mem_limit(self, vector, unique_database):
     if (vector.get_value('table_format').file_format == 'parquet'):
         vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
             vector.get_value('compression_codec')
     self.run_test_case(
         'QueryTest/insert-mem-limit',
         vector,
         unique_database,
         multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1,
         test_file_vars={
             '$ORIGINAL_DB':
             ImpalaTestSuite.get_db_name_from_format(
                 vector.get_value('table_format'))
         })
     # IMPALA-7023: These queries can linger and use up memory, causing subsequent
     # tests to hit memory limits. Wait for some time to allow the query to
     # be reclaimed.
     verifiers = [
         MetricVerifier(i.service)
         for i in ImpalaCluster.get_e2e_test_cluster().impalads
     ]
     for v in verifiers:
         v.wait_for_metric("impala-server.num-fragments-in-flight",
                           0,
                           timeout=180)
Example no. 24
  def test_views_describe(self, vector, unique_database):
    # IMPALA-6896: Tests that altered views can be described by all impalads.
    impala_cluster = ImpalaCluster()
    impalads = impala_cluster.impalads
    first_client = impalads[0].service.create_beeswax_client()
    try:
      self.execute_query_expect_success(first_client,
                                        "create view {0}.test_describe_view as "
                                        "select * from functional.alltypes"
                                        .format(unique_database), {'sync_ddl': 1})
      self.execute_query_expect_success(first_client,
                                        "alter view {0}.test_describe_view as "
                                        "select * from functional.alltypesagg"
                                        .format(unique_database))
    finally:
      first_client.close()

    for impalad in impalads:
      client = impalad.service.create_beeswax_client()
      try:
        while True:
          result = self.execute_query_expect_success(
              client, "describe formatted {0}.test_describe_view"
              .format(unique_database))
          if any("select * from functional.alltypesagg" in s.lower()
                 for s in result.data):
            break
          time.sleep(1)
      finally:
        client.close()
Example no. 25
 def _run_query_all_impalads(self, exec_options, query, expected):
     impala_cluster = ImpalaCluster.get_e2e_test_cluster()
     for impalad in impala_cluster.impalads:
         client = impalad.service.create_beeswax_client()
         result = self.execute_query_expect_success(client, query,
                                                    exec_options)
         assert result.data == expected, impalad
Example no. 26
  def _start_impala_cluster(cls, options, log_dir=os.getenv('LOG_DIR', "/tmp/"),
      cluster_size=CLUSTER_SIZE, num_coordinators=NUM_COORDINATORS,
      use_exclusive_coordinators=False, log_level=1, expected_num_executors=CLUSTER_SIZE):
    cls.impala_log_dir = log_dir
    # We ignore TEST_START_CLUSTER_ARGS here. Custom cluster tests specifically test that
    # certain custom startup arguments work and we want to keep them independent of dev
    # environments.
    cmd = [os.path.join(IMPALA_HOME, 'bin/start-impala-cluster.py'),
           '--cluster_size=%d' % cluster_size,
           '--num_coordinators=%d' % num_coordinators,
           '--log_dir=%s' % log_dir,
           '--log_level=%s' % log_level]

    if use_exclusive_coordinators:
      cmd.append("--use_exclusive_coordinators")

    if os.environ.get("ERASURE_CODING") == "true":
      cmd.append("--impalad_args=--default_query_options=allow_erasure_coded_files=true")

    try:
      check_call(cmd + options, close_fds=True)
    finally:
      # Failure tests expect cluster to be initialised even if start-impala-cluster fails.
      cls.cluster = ImpalaCluster()
    statestored = cls.cluster.statestored
    if statestored is None:
      raise Exception("statestored was not found")

    # The number of statestore subscribers is
    # cluster_size (# of impalad) + 1 (for catalogd).
    expected_subscribers = cluster_size + 1

    statestored.service.wait_for_live_subscribers(expected_subscribers, timeout=60)
    for impalad in cls.cluster.impalads:
      impalad.service.wait_for_num_known_live_backends(expected_num_executors, timeout=60)
Example no. 27
    def test_failure_in_prepare_multi_fragment(self):
        # Test that if one fragment fails that the others are cleaned up during the ensuing
        # cancellation.
        verifiers = [
            MetricVerifier(i.service)
            for i in ImpalaCluster.get_e2e_test_cluster().impalads
        ]
        # Fail the scan node
        self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'")

        # Force a query plan that will have three fragments or more.
        try:
            self.client.execute(
                "SELECT COUNT(*) FROM functional.alltypes a JOIN [SHUFFLE] \
        functional.alltypes b on a.id = b.id")
            assert "Query should have thrown an error"
        except ImpalaBeeswaxException:
            pass

        for v in verifiers:
            # Long timeout required because fragments may be blocked while sending data. The
            # default value of --datastream_sender_timeout_ms is 120s before they wake up and
            # cancel themselves.
            #
            # TODO: Fix when we have cancellable RPCs.
            v.wait_for_metric(self.IN_FLIGHT_FRAGMENTS, 0, timeout=125)
Example no. 28
 def _start_impala_cluster(cls,
                           options,
                           log_dir=os.getenv('LOG_DIR', "/tmp/"),
                           cluster_size=CLUSTER_SIZE,
                           log_level=1):
     cls.impala_log_dir = log_dir
     cmd = [
         os.path.join(IMPALA_HOME, 'bin/start-impala-cluster.py'),
         '--cluster_size=%d' % cluster_size,
         '--log_dir=%s' % log_dir,
         '--log_level=%s' % log_level
     ]
     try:
         check_call(cmd + options, close_fds=True)
     finally:
         # Failure tests expect cluster to be initialised even if start-impala-cluster fails.
         cls.cluster = ImpalaCluster()
     statestored = cls.cluster.statestored
     if statestored is None:
         raise Exception("statestored was not found")
     statestored.service.wait_for_live_subscribers(NUM_SUBSCRIBERS,
                                                   timeout=60)
     for impalad in cls.cluster.impalads:
         impalad.service.wait_for_num_known_live_backends(CLUSTER_SIZE,
                                                          timeout=60)
Example no. 29
 def test_backends_are_idle(self):
     """Test that the backends state is in a valid state when quiesced - i.e.
 no queries are running and the admission control state reflects that no
 resources are used."""
     for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
         verifier = MetricVerifier(impalad.service)
         verifier.wait_for_backend_admission_control_state()
Example no. 30
    def test_failpoints(self, vector):
        query = vector.get_value('query')
        action = vector.get_value('action')
        location = vector.get_value('location')
        vector.get_value('exec_option')['mt_dop'] = vector.get_value('mt_dop')

        plan_node_ids = self.__parse_plan_nodes_from_explain(query, vector)
        for node_id in plan_node_ids:
            debug_action = '%d:%s:%s' % (node_id, location,
                                         FAILPOINT_ACTION_MAP[action])
            # IMPALA-7046: add jitter to backend startup to exercise various failure paths.
            debug_action += '|COORD_BEFORE_EXEC_RPC:JITTER@100@0.3'

            LOG.info('Current debug action: SET DEBUG_ACTION=%s' %
                     debug_action)
            vector.get_value('exec_option')['debug_action'] = debug_action

            if action == 'CANCEL':
                self.__execute_cancel_action(query, vector)
            elif action == 'FAIL' or action == 'MEM_LIMIT_EXCEEDED':
                self.__execute_fail_action(query, vector)
            else:
                assert 0, 'Unknown action: %s' % action

        # We should be able to execute the same query successfully when no failures are
        # injected.
        del vector.get_value('exec_option')['debug_action']
        self.execute_query(query, vector.get_value('exec_option'))

        # Detect any hung fragments left from this test.
        for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
            verifier = MetricVerifier(impalad.service)
            verifier.wait_for_metric("impala-server.num-fragments-in-flight",
                                     0)
Example no. 31
  def test_pull_stats_profile(self, vector, unique_database):
    """Checks that the frontend profile includes metrics when computing
       incremental statistics.
    """
    try:
      client = ImpalaCluster().impalads[0].service.create_beeswax_client()
      create = "create table test like functional.alltypes"
      load = "insert into test partition(year, month) select * from functional.alltypes"
      insert = """insert into test partition(year=2009, month=1) values
                  (29349999, true, 4, 4, 4, 40,4.400000095367432,40.4,
                  "10/21/09","4","2009-10-21 03:24:09.600000000")"""
      stats_all = "compute incremental stats test"
      stats_part = "compute incremental stats test partition (year=2009,month=1)"

      # Checks that profile does not have metrics for incremental stats when
      # the operation is not 'compute incremental stats'.
      self.execute_query_expect_success(client, "use %s" % unique_database)
      profile = self.execute_query_expect_success(client, create).runtime_profile
      assert profile.count("StatsFetch") == 0
      # Checks that incremental stats metrics are present when 'compute incremental
      # stats' is run. Since the table has no stats, expect that no bytes are fetched.
      self.execute_query_expect_success(client, load)
      profile = self.execute_query_expect_success(client, stats_all).runtime_profile
      assert profile.count("StatsFetch") > 1
      assert profile.count("StatsFetch.CompressedBytes: 0") == 1
      # Checks that bytes fetched is non-zero since incremental stats are present now
      # and should have been fetched.
      self.execute_query_expect_success(client, insert)
      profile = self.execute_query_expect_success(client, stats_part).runtime_profile
      assert profile.count("StatsFetch") > 1
      assert profile.count("StatsFetch.CompressedBytes") == 1
      assert profile.count("StatsFetch.CompressedBytes: 0") == 0
      # Adds a partition, computes stats, and checks that the metrics in the profile
      # reflect the operation.
      alter = "alter table test add partition(year=2011, month=1)"
      insert_new_partition = """
          insert into test partition(year=2011, month=1) values
          (29349999, true, 4, 4, 4, 40,4.400000095367432,40.4,
          "10/21/09","4","2009-10-21 03:24:09.600000000")
          """
      self.execute_query_expect_success(client, alter)
      self.execute_query_expect_success(client, insert_new_partition)
      profile = self.execute_query_expect_success(client, stats_all).runtime_profile
      assert profile.count("StatsFetch.TotalPartitions: 25") == 1
      assert profile.count("StatsFetch.NumPartitionsWithStats: 24") == 1
    finally:
      client.close()
Example no. 32
    def test_hash_join_timer(self, vector):
        # This test runs serially because it requires the query to come back within
        # some amount of time. Running this with other tests makes it hard to bound
        # that time. It also assumes that it will be able to get a thread token to
        # execute the join build in parallel.
        test_case = vector.get_value('test cases')
        query = test_case[0]
        join_type = test_case[1]

        # Ensure that the cluster is idle before starting the test query.
        for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
            verifier = MetricVerifier(impalad.service)
            verifier.wait_for_metric("impala-server.num-fragments-in-flight",
                                     0)

        # Execute the query. The query summary and profile are stored in 'result'.
        result = self.execute_query(query, vector.get_value('exec_option'))

        # Parse the query summary; The join node is "id=3".
        # In the ExecSummary, search for the join operator's summary and verify the
        # avg and max times are within acceptable limits.
        exec_summary = result.exec_summary
        check_execsummary_count = 0
        join_node_name = "03:%s" % (join_type)
        for line in exec_summary:
            if line['operator'] == join_node_name:
                avg_time_ms = line['avg_time'] / self.NANOS_PER_MILLI
                self.__verify_join_time(avg_time_ms, "ExecSummary Avg")
                max_time_ms = line['max_time'] / self.NANOS_PER_MILLI
                self.__verify_join_time(max_time_ms, "ExecSummary Max")
                check_execsummary_count += 1
        assert (check_execsummary_count == 1), \
            "Unable to verify ExecSummary: {0}".format(exec_summary)

        # Parse the query profile; The join node is "id=3".
        # In the profiles, search for lines containing "(id=3)" and parse for the avg and
        # non-child times to verify that they are within acceptable limits. Also verify
        # that the build side is built in a different thread by searching for the string:
        # "Join Build-Side Prepared Asynchronously"
        profile = result.runtime_profile
        check_fragment_count = 0
        asyn_build = False
        for line in profile.split("\n"):
            if ("(id=3)" in line):
                # Sample line:
                # HASH_JOIN_NODE (id=3):(Total: 3s580ms, non-child: 11.89ms, % non-child: 0.31%)
                strip1 = re.split("non-child: ", line)[1]
                non_child_time = re.split(", ", strip1)[0]
                non_child_time_ms = parse_duration_string_ms(non_child_time)
                self.__verify_join_time(non_child_time_ms,
                                        "Fragment non-child")
                check_fragment_count += 1
            # Search for "Join Build-Side Prepared Asynchronously"
            if ("Join Build-Side Prepared Asynchronously" in line):
                asyn_build = True
        assert asyn_build, "Join is not prepared asynchronously: {0}".format(profile)
        assert (check_fragment_count > 1), \
            "Unable to verify Fragment or Average Fragment: {0}".format(profile)
Example no. 33
  def test_execqueryfinstances_timeout(self, vector):
    for i in range(3):
      ex = self.execute_query_expect_failure(self.client, self.TEST_QUERY)
      assert "RPC recv timed out" in str(ex)
    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]

    for v in verifiers:
      v.wait_for_metric("impala-server.num-fragments-in-flight", 0)
      v.verify_num_unused_buffers()
Example no. 34
  def test_hash_join_timer(self, vector):
    # This test runs serially because it requires the query to come back within
    # some amount of time. Running this with other tests makes it hard to bound
    # that time. It also assumes that it will be able to get a thread token to
    # execute the join build in parallel.
    test_case = vector.get_value('test cases')
    query = test_case[0]
    join_type = test_case[1]

    # Ensure that the cluster is idle before starting the test query.
    for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
      verifier = MetricVerifier(impalad.service)
      verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0)

    # Execute the query. The query summary and profile are stored in 'result'.
    result = self.execute_query(query, vector.get_value('exec_option'))

    # Parse the query summary; The join node is "id=3".
    # In the ExecSummary, search for the join operator's summary and verify the
    # avg and max times are within acceptable limits.
    exec_summary = result.exec_summary
    check_execsummary_count = 0
    join_node_name = "03:%s" % (join_type)
    for line in exec_summary:
      if line['operator'] == join_node_name:
        avg_time_ms = line['avg_time'] / self.NANOS_PER_MILLI
        self.__verify_join_time(avg_time_ms, "ExecSummary Avg")
        max_time_ms = line['max_time'] / self.NANOS_PER_MILLI
        self.__verify_join_time(max_time_ms, "ExecSummary Max")
        check_execsummary_count += 1
    assert (check_execsummary_count == 1), \
        "Unable to verify ExecSummary: {0}".format(exec_summary)

    # Parse the query profile; The join node is "id=3".
    # In the profiles, search for lines containing "(id=3)" and parse for the avg and
    # non-child times to verify that they are within acceptable limits. Also verify
    # that the build side is built in a different thread by searching for the string:
    # "Join Build-Side Prepared Asynchronously"
    profile = result.runtime_profile
    check_fragment_count = 0
    asyn_build = False
    for line in profile.split("\n"):
      if ("(id=3)" in line):
        # Sample line:
        # HASH_JOIN_NODE (id=3):(Total: 3s580ms, non-child: 11.89ms, % non-child: 0.31%)
        strip1 = re.split("non-child: ", line)[1]
        non_child_time = re.split(", ", strip1)[0]
        non_child_time_ms = parse_duration_string_ms(non_child_time)
        self.__verify_join_time(non_child_time_ms, "Fragment non-child")
        check_fragment_count += 1
      # Search for "Join Build-Side Prepared Asynchronously"
      if ("Join Build-Side Prepared Asynchronously" in line):
        asyn_build = True
    assert (asyn_build), "Join is not prepared asynchronously: {0}".format(profile)
    assert (check_fragment_count > 1), \
        "Unable to verify Fragment or Average Fragment: {0}".format(profile)
Example no. 35
 def _get_json_queries(self, http_addr):
   """Get the json output of the /queries page from the impalad web UI at http_addr."""
   if IS_DOCKERIZED_TEST_CLUSTER:
     # The hostnames in the dockerized cluster may not be externally reachable.
     cluster = ImpalaCluster.get_e2e_test_cluster()
     return cluster.impalads[0].service.get_debug_webpage_json("/queries")
   else:
     resp = urlopen("http://%s/queries?json" % http_addr)
     assert resp.msg == 'OK'
     return json.loads(resp.read())
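A hedged usage sketch for the helper above; the key accessed in the returned JSON is an assumption made purely for illustration, not a documented field of the /queries page:

# Hypothetical consumer of _get_json_queries(); 'in_flight_queries' is an assumed
# key used only to show the shape of a typical check.
queries_json = self._get_json_queries(http_addr)
num_running = len(queries_json.get('in_flight_queries', []))
assert num_running >= 0
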
Example no. 36
  def _start_impala_cluster(cls, options, impala_log_dir=os.getenv('LOG_DIR', "/tmp/"),
      cluster_size=DEFAULT_CLUSTER_SIZE, num_coordinators=NUM_COORDINATORS,
      use_exclusive_coordinators=False, log_level=1,
      expected_num_executors=DEFAULT_CLUSTER_SIZE, default_query_options=None):
    cls.impala_log_dir = impala_log_dir
    # We ignore TEST_START_CLUSTER_ARGS here. Custom cluster tests specifically test that
    # certain custom startup arguments work and we want to keep them independent of dev
    # environments.
    cmd = [os.path.join(IMPALA_HOME, 'bin/start-impala-cluster.py'),
           '--cluster_size=%d' % cluster_size,
           '--num_coordinators=%d' % num_coordinators,
           '--log_dir=%s' % impala_log_dir,
           '--log_level=%s' % log_level]

    if use_exclusive_coordinators:
      cmd.append("--use_exclusive_coordinators")

    if pytest.config.option.use_local_catalog:
      cmd.append("--impalad_args=--use_local_catalog=1")
      cmd.append("--catalogd_args=--catalog_topic_mode=minimal")

    if pytest.config.option.pull_incremental_statistics:
      cmd.append("--impalad_args=%s --catalogd_args=%s" %
                 ("--pull_incremental_statistics", "--pull_incremental_statistics"))

    default_query_option_kvs = []
    # Put any defaults first, then any arguments after that so they can override defaults.
    if os.environ.get("ERASURE_CODING") == "true":
      default_query_option_kvs.append(("allow_erasure_coded_files", "true"))
    if default_query_options is not None:
      default_query_option_kvs.extend(default_query_options)
    # Add the default query options after any arguments. This will override any default
    # options set in --impalad_args by design to force tests to pass default_query_options
    # into this function directly.
    options.append("--impalad_args=--default_query_options={0}".format(
        ','.join(["{0}={1}".format(k, v) for k, v in default_query_option_kvs])))

    logging.info("Starting cluster with command: %s" %
                 " ".join(pipes.quote(arg) for arg in cmd + options))
    try:
      check_call(cmd + options, close_fds=True)
    finally:
      # Failure tests expect cluster to be initialised even if start-impala-cluster fails.
      cls.cluster = ImpalaCluster.get_e2e_test_cluster()
    statestored = cls.cluster.statestored
    if statestored is None:
      raise Exception("statestored was not found")

    # The number of statestore subscribers is
    # cluster_size (# of impalad) + 1 (for catalogd).
    expected_subscribers = cluster_size + 1

    statestored.service.wait_for_live_subscribers(expected_subscribers, timeout=60)
    for impalad in cls.cluster.impalads:
      impalad.service.wait_for_num_known_live_backends(expected_num_executors, timeout=60)
Example no. 37
  def _start_impala_cluster(cls, options, impala_log_dir=os.getenv('LOG_DIR', "/tmp/"),
      cluster_size=DEFAULT_CLUSTER_SIZE, num_coordinators=NUM_COORDINATORS,
      use_exclusive_coordinators=False, log_level=1,
      expected_num_executors=DEFAULT_CLUSTER_SIZE, default_query_options=None):
    cls.impala_log_dir = impala_log_dir
    # We ignore TEST_START_CLUSTER_ARGS here. Custom cluster tests specifically test that
    # certain custom startup arguments work and we want to keep them independent of dev
    # environments.
    cmd = [os.path.join(IMPALA_HOME, 'bin/start-impala-cluster.py'),
           '--cluster_size=%d' % cluster_size,
           '--num_coordinators=%d' % num_coordinators,
           '--log_dir=%s' % impala_log_dir,
           '--log_level=%s' % log_level]

    if use_exclusive_coordinators:
      cmd.append("--use_exclusive_coordinators")

    if pytest.config.option.use_local_catalog:
      cmd.append("--impalad_args=--use_local_catalog=1")
      cmd.append("--catalogd_args=--catalog_topic_mode=minimal")

    if pytest.config.option.pull_incremental_statistics:
      cmd.append("--impalad_args=%s --catalogd_args=%s" %
                 ("--pull_incremental_statistics", "--pull_incremental_statistics"))

    default_query_option_kvs = []
    # Put any defaults first, then any arguments after that so they can override defaults.
    if os.environ.get("ERASURE_CODING") == "true":
      default_query_option_kvs.append(("allow_erasure_coded_files", "true"))
    if default_query_options is not None:
      default_query_option_kvs.extend(default_query_options)
    # Add the default query options after any arguments. This will override any default
    # options set in --impalad_args by design to force tests to pass default_query_options
    # into this function directly.
    options.append("--impalad_args=--default_query_options={0}".format(
        ','.join(["{0}={1}".format(k, v) for k, v in default_query_option_kvs])))

    logging.info("Starting cluster with command: %s" %
                 " ".join(pipes.quote(arg) for arg in cmd + options))
    try:
      check_call(cmd + options, close_fds=True)
    finally:
      # Failure tests expect cluster to be initialised even if start-impala-cluster fails.
      cls.cluster = ImpalaCluster.get_e2e_test_cluster()
    statestored = cls.cluster.statestored
    if statestored is None:
      raise Exception("statestored was not found")

    # The number of statestore subscribers is
    # cluster_size (# of impalad) + 1 (for catalogd).
    expected_subscribers = cluster_size + 1

    statestored.service.wait_for_live_subscribers(expected_subscribers, timeout=60)
    for impalad in cls.cluster.impalads:
      impalad.service.wait_for_num_known_live_backends(expected_num_executors, timeout=60)
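To make the default-query-options handling above concrete, here is how the key/value list is serialized into a single --impalad_args flag; the option values are hypothetical:

# Hypothetical option list; in the method above it is built from the environment
# and from the default_query_options argument.
default_query_option_kvs = [("allow_erasure_coded_files", "true"), ("mt_dop", "4")]
flag = "--impalad_args=--default_query_options={0}".format(
    ','.join(["{0}={1}".format(k, v) for k, v in default_query_option_kvs]))
# flag == "--impalad_args=--default_query_options=allow_erasure_coded_files=true,mt_dop=4"
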
Example no. 38
    def test_hive_udfs_missing_jar(self, vector, unique_database):
        """ IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS"""
        # Copy hive-exec.jar to a temporary file
        jar_path = get_fs_path(
            "/test-warehouse/{0}.db/".format(unique_database) +
            get_random_id(5) + ".jar")
        hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
        check_call(["hadoop", "fs", "-cp", hive_jar, jar_path])
        drop_fn_stmt = ("drop function if exists "
                        "`{0}`.`pi_missing_jar`()".format(unique_database))
        create_fn_stmt = (
            "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
            "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(
                unique_database, jar_path))

        cluster = ImpalaCluster()
        impalad = cluster.get_any_impalad()
        client = impalad.service.create_beeswax_client()
        # Create and drop functions with sync_ddl to make sure they are reflected
        # in every impalad.
        exec_option = copy(vector.get_value('exec_option'))
        exec_option['sync_ddl'] = 1

        self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
        self.execute_query_expect_success(client, create_fn_stmt, exec_option)
        # Delete the udf jar
        check_call(["hadoop", "fs", "-rm", jar_path])

        different_impalad = cluster.get_different_impalad(impalad)
        client = different_impalad.service.create_beeswax_client()
        # Run a query using the udf from an impalad other than the one
        # we used to create the function. This is to bypass loading from
        # the cache
        try:
            self.execute_query_using_client(
                client,
                "select `{0}`.`pi_missing_jar`()".format(unique_database),
                vector)
            assert False, "Query expected to fail"
        except ImpalaBeeswaxException, e:
            assert "Failed to get file info" in str(e)
Example no. 39
0
  def execute_query_verify_metrics(self, query, query_options=None, repeat=1):
    for i in range(repeat):
      try:
        self.execute_query(query, query_options)
      except ImpalaBeeswaxException:
        pass
    verifiers = [ MetricVerifier(i.service) for i in ImpalaCluster().impalads ]

    for v in verifiers:
      v.wait_for_metric("impala-server.num-fragments-in-flight", 0)
      v.verify_num_unused_buffers()
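
Several of these examples end by calling MetricVerifier.wait_for_metric to block until a daemon metric such as impala-server.num-fragments-in-flight drops to zero. The sketch below shows roughly what such a polling helper can look like; get_metric_value is the accessor used elsewhere in these snippets, while the polling interval and failure message are assumptions, not the real implementation.

import time

def wait_for_metric(service, metric_name, expected_value, timeout=60, interval=1):
  # Illustrative polling loop: re-read the metric from the daemon's web service
  # until it reaches the expected value or the timeout expires.
  start = time.time()
  value = None
  while time.time() - start < timeout:
    value = service.get_metric_value(metric_name)
    if value == expected_value:
      return value
    time.sleep(interval)
  raise AssertionError("metric %s did not reach %s within %ss (last value: %s)"
                       % (metric_name, expected_value, timeout, value))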
Example no. 40
0
 def verify_mem_usage(self, non_zero_peak_metrics):
   """Verifies that the memory used by KRPC is returned to the memtrackers and that
   metrics in 'non_zero_peak_metrics' have a peak value > 0.
   """
   verifiers = [ MemUsageVerifier(i.service) for i in ImpalaCluster().impalads ]
   for verifier in verifiers:
     for metric_name in ALL_METRICS:
       usage = verifier.get_mem_usage_values(metric_name)
       assert usage["total"] == 0
       if metric_name in non_zero_peak_metrics:
         assert usage["peak"] > 0, metric_name
Example no. 41
0
 def verify_mem_usage(self, non_zero_peak_metrics):
   """Verifies that the memory used by KRPC is returned to the memtrackers and that
   metrics in 'non_zero_peak_metrics' have a peak value > 0.
   """
   verifiers = [MemUsageVerifier(i.service)
                for i in ImpalaCluster.get_e2e_test_cluster().impalads]
   for verifier in verifiers:
     for metric_name in ALL_METRICS:
       usage = verifier.get_mem_usage_values(metric_name)
       assert usage["total"] == 0
       if metric_name in non_zero_peak_metrics:
         assert usage["peak"] > 0, metric_name
Example no. 42
0
 def test_insert_mem_limit(self, vector):
   if (vector.get_value('table_format').file_format == 'parquet'):
     vector.get_value('exec_option')['COMPRESSION_CODEC'] = \
         vector.get_value('compression_codec')
   self.run_test_case('QueryTest/insert-mem-limit', vector,
       multiple_impalad=vector.get_value('exec_option')['sync_ddl'] == 1)
   # IMPALA-7023: These queries can linger and use up memory, causing subsequent
   # tests to hit memory limits. Wait for some time to allow the query to
   # be reclaimed.
   verifiers = [MetricVerifier(i.service)
                for i in ImpalaCluster.get_e2e_test_cluster().impalads]
   for v in verifiers:
     v.wait_for_metric("impala-server.num-fragments-in-flight", 0, timeout=60)
Example no. 43
0
  def test_failure_in_prepare(self):
    # Fail the scan node
    verifiers = [MetricVerifier(i.service)
                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
    self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'");
    try:
      self.client.execute("SELECT COUNT(*) FROM functional.alltypes")
      assert "Query should have thrown an error"
    except ImpalaBeeswaxException:
      pass

    for v in verifiers:
      v.wait_for_metric(self.IN_FLIGHT_FRAGMENTS, 0)
Example no. 44
0
  def test_hive_udfs_missing_jar(self, vector, unique_database):
    """ IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present
    on HDFS"""
    # Copy hive-exec.jar to a temporary file
    jar_path = get_fs_path("/test-warehouse/{0}.db/".format(unique_database)
                           + get_random_id(5) + ".jar")
    hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
    check_call(["hadoop", "fs", "-cp", hive_jar, jar_path])
    drop_fn_stmt = (
        "drop function if exists "
        "`{0}`.`pi_missing_jar`()".format(unique_database))
    create_fn_stmt = (
        "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
        "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(unique_database, jar_path))

    cluster = ImpalaCluster()
    impalad = cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    # Create and drop functions with sync_ddl to make sure they are reflected
    # in every impalad.
    exec_option = copy(vector.get_value('exec_option'))
    exec_option['sync_ddl'] = 1

    self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
    self.execute_query_expect_success(client, create_fn_stmt, exec_option)
    # Delete the udf jar
    check_call(["hadoop", "fs", "-rm", jar_path])

    different_impalad = cluster.get_different_impalad(impalad)
    client = different_impalad.service.create_beeswax_client()
    # Run a query using the udf from an impalad other than the one
    # we used to create the function. This is to bypass loading from
    # the cache
    try:
      self.execute_query_using_client(
          client, "select `{0}`.`pi_missing_jar`()".format(unique_database), vector)
      assert False, "Query expected to fail"
    except ImpalaBeeswaxException, e:
      assert "Failed to get file info" in str(e)
Example no. 45
0
  def test_table_is_cached(self, vector):
    cached_read_metric = "impala-server.io-mgr.cached-bytes-read"
    query_string = "select count(*) from tpch.nation"
    expected_bytes_delta = 2199
    impala_cluster = ImpalaCluster.get_e2e_test_cluster()

    # Collect the cached read metric on all the impalads before running the query
    cached_bytes_before = list()
    for impalad in impala_cluster.impalads:
      cached_bytes_before.append(impalad.service.get_metric_value(cached_read_metric))

    # Execute the query.
    result = self.execute_query(query_string)
    assert(len(result.data) == 1)
    assert(result.data[0] == '25')

    # Read the metrics again.
    cached_bytes_after = list()
    for impalad in impala_cluster.impalads:
      cached_bytes_after.append(impalad.service.get_metric_value(cached_read_metric))

    # Verify that the cached bytes increased by the expected number on exactly one of
    # the impalads.
    num_metrics_increased = 0
    assert(len(cached_bytes_before) == len(cached_bytes_after))
    for i in range(0, len(cached_bytes_before)):
      assert(cached_bytes_before[i] == cached_bytes_after[i] or\
             cached_bytes_before[i] + expected_bytes_delta == cached_bytes_after[i])
      if cached_bytes_after[i] > cached_bytes_before[i]:
        num_metrics_increased = num_metrics_increased + 1

    if IS_DOCKERIZED_TEST_CLUSTER:
      assert num_metrics_increased == 0, "HDFS caching is disabled in dockerised cluster."
    elif num_metrics_increased != 1:
      # Test failed, print the metrics
      for i in range(0, len(cached_bytes_before)):
        print "%d %d" % (cached_bytes_before[i], cached_bytes_after[i])
      assert(False)
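
The metric comparison in test_table_is_cached is a self-contained check: every impalad's cached-bytes counter must either stay the same or grow by exactly the expected delta, and exactly one impalad should grow. A minimal pure-Python sketch of that check (function name invented for illustration):

def count_metric_increases(before, after, expected_delta):
  # Each entry must be unchanged or increased by exactly expected_delta;
  # return how many entries increased.
  assert len(before) == len(after)
  increased = 0
  for b, a in zip(before, after):
    assert a == b or a == b + expected_delta, "unexpected delta: %d -> %d" % (b, a)
    if a > b:
      increased += 1
  return increased

# With the 2199-byte delta used above, exactly one impalad should have read
# from the HDFS cache:
assert count_metric_increases([0, 0, 0], [0, 2199, 0], 2199) == 1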
Example no. 46
0
  def test_failure_in_prepare_multi_fragment(self):
    # Test that if one fragment fails that the others are cleaned up during the ensuing
    # cancellation.
    verifiers = [MetricVerifier(i.service)
                 for i in ImpalaCluster.get_e2e_test_cluster().impalads]
    # Fail the scan node
    self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'");

    # Force a query plan that will have three fragments or more.
    try:
      self.client.execute("SELECT COUNT(*) FROM functional.alltypes a JOIN [SHUFFLE] \
        functional.alltypes b on a.id = b.id")
      assert "Query should have thrown an error"
    except ImpalaBeeswaxException:
      pass

    for v in verifiers:
      # Long timeout required because fragments may be blocked while sending data. The
      # default value of --datastream_sender_timeout_ms is 120s before they wake up and
      # cancel themselves.
      #
      # TODO: Fix when we have cancellable RPCs.
      v.wait_for_metric(self.IN_FLIGHT_FRAGMENTS, 0, timeout=125)
Example no. 47
0
  def test_views_describe(self, vector, unique_database):
    # IMPALA-6896: Tests that altered views can be described by all impalads.
    impala_cluster = ImpalaCluster.get_e2e_test_cluster()
    impalads = impala_cluster.impalads
    view_name = "%s.test_describe_view" % unique_database
    query_opts = vector.get_value('exec_option')
    first_client = impalads[0].service.create_beeswax_client()
    try:
      # Create a view and verify it's visible.
      self.execute_query_expect_success(first_client,
                                        "create view {0} as "
                                        "select * from functional.alltypes"
                                        .format(view_name), query_opts)
      self._verify_describe_view(vector, view_name, "select * from functional.alltypes")

      # Alter the view and verify the alter is visible.
      self.execute_query_expect_success(first_client,
                                        "alter view {0} as "
                                        "select * from functional.alltypesagg"
                                        .format(view_name), query_opts)
      self._verify_describe_view(vector, view_name,
                                 "select * from functional.alltypesagg")
    finally:
      first_client.close()
Example no. 48
0
  def test_get_functions(self, vector, unique_database):
    impala_cluster = ImpalaCluster.get_e2e_test_cluster()
    catalogd = impala_cluster.catalogd.service
    trans_type = 'buffered'
    if pytest.config.option.use_kerberos:
      trans_type = 'kerberos'
    transport = create_transport(host=catalogd.hostname, port=catalogd.service_port,
                                 service='impala', transport_type=trans_type)
    transport.open()
    protocol = TBinaryProtocol.TBinaryProtocol(transport)
    catalog_client = CatalogService.Client(protocol)

    request = TGetFunctionsRequest()
    request.db_name = unique_database
    response = catalog_client.GetFunctions(request)
    assert response.status.status_code == TErrorCode.OK
    assert len(response.functions) == 0

    self.client.execute("create function %s.fn() RETURNS int "
                        "LOCATION '%s/libTestUdfs.so' SYMBOL='Fn'"
                        % (unique_database, WAREHOUSE))

    response = catalog_client.GetFunctions(request)
    LOG.debug(response)
    assert len(response.functions) == 1
    assert len(response.functions[0].arg_types) == 0
    assert response.functions[0].name.db_name == unique_database
    assert response.functions[0].name.function_name == 'fn'
    assert response.functions[0].aggregate_fn is None
    assert response.functions[0].scalar_fn is not None
    assert '/test-warehouse/libTestUdfs.so' in response.functions[0].hdfs_location

    # Add another scalar function with overloaded parameters ensure it shows up.
    self.client.execute("create function %s.fn(int) RETURNS double "\
        "LOCATION '%s/libTestUdfs.so' SYMBOL='Fn'" % (unique_database, WAREHOUSE))
    response = catalog_client.GetFunctions(request)
    LOG.debug(response)
    assert response.status.status_code == TErrorCode.OK
    assert len(response.functions) == 2

    functions = [fn for fn in response.functions]

    # Sort by number of arguments in the function (ascending)
    functions.sort(key=lambda fn: len(fn.arg_types))
    assert len(functions[0].arg_types) == 0
    assert len(functions[1].arg_types) == 1
    assert functions[0].signature == 'fn()'
    assert functions[1].signature == 'fn(INT)'

    # Verify aggregate functions can also be retrieved
    self.client.execute("create aggregate function %s.agg_fn(int, string) RETURNS int "
                        "LOCATION '%s/libTestUdas.so' UPDATE_FN='TwoArgUpdate'"
                        % (unique_database, WAREHOUSE))
    response = catalog_client.GetFunctions(request)
    LOG.debug(response)
    assert response.status.status_code == TErrorCode.OK
    assert len(response.functions) == 3
    functions = [fn for fn in response.functions if fn.aggregate_fn is not None]
    # Should be only 1 aggregate function
    assert len(functions) == 1

    # Negative test cases for database name
    request.db_name = unique_database + "_does_not_exist"
    response = catalog_client.GetFunctions(request)
    LOG.debug(response)
    assert response.status.status_code == TErrorCode.GENERAL
    assert 'Database does not exist: ' in str(response.status)

    request = TGetFunctionsRequest()
    response = catalog_client.GetFunctions(request)
    LOG.debug(response)
    assert response.status.status_code == TErrorCode.GENERAL
    assert 'Database name must be set' in str(response.status)
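
The middle of test_get_functions validates overloaded functions by sorting the returned entries by arity and checking their signatures. A toy illustration of just that step, with a namedtuple standing in for the Thrift function objects (field names assumed to match the attributes used in the test above):

from collections import namedtuple

# Stand-in for the Thrift structs returned by GetFunctions.
Function = namedtuple("Function", ["signature", "arg_types"])

functions = [Function("fn(INT)", ["INT"]), Function("fn()", [])]
# Sort by number of arguments (ascending), as in the test.
functions.sort(key=lambda fn: len(fn.arg_types))
assert functions[0].signature == "fn()"
assert functions[1].signature == "fn(INT)"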
Example no. 49
0
  kill_all(force=options.force_kill)

  try:
    import json
    wait_for_cluster = wait_for_cluster_web
  except ImportError:
    print "json module not found, checking for cluster startup through the command-line"
    wait_for_cluster = wait_for_cluster_cmdline

  # If ImpalaCluster cannot be imported, fall back to the command-line to check
  # whether impalads/statestore are up.
  try:
    from tests.common.impala_cluster import ImpalaCluster
    # Make sure the processes have been killed. We loop till we can't detect a single
    # impalad or a statestore process.
    impala_cluster = ImpalaCluster()
    while len(impala_cluster.impalads) != 0 or impala_cluster.statestored or\
          impala_cluster.catalogd:
      impala_cluster.refresh()
  except ImportError:
    print 'ImpalaCluster module not found.'
    wait_for_cluster = wait_for_cluster_cmdline

  if options.inprocess:
    # The statestore and the impalads start in the same process. Additionally,
    # the statestore does not have a debug webpage.
    start_mini_impala_cluster(options.cluster_size)
    wait_for_cluster_cmdline()
  else:
    try:
      start_statestore()
Example no. 50
0
 def _run_query_all_impalads(self, exec_options, query, expected):
   impala_cluster = ImpalaCluster.get_e2e_test_cluster()
   for impalad in impala_cluster.impalads:
     client = impalad.service.create_beeswax_client()
     result = self.execute_query_expect_success(client, query, exec_options)
     assert result.data == expected
Example no. 51
0
  def test_native_functions_race(self, vector, unique_database):
    """ IMPALA-6488: stress concurrent adds, uses, and deletes of native functions.
        Exposes a crash caused by use-after-free in lib-cache."""

    # Native function used by a query. Stresses lib-cache during analysis and
    # backend expressions.
    create_fn_to_use = \
      """create function {0}.use_it(string) returns string
         LOCATION '{1}'
         SYMBOL='_Z8IdentityPN10impala_udf15FunctionContextERKNS_9StringValE'"""
    use_fn = """select * from (select max(int_col) from functional.alltypesagg
                where {0}.use_it(string_col) = 'blah' union all
                (select max(int_col) from functional.alltypesagg
                 where {0}.use_it(String_col) > '1' union all
                (select max(int_col) from functional.alltypesagg
                 where {0}.use_it(string_col) > '1'))) v"""
    # Reference to another native function from the same 'so' file. Creating/dropping
    # stresses lib-cache lookup, add, and refresh.
    create_another_fn = """create function if not exists {0}.other(float)
                           returns float location '{1}' symbol='Identity'"""
    drop_another_fn = """drop function if exists {0}.other(float)"""
    udf_path = get_fs_path('/test-warehouse/libTestUdfs.so')

    # Tracks number of impalads prior to tests to check that none have crashed.
    # All impalads are assumed to be coordinators.
    cluster = ImpalaCluster.get_e2e_test_cluster()
    exp_num_coordinators = cluster.num_responsive_coordinators()

    setup_client = self.create_impala_client()
    setup_query = create_fn_to_use.format(unique_database, udf_path)
    try:
      setup_client.execute(setup_query)
    except Exception as e:
      print "Unable to create initial function: {0}".format(setup_query)
      raise

    errors = []

    def use_fn_method():
      time.sleep(1 + random.random())
      client = self.create_impala_client()
      query = use_fn.format(unique_database)
      try:
        client.execute(query)
      except Exception as e:
        errors.append(e)

    def load_fn_method():
      time.sleep(1 + random.random())
      client = self.create_impala_client()
      drop = drop_another_fn.format(unique_database)
      create = create_another_fn.format(unique_database, udf_path)
      try:
        client.execute(drop)
        client.execute(create)
      except Exception as e:
        errors.append(e)

    # number of uses/loads needed to reliably reproduce the bug.
    num_uses = 200
    num_loads = 200

    # create threads to use native function.
    runner_threads = []
    for i in xrange(num_uses):
      runner_threads.append(threading.Thread(target=use_fn_method))

    # create threads to drop/create native functions.
    for i in xrange(num_loads):
      runner_threads.append(threading.Thread(target=load_fn_method))

    # launch all runner threads.
    for t in runner_threads: t.start()

    # join all threads.
    for t in runner_threads: t.join()

    for e in errors: print e

    # Checks that no impalad has crashed.
    assert cluster.num_responsive_coordinators() == exp_num_coordinators
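
test_native_functions_race follows a general stress-test shape: launch many threads that run two competing operations, collect any exceptions in a shared list, join all threads, and only then check that the daemons are still healthy. A compact standalone sketch of that shape, with placeholder workloads (nothing here is part of the test framework):

import random
import threading
import time

def start_workers(target, count, errors):
  # Start 'count' threads running 'target'; any exception is appended to 'errors'.
  def wrapper():
    time.sleep(1 + random.random())  # stagger thread start, as in the test above
    try:
      target()
    except Exception as e:
      errors.append(e)
  threads = [threading.Thread(target=wrapper) for _ in range(count)]
  for t in threads:
    t.start()
  return threads

errors = []
threads = start_workers(lambda: None, 10, errors)   # stand-in for use_fn_method
threads += start_workers(lambda: None, 10, errors)  # stand-in for load_fn_method
for t in threads:
  t.join()
assert not errors, errors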
Example no. 52
0
  # Kill existing cluster processes based on the current configuration.
  if options.restart_impalad_only:
    cluster_ops.kill_all_impalads(force=options.force_kill)
  elif options.restart_catalogd_only:
    cluster_ops.kill_catalogd(force=options.force_kill)
  elif options.restart_statestored_only:
    cluster_ops.kill_statestored(force=options.force_kill)
  else:
    cluster_ops.kill_all_daemons(force=options.force_kill)

  if options.kill_only:
    sys.exit(0)

  if options.restart_impalad_only:
    impala_cluster = ImpalaCluster()
    if not impala_cluster.statestored or not impala_cluster.catalogd:
      LOG.info("No running statestored or catalogd detected. "
          "Restarting entire cluster.")
      options.restart_impalad_only = False

  try:
    if options.restart_catalogd_only:
      cluster_ops.start_catalogd()
    elif options.restart_statestored_only:
      cluster_ops.start_statestore()
    elif options.restart_impalad_only:
      cluster_ops.start_impalads(options.cluster_size, options.num_coordinators,
                              options.use_exclusive_coordinators)
    else:
      cluster_ops.start_statestore()
Example no. 53
0
    try:
      self.run_test_case('QueryTest/udf-mem-limit', vector, use_db=unique_database)
      assert False, "Query was expected to fail"
    except ImpalaBeeswaxException, e:
      self._check_mem_limit_exception(e)

    try:
      self.run_test_case('QueryTest/uda-mem-limit', vector, use_db=unique_database)
      assert False, "Query was expected to fail"
    except ImpalaBeeswaxException, e:
      self._check_mem_limit_exception(e)

    # It takes a long time for Impala to free up memory after this test, especially if
    # ASAN is enabled. Verify that all fragments finish executing before moving on to the
    # next test to make sure that the next test is not affected.
    for impalad in ImpalaCluster.get_e2e_test_cluster().impalads:
      verifier = MetricVerifier(impalad.service)
      verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0)
      verifier.verify_num_unused_buffers()

  def test_udf_constant_folding(self, vector, unique_database):
    """Test that constant folding of UDFs is handled correctly. Uses count_rows(),
    which returns a unique value every time it is evaluated in the same thread."""
    exec_options = copy(vector.get_value('exec_option'))
    # Execute on a single node so that all counter values will be unique.
    exec_options["num_nodes"] = 1
    create_fn_query = """create function {database}.count_rows() returns bigint
                         location '{location}' symbol='Count' prepare_fn='CountPrepare'
                         close_fn='CountClose'"""
    self._load_functions(create_fn_query, vector, unique_database,
        get_fs_path('/test-warehouse/libTestUdfs.so'))