def test_low_mem_limit_no_fragments(self, vector):
  """Run selected TPC-H queries at their minimum viable memory limits, then
  wait until every impalad reports zero query fragments in flight."""
  for q in ('Q14', 'Q18', 'Q20'):
    self.low_memory_limit_test(
        vector, 'tpch-' + q.lower(), self.MIN_MEM_FOR_TPCH[q])
  # Make sure nothing is still executing before the next test starts.
  for daemon in ImpalaCluster().impalads:
    MetricVerifier(daemon.service).wait_for_metric(
        "impala-server.num-fragments-in-flight", 0)
 def test_role_update(self, vector):
   """IMPALA-5355: The initial update from the statestore has the privileges and roles in
   reverse order if a role was modified, but not the associated privilege. Verify that
   Impala is able to handle this.
   """
   # Randomized suffix avoids collisions with roles left over from other runs.
   role_name = "test_role_" + get_random_id(5)
   try:
     self.client.execute("create role {0}".format(role_name))
     self.client.execute("grant all on server to {0}".format(role_name))
     # Wait a few seconds to make sure the update propagates to the statestore.
     sleep(3)
     # Update the role, increasing its catalog version.
     # Granting to the current user's primary group makes the role effective
     # for the connected session.
     self.client.execute("grant role {0} to group `{1}`".format(
         role_name, grp.getgrnam(getuser()).gr_name))
     result = self.client.execute("show tables in functional")
     assert 'alltypes' in result.data
     privileges_before = self.client.execute("show grant role {0}".format(role_name))
     # Wait a few seconds before restarting Impalad to make sure that the Catalog gets
     # updated.
     sleep(3)
     self.restart_first_impalad()
     verifier = MetricVerifier(self.cluster.impalads[0].service)
     verifier.wait_for_metric("catalog.ready", True)
     # Verify that we still have the right privileges after the first impalad was
     # restarted.
     result = self.client.execute("show tables in functional")
     assert 'alltypes' in result.data
     privileges_after = self.client.execute("show grant role {0}".format(role_name))
     assert privileges_before.data == privileges_after.data
   finally:
     # Always drop the role, even if an assertion above failed.
     self.client.execute("drop role {0}".format(role_name))
Example #3
0
 def test_cancel_insert(self, vector):
   """Cancel an INSERT, then check that no file handles remain open.

   A leaked handle is a known issue, so it is reported as an expected
   failure rather than a hard error.
   """
   self.execute_cancel_test(vector)
   verifier = MetricVerifier(self.impalad_test_service)
   try:
     verifier.verify_no_open_files(timeout=10)
   except AssertionError:
     # Known leak tracked upstream; downgrade to an expected failure.
     pytest.xfail("IMPALA-551: File handle leak for INSERT")
  def test_hash_join_timer(self, vector):
    """Verify that the join node's timers in both the ExecSummary and the
    runtime profile are within acceptable limits, and that the join build
    side was prepared asynchronously (in a separate thread)."""
    # This test runs serially because it requires the query to come back within
    # some amount of time. Running this with other tests makes it hard to bound
    # that time. It also assumes that it will be able to get a thread token to
    # execute the join build in parallel.
    test_case = vector.get_value('test cases')
    query = test_case[0]
    join_type = test_case[1]

    # Ensure that the cluster is idle before starting the test query.
    for impalad in ImpalaCluster().impalads:
      verifier = MetricVerifier(impalad.service)
      verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0)

    # Execute the query. The query summary and profile are stored in 'result'.
    result = self.execute_query(query, vector.get_value('exec_option'))

    # Parse the query summary; The join node is "id=3".
    # In the ExecSummary, search for the join operator's summary and verify the
    # avg and max times are within acceptable limits.
    exec_summary = result.exec_summary
    check_execsummary_count = 0
    join_node_name = "03:%s" % (join_type)
    for line in exec_summary:
      if line['operator'] == join_node_name:
        # Summary times are in nanoseconds; convert to ms before checking.
        avg_time_ms = line['avg_time'] / self.NANOS_PER_MILLI
        self.__verify_join_time(avg_time_ms, "ExecSummary Avg")
        max_time_ms = line['max_time'] / self.NANOS_PER_MILLI
        self.__verify_join_time(max_time_ms, "ExecSummary Max")
        check_execsummary_count += 1
    assert (check_execsummary_count == 1), \
        "Unable to verify ExecSummary: {0}".format(exec_summary)

    # Parse the query profile; The join node is "id=3".
    # In the profiles, search for lines containing "(id=3)" and parse for the avg and
    # non-child times to verify that they are within acceptable limits. Also verify
    # that the build side is built in a different thread by searching for the string:
    # "Join Build-Side Prepared Asynchronously"
    profile = result.runtime_profile
    check_fragment_count = 0
    asyn_build = False
    for line in profile.split("\n"):
      if ("(id=3)" in line):
        # Sample line:
        # HASH_JOIN_NODE (id=3):(Total: 3s580ms, non-child: 11.89ms, % non-child: 0.31%)
        strip1 = re.split("non-child: ", line)[1]
        non_child_time = re.split(", ", strip1)[0]
        non_child_time_ms = parse_duration_string_ms(non_child_time)
        self.__verify_join_time(non_child_time_ms, "Fragment non-child")
        check_fragment_count += 1
      # Search for "Join Build-Side Prepared Asynchronously"
      if ("Join Build-Side Prepared Asynchronously" in line):
        asyn_build = True
    assert (asyn_build), "Join is not prepared asynchronously: {0}".format(profile)
    # Expect more than one match: one per fragment instance plus the averaged
    # fragment section.
    assert (check_fragment_count > 1), \
        "Unable to verify Fragment or Average Fragment: {0}".format(profile)
 def test_num_unused_buffers(self):
   """Check that every buffer in the buffer pool is currently unused."""
   MetricVerifier(self.impalad_test_service).verify_num_unused_buffers()
      self.run_test_case('QueryTest/udf-mem-limit', vector, use_db=unique_database)
      assert False, "Query was expected to fail"
    except ImpalaBeeswaxException, e:
      self._check_mem_limit_exception(e)

    try:
      self.run_test_case('QueryTest/uda-mem-limit', vector, use_db=unique_database)
      assert False, "Query was expected to fail"
    except ImpalaBeeswaxException, e:
      self._check_mem_limit_exception(e)

    # It takes a long time for Impala to free up memory after this test, especially if
    # ASAN is enabled. Verify that all fragments finish executing before moving on to the
    # next test to make sure that the next test is not affected.
    for impalad in ImpalaCluster().impalads:
      verifier = MetricVerifier(impalad.service)
      verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0)
      verifier.verify_num_unused_buffers()

  def test_udf_constant_folding(self, vector, unique_database):
    """Verify that constant folding of UDFs is handled correctly.

    Relies on count_rows(), which yields a distinct value on every
    evaluation within the same thread."""
    options = copy(vector.get_value('exec_option'))
    # Single-node execution guarantees that all counter values are unique.
    options["num_nodes"] = 1
    create_fn_query = """create function {database}.count_rows() returns bigint
                         location '{location}' symbol='Count' prepare_fn='CountPrepare'
                         close_fn='CountClose'"""
    self._load_functions(
        create_fn_query, vector, unique_database,
        get_fs_path('/test-warehouse/libTestUdfs.so'))
 def test_metrics_are_zero(self):
   """Verify that every metric in METRIC_LIST currently reads zero."""
   MetricVerifier(self.impalad_test_service).verify_metrics_are_zero()
 def test_cancel_insert(self, vector):
   """Cancel an INSERT and then verify no file handles remain open."""
   self.execute_cancel_test(vector)
   MetricVerifier(self.impalad_test_service).verify_no_open_files(timeout=10)
Example #9
0
        except ImpalaBeeswaxException, e:
            self._check_exception(e)

        try:
            self.run_test_case('QueryTest/uda-mem-limit',
                               vector,
                               use_db=unique_database)
            assert False, "Query was expected to fail"
        except ImpalaBeeswaxException, e:
            self._check_exception(e)

        # It takes a long time for Impala to free up memory after this test, especially if
        # ASAN is enabled. Verify that all fragments finish executing before moving on to the
        # next test to make sure that the next test is not affected.
        for impalad in ImpalaCluster().impalads:
            verifier = MetricVerifier(impalad.service)
            verifier.wait_for_metric("impala-server.num-fragments-in-flight",
                                     0)
            verifier.verify_num_unused_buffers()

    def test_udf_constant_folding(self, vector, unique_database):
        """Verify that constant folding of UDFs is handled correctly.

        Uses count_rows(), which returns a unique value each time it is
        evaluated in the same thread."""
        options = copy(vector.get_value('exec_option'))
        # Run on a single node so that every counter value is unique.
        options["num_nodes"] = 1
        create_fn_query = """create function {database}.count_rows() returns bigint
                         location '{location}' symbol='Count' prepare_fn='CountPrepare'
                         close_fn='CountClose'"""
        self._load_functions(
            create_fn_query, vector, unique_database,
            get_fs_path('/test-warehouse/libTestUdfs.so'))
    def test_hash_join_timer(self, vector):
        """Verify that the join node's timers in both the ExecSummary and the
        runtime profile are within acceptable limits, and that the join build
        side was prepared asynchronously (in a separate thread)."""
        # This test runs serially because it requires the query to come back within
        # some amount of time. Running this with other tests makes it hard to bound
        # that time. It also assumes that it will be able to get a thread token to
        # execute the join build in parallel.
        test_case = vector.get_value('test cases')
        query = test_case[0]
        join_type = test_case[1]

        # Ensure that the cluster is idle before starting the test query.
        for impalad in ImpalaCluster().impalads:
            verifier = MetricVerifier(impalad.service)
            verifier.wait_for_metric("impala-server.num-fragments-in-flight",
                                     0)

        # Execute async to get a handle. Wait until the query has completed.
        handle = self.execute_query_async(query,
                                          vector.get_value('exec_option'))
        self.impalad_test_service.wait_for_query_state(
            self.client,
            handle,
            self.client.QUERY_STATES['FINISHED'],
            timeout=40)
        self.close_query(handle)

        # Parse the query profile
        # The hash join node is "id=3".
        # In the ExecSummary, search for "03:HASH JOIN" line, column 3 (avg) and 4 (max).
        # In the fragment (including average), search for "HASH_JOIN_NODE (id=2)" and the
        # non-child time.
        # Also verify that the build side is in a different thread by searching for:
        #     "Join Build-Side Prepared Asynchronously"
        profile = self.client.get_runtime_profile(handle)
        check_execsummary_count = 0
        check_fragment_count = 0
        asyn_build = False

        for line in profile.split("\n"):
            # Matching for ExecSummary
            if ("03:%s  " % (join_type) in line):
                # Sample line:
                # 03:HASH JOIN           3    11.89ms   12.543ms  6.57K  ...
                # Split using "JOIN +", then split the right side with space. This becomes:
                #   "3","11.89ms","12.543ms",...
                # The second column is the average, and the 3rd column is the max
                rhs = re.split("JOIN +", line)[1]
                columns = re.split(" +", rhs)
                self.__verify_join_time(columns[1], "ExecSummary Avg")
                self.__verify_join_time(columns[2], "ExecSummary Max")
                check_execsummary_count = 1
            # Matching for Fragment (including the averaged fragment section)
            if ("(id=3)" in line):
                # Sample line:
                # HASH_JOIN_NODE (id=3):(Total: 3s580ms, non-child: 11.89ms, % non-child: 0.31%)
                strip1 = re.split("non-child: ", line)[1]
                non_child_time = re.split(", ", strip1)[0]
                self.__verify_join_time(non_child_time, "Fragment non-child")
                check_fragment_count = check_fragment_count + 1
            # Search for "Join Build-Side Prepared Asynchronously"
            if ("Join Build-Side Prepared Asynchronously" in line):
                asyn_build = True

        assert (asyn_build
                ), "Join is not prepared asynchronously: {0}".format(profile)
        # Expect one match per fragment instance plus the averaged fragment.
        assert (check_fragment_count > 1), \
            "Unable to verify Fragment or Average Fragment: {0}".format(profile)
        assert (check_execsummary_count == 1), \
            "Unable to verify ExecSummary: {0}".format(profile)
 def test_cancel_insert(self, vector):
     """Cancel an INSERT, then confirm that no file handles stay open."""
     self.execute_cancel_test(vector)
     MetricVerifier(self.impalad_test_service).verify_no_open_files(timeout=10)