def test_low_mem_limit_no_fragments(self, vector):
  """Runs selected TPCH queries under their minimum memory limits, then waits
  until no query fragments remain in flight on any impalad."""
  for query_id in ('Q14', 'Q18', 'Q20'):
    workload = 'tpch-%s' % query_id.lower()
    self.low_memory_limit_test(vector, workload, self.MIN_MEM_FOR_TPCH[query_id])
  # Make sure the cluster is fully quiesced before the next test starts.
  for daemon in ImpalaCluster().impalads:
    MetricVerifier(daemon.service).wait_for_metric(
        "impala-server.num-fragments-in-flight", 0)
def test_role_update(self, vector):
  """IMPALA-5355: The statestore's initial update can deliver privileges and
  roles in reverse order when a role was modified but its associated privilege
  was not. Verify that Impala handles this ordering across an impalad restart."""
  role = "test_role_" + get_random_id(5)
  try:
    self.client.execute("create role {0}".format(role))
    self.client.execute("grant all on server to {0}".format(role))
    # Give the update a few seconds to propagate to the statestore.
    sleep(3)
    # Bump the role's catalog version without touching its privilege.
    group = grp.getgrnam(getuser()).gr_name
    self.client.execute("grant role {0} to group `{1}`".format(role, group))
    assert 'alltypes' in self.client.execute("show tables in functional").data
    grants_before = self.client.execute("show grant role {0}".format(role))
    # Let the Catalog pick up the change before restarting the impalad.
    sleep(3)
    self.restart_first_impalad()
    MetricVerifier(self.cluster.impalads[0].service).wait_for_metric(
        "catalog.ready", True)
    # The privileges must survive the restart unchanged.
    assert 'alltypes' in self.client.execute("show tables in functional").data
    grants_after = self.client.execute("show grant role {0}".format(role))
    assert grants_before.data == grants_after.data
  finally:
    self.client.execute("drop role {0}".format(role))
def test_cancel_insert(self, vector):
  """Cancels a running INSERT, then checks the daemon is not leaking file
  handles. The known leak (IMPALA-551) is reported as an expected failure."""
  self.execute_cancel_test(vector)
  verifier = MetricVerifier(self.impalad_test_service)
  try:
    verifier.verify_no_open_files(timeout=10)
  except AssertionError:
    pytest.xfail("IMPALA-551: File handle leak for INSERT")
def test_hash_join_timer(self, vector):
  """Checks that the hash join's build time stays within acceptable limits.

  Runs serially: the timing bounds assume an otherwise idle cluster, and the
  test expects to get a thread token so the join build runs in parallel.
  Verifies the join node's avg/max times in the ExecSummary, the non-child
  time on every "(id=3)" profile line, and that the build side was prepared
  on a separate thread."""
  case = vector.get_value('test cases')
  query, join_type = case[0], case[1]

  # Wait for any fragments left over from earlier tests to drain first.
  for daemon in ImpalaCluster().impalads:
    MetricVerifier(daemon.service).wait_for_metric(
        "impala-server.num-fragments-in-flight", 0)

  # Run the query; 'result' carries both the summary and the profile.
  result = self.execute_query(query, vector.get_value('exec_option'))

  # The join operator is node id 3; find its ExecSummary row and verify the
  # avg and max times are within the limit.
  summary = result.exec_summary
  expected_operator = "03:%s" % (join_type)
  summary_matches = 0
  for row in summary:
    if row['operator'] != expected_operator:
      continue
    self.__verify_join_time(row['avg_time'] / self.NANOS_PER_MILLI,
                            "ExecSummary Avg")
    self.__verify_join_time(row['max_time'] / self.NANOS_PER_MILLI,
                            "ExecSummary Max")
    summary_matches += 1
  assert summary_matches == 1, \
      "Unable to verify ExecSummary: {0}".format(summary)

  # Scan the profile text: every "(id=3)" line reports the join's non-child
  # time; also confirm the build side was prepared asynchronously.
  profile = result.runtime_profile
  fragment_matches = 0
  built_async = False
  for line in profile.split("\n"):
    if "(id=3)" in line:
      # e.g. HASH_JOIN_NODE (id=3):(Total: 3s580ms, non-child: 11.89ms, ...)
      duration = line.split("non-child: ")[1].split(", ")[0]
      self.__verify_join_time(parse_duration_string_ms(duration),
                              "Fragment non-child")
      fragment_matches += 1
    if "Join Build-Side Prepared Asynchronously" in line:
      built_async = True
  assert built_async, "Join is not prepared asynchronously: {0}".format(profile)
  assert fragment_matches > 1, \
      "Unable to verify Fragment or Average Fragment: {0}".format(profile)
def test_num_unused_buffers(self):
  """Verify that no buffers are currently in use on the impalad under test."""
  MetricVerifier(self.impalad_test_service).verify_num_unused_buffers()
self.run_test_case('QueryTest/udf-mem-limit', vector, use_db=unique_database) assert False, "Query was expected to fail" except ImpalaBeeswaxException, e: self._check_mem_limit_exception(e) try: self.run_test_case('QueryTest/uda-mem-limit', vector, use_db=unique_database) assert False, "Query was expected to fail" except ImpalaBeeswaxException, e: self._check_mem_limit_exception(e) # It takes a long time for Impala to free up memory after this test, especially if # ASAN is enabled. Verify that all fragments finish executing before moving on to the # next test to make sure that the next test is not affected. for impalad in ImpalaCluster().impalads: verifier = MetricVerifier(impalad.service) verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0) verifier.verify_num_unused_buffers() def test_udf_constant_folding(self, vector, unique_database): """Test that constant folding of UDFs is handled correctly. Uses count_rows(), which returns a unique value every time it is evaluated in the same thread.""" exec_options = copy(vector.get_value('exec_option')) # Execute on a single node so that all counter values will be unique. exec_options["num_nodes"] = 1 create_fn_query = """create function {database}.count_rows() returns bigint location '{location}' symbol='Count' prepare_fn='CountPrepare' close_fn='CountClose'""" self._load_functions(create_fn_query, vector, unique_database, get_fs_path('/test-warehouse/libTestUdfs.so'))
def test_metrics_are_zero(self):
  """Verify that every metric named in METRIC_LIST reads zero."""
  MetricVerifier(self.impalad_test_service).verify_metrics_are_zero()
def test_cancel_insert(self, vector):
  """Cancels a running INSERT, then confirms the daemon holds no open files."""
  self.execute_cancel_test(vector)
  verifier = MetricVerifier(self.impalad_test_service)
  verifier.verify_no_open_files(timeout=10)
except ImpalaBeeswaxException, e: self._check_exception(e) try: self.run_test_case('QueryTest/uda-mem-limit', vector, use_db=unique_database) assert False, "Query was expected to fail" except ImpalaBeeswaxException, e: self._check_exception(e) # It takes a long time for Impala to free up memory after this test, especially if # ASAN is enabled. Verify that all fragments finish executing before moving on to the # next test to make sure that the next test is not affected. for impalad in ImpalaCluster().impalads: verifier = MetricVerifier(impalad.service) verifier.wait_for_metric("impala-server.num-fragments-in-flight", 0) verifier.verify_num_unused_buffers() def test_udf_constant_folding(self, vector, unique_database): """Test that constant folding of UDFs is handled correctly. Uses count_rows(), which returns a unique value every time it is evaluated in the same thread.""" exec_options = copy(vector.get_value('exec_option')) # Execute on a single node so that all counter values will be unique. exec_options["num_nodes"] = 1 create_fn_query = """create function {database}.count_rows() returns bigint location '{location}' symbol='Count' prepare_fn='CountPrepare' close_fn='CountClose'""" self._load_functions(create_fn_query, vector, unique_database, get_fs_path('/test-warehouse/libTestUdfs.so'))
def test_hash_join_timer(self, vector):
  """Checks the hash-join build time reported in the query profile.

  Runs serially because the timing bounds assume an idle cluster, and the
  test expects a thread token so the build side runs in parallel. Parses the
  profile text for three things: the ExecSummary row for the join
  ("03:<join type>", avg and max columns), the non-child time on every
  "(id=3)" fragment line (including the averaged fragment), and the
  "Join Build-Side Prepared Asynchronously" marker showing the build ran
  on a separate thread."""
  case = vector.get_value('test cases')
  query, join_type = case[0], case[1]

  # Drain any fragments still running from previous tests.
  for daemon in ImpalaCluster().impalads:
    MetricVerifier(daemon.service).wait_for_metric(
        "impala-server.num-fragments-in-flight", 0)

  # Execute asynchronously to get a handle, wait for the query to finish,
  # then fetch the runtime profile for parsing.
  handle = self.execute_query_async(query, vector.get_value('exec_option'))
  self.impalad_test_service.wait_for_query_state(
      self.client, handle, self.client.QUERY_STATES['FINISHED'], timeout=40)
  self.close_query(handle)
  profile = self.client.get_runtime_profile(handle)

  summary_seen = 0
  fragment_matches = 0
  built_async = False
  summary_marker = "03:%s " % (join_type)
  for line in profile.split("\n"):
    if summary_marker in line:
      # ExecSummary row, e.g.:
      #   03:HASH JOIN   3   11.89ms   12.543ms   6.57K ...
      # Take everything right of "JOIN", split on runs of spaces; fields
      # 1 and 2 are the average and maximum times.
      fields = re.split(" +", re.split("JOIN +", line)[1])
      self.__verify_join_time(fields[1], "ExecSummary Avg")
      self.__verify_join_time(fields[2], "ExecSummary Max")
      summary_seen = 1
    if "(id=3)" in line:
      # Fragment line (also matches the averaged fragment), e.g.:
      #   HASH_JOIN_NODE (id=3):(Total: 3s580ms, non-child: 11.89ms, ...)
      duration = re.split(", ", re.split("non-child: ", line)[1])[0]
      self.__verify_join_time(duration, "Fragment non-child")
      fragment_matches = fragment_matches + 1
    if "Join Build-Side Prepared Asynchronously" in line:
      built_async = True
  assert built_async, "Join is not prepared asynchronously: {0}".format(profile)
  assert fragment_matches > 1, \
      "Unable to verify Fragment or Average Fragment: {0}".format(profile)
  assert summary_seen == 1, \
      "Unable to verify ExecSummary: {0}".format(profile)