def test_native_functions_race(self, vector, unique_database):
    """IMPALA-6488: stress concurrent adds, uses, and deletes of native functions.

    Exposes a crash caused by use-after-free in lib-cache. One native function
    is created up front and then queried from many threads while other threads
    repeatedly drop and re-create a second native function backed by the same
    'so' file. The test passes when the number of impalads reported by
    ImpalaCluster after the run equals the number seen before it.
    """
    # Number of uses/loads needed to reliably reproduce the bug.
    num_uses = 200
    num_loads = 200

    # Native function used by a query. Stresses lib-cache during analysis and
    # backend expressions.
    create_fn_to_use = (
        "create function {0}.use_it(string) returns string "
        "LOCATION '{1}' "
        "SYMBOL='_Z8IdentityPN10impala_udf15FunctionContextERKNS_9StringValE'"
    )
    use_fn = (
        "select * from (select max(int_col) from functional.alltypesagg "
        "where {0}.use_it(string_col) = 'blah' union all "
        "(select max(int_col) from functional.alltypesagg "
        "where {0}.use_it(String_col) > '1' union all "
        "(select max(int_col) from functional.alltypesagg "
        "where {0}.use_it(string_col) > '1'))) v"
    )
    # Reference to another native function from the same 'so' file.
    # Creating/dropping stresses lib-cache lookup, add, and refresh.
    create_another_fn = (
        "create function if not exists {0}.other(float) "
        "returns float location '{1}' symbol='Identity'"
    )
    drop_another_fn = "drop function if exists {0}.other(float)"
    udf_path = get_fs_path('/test-warehouse/libTestUdfs.so')

    # Record the impalad count before the stress run so a crash can be detected.
    cluster = ImpalaCluster()
    exp_num_impalads = len(cluster.impalads)

    # Create the function that the query threads will call. A failure here is
    # fatal for the test: report the offending statement and re-raise.
    setup_client = self.create_impala_client()
    setup_query = create_fn_to_use.format(unique_database, udf_path)
    try:
        setup_client.execute(setup_query)
    except Exception:
        print("Unable to create initial function: {0}".format(setup_query))
        raise

    # Statements issued by the two kinds of worker threads.
    use_statements = [use_fn.format(unique_database)]
    load_statements = [
        drop_another_fn.format(unique_database),
        create_another_fn.format(unique_database, udf_path),
    ]

    failures = []

    def run_statements(statements):
        """Run 'statements' in order on a fresh client, recording any failure."""
        # Each worker waits between 1 and 2 seconds (randomly jittered) before
        # connecting -- presumably so the statements of different threads overlap.
        time.sleep(1 + random.random())
        connection = self.create_impala_client()
        try:
            for statement in statements:
                connection.execute(statement)
        except Exception as exc:
            # The first failing statement ends this worker; later ones are skipped.
            failures.append(exc)

    # Threads that use the native function come first, followed by the threads
    # that drop/create the other native function.
    workers = [threading.Thread(target=run_statements, args=(use_statements,))
               for _ in range(num_uses)]
    workers += [threading.Thread(target=run_statements, args=(load_statements,))
                for _ in range(num_loads)]

    # Launch every worker, then wait for all of them to finish.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    # Statement failures are only reported, not asserted on; the pass/fail
    # criterion is the impalad count checked below.
    for failure in failures:
        print(failure)

    # Checks that no impalad has crashed.
    cluster.refresh()
    assert len(cluster.impalads) == exp_num_impalads
sys.exit(1) try: import json wait_for_cluster = wait_for_cluster_web except ImportError: print "json module not found, checking for cluster startup through the command-line" wait_for_cluster = wait_for_cluster_cmdline # Kill existing processes. kill_all(force=options.force_kill) # Make sure the processes have been killed. We loop till we can't detect a single # impald or a statestore process. impala_cluster = ImpalaCluster() while len(impala_cluster.impalads) != 0 or impala_cluster.statestored: impala_cluster.refresh() if options.inprocess: # The statestore and the impalads start in the same process. Additionally, # the statestore does not have a debug webpage. start_mini_impala_cluster(options.cluster_size) wait_for_cluster_cmdline() else: try: start_statestore() start_impalad_instances(options.cluster_size) wait_for_cluster() except Exception, e: print 'Error starting cluster: %s' % e sys.exit(1) print 'ImpalaD Cluster Running with %d nodes.' % options.cluster_size
import json wait_for_cluster = wait_for_cluster_web except ImportError: print "json module not found, checking for cluster startup through the command-line" wait_for_cluster = wait_for_cluster_cmdline # If ImpalaCluster cannot be imported, fall back to the command-line to check # whether impalads/statestore are up. try: from tests.common.impala_cluster import ImpalaCluster # Make sure the processes have been killed. We loop till we can't detect a single # impalad or a statestore process. impala_cluster = ImpalaCluster() while len(impala_cluster.impalads) != 0 or impala_cluster.statestored or\ impala_cluster.catalogd: impala_cluster.refresh() except ImportError: print 'ImpalaCluster module not found.' wait_for_cluster = wait_for_cluster_cmdline if options.inprocess: # The statestore and the impalads start in the same process. Additionally, # the statestore does not have a debug webpage. start_mini_impala_cluster(options.cluster_size) wait_for_cluster_cmdline() else: try: start_statestore() start_catalogd() start_impalad_instances(options.cluster_size) wait_for_cluster()