def setup_class(cls):
    """Stand up a dedicated Impala scratch database with a small `tweets` fixture table."""
    cls.finish = []

    # Integration-only suite: skip entirely unless a live cluster is configured.
    if not is_live_cluster():
        raise SkipTest

    cls.client = make_logged_in_client()
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    cls.db = dbms.get(cls.user, get_query_server_config(name='impala'))
    cls.DATABASE = get_db_prefix(name='impala')

    def execute_and_check(statement, database):
        # Run one statement synchronously and fail loudly if it did not succeed.
        resp = _make_query(cls.client, statement, database=database, local=False, server_name='impala')
        resp = wait_for_query_to_finish(cls.client, resp, max=180.0)
        content = json.loads(resp.content)
        assert_true(content['status'] == 0, resp.content)

    # Recreate the scratch database from a clean slate.
    ddl_statements = [
        'DROP TABLE IF EXISTS %(db)s.tweets;' % {'db': cls.DATABASE},
        'DROP DATABASE IF EXISTS %(db)s CASCADE;' % {'db': cls.DATABASE},
        'CREATE DATABASE %(db)s;' % {'db': cls.DATABASE},
    ]
    for statement in ddl_statements:
        execute_and_check(statement, 'default')

    # Build the fixture table and load five rows, one statement per query.
    fixture_statements = [
        """
        CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET;
        """,
        """
        INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa");
        """,
        """
        INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner.");
        """,
        """
        INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))");
        """,
        """
        INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx");
        """,
        """
        INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE");
        """,
    ]
    for statement in fixture_statements:
        execute_and_check(statement, cls.DATABASE)
def setUp(self):
    """Per-test setup: point Hue at a live impalad and rebuild the fixture data."""
    self.finish = []

    # We need a real Impala cluster currently: pass 'impala' on the command
    # line or export TEST_IMPALAD_HOST.
    if 'impala' not in sys.argv and not os.environ.get('TEST_IMPALAD_HOST'):
        raise SkipTest

    impalad_host = os.environ.get('TEST_IMPALAD_HOST')
    if impalad_host:
        # Keep the reset callback so tearDown can undo the config override.
        self.finish.append(SERVER_HOST.set_for_testing(impalad_host))

    self.client = make_logged_in_client()
    self.user = User.objects.get(username='******')
    add_to_group('test')
    self.db = dbms.get(self.user, get_query_server_config(name='impala'))

    # Recreate the scratch database in a single multi-statement script.
    setup_hql = """
    USE default;
    DROP TABLE IF EXISTS %(db)s.tweets;
    DROP DATABASE IF EXISTS %(db)s;
    CREATE DATABASE %(db)s;
    USE %(db)s;
    """ % {'db': self.DATABASE}
    resp = _make_query(self.client, setup_hql, database='default', local=False, server_name='impala')
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Create and populate the fixture table.
    data_hql = """
    CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET;
    INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa");
    INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner.");
    INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))");
    INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx");
    INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE");
    """
    resp = _make_query(self.client, data_hql, database=self.DATABASE, local=False, server_name='impala')
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

def tearDown(self):
    """Undo any configuration overrides installed by setUp."""
    for restore in self.finish:
        restore()
def test_basic_flow(self):
    """End-to-end: list databases and tables, run a query, page through its results."""
    dbs = self.db.get_databases()
    assert_true('_impala_builtins' in dbs, dbs)
    assert_true(self.DATABASE in dbs, dbs)

    tables = self.db.get_tables(database=self.DATABASE)
    assert_true('tweets' in tables, tables)

    hql = """
    SELECT * FROM tweets ORDER BY row_num;
    """
    response = _make_query(self.client, hql, database=self.DATABASE, local=False, server_name='impala')
    response = wait_for_query_to_finish(self.client, response, max=180.0)

    # Each fetch usually returns fewer than 5 rows, so keep fetching from the
    # current offset until the whole result set has been collected.
    rows = []
    while len(rows) < 5:
        page = fetch_query_result_data(self.client, response, n=len(rows), server_name='impala')
        rows += page['results']
    assert_equal([1, 2, 3, 4, 5], [row[0] for row in rows])

    # Starting over (fetching again from offset 0) must yield the same rows.
    rows_again = []
    while len(rows_again) < 5:
        page = fetch_query_result_data(self.client, response, n=len(rows_again), server_name='impala')
        rows_again += page['results']
    assert_equal(rows_again, rows)
def test_refresh_table(self):
    """A column added via HMS becomes visible to Impala only after a table refresh."""

    def columns_from_both():
        # Fetch the `tweets` columns as seen by Impala and by Beeswax (HMS).
        impala_resp = self.client.get(reverse('impala:api_autocomplete_columns', kwargs={'database': self.DATABASE, 'table': 'tweets'}))
        beeswax_resp = self.client.get(reverse('beeswax:api_autocomplete_columns', kwargs={'database': self.DATABASE, 'table': 'tweets'}))
        return json.loads(impala_resp.content)['columns'], json.loads(beeswax_resp.content)['columns']

    impala_columns, beeswax_columns = columns_from_both()
    assert_equal(
        impala_columns, beeswax_columns,
        "\ntest_refresh_table: `%s`.`%s`\nImpala Columns: %s\nBeeswax Columns: %s"
        % (self.DATABASE, 'tweets', ','.join(impala_columns), ','.join(beeswax_columns)))

    hql = """
    ALTER TABLE tweets ADD COLUMNS (new_column INT);
    """
    resp = _make_query(self.client, hql, wait=True, local=False, max=180.0, database=self.DATABASE)

    impala_columns, beeswax_columns = columns_from_both()
    # Before the refresh only the metastore side knows about the new column.
    assert_true('new_column' in beeswax_columns, beeswax_columns)
    assert_false('new_column' in impala_columns, impala_columns)

    resp = self.client.post(reverse('impala:refresh_table', kwargs={'database': self.DATABASE, 'table': 'tweets'}))

    impala_columns, beeswax_columns = columns_from_both()
    # After the refresh Impala picks up the new column as well.
    assert_equal(
        impala_columns, beeswax_columns,
        "\ntest_refresh_table: `%s`.`%s`\nImpala Columns: %s\nBeeswax Columns: %s"
        % (self.DATABASE, 'tweets', ','.join(impala_columns), ','.join(beeswax_columns)))
def test_invalidate_tables(self):
    """A table created via HMS becomes visible to Impala only after INVALIDATE METADATA."""

    def tables_from_both():
        # Fetch the table list as seen by Impala and by Beeswax (HMS).
        impala_resp = self.client.get(reverse('impala:api_autocomplete_tables', kwargs={'database': self.DATABASE}))
        beeswax_resp = self.client.get(reverse('beeswax:api_autocomplete_tables', kwargs={'database': self.DATABASE}))
        impala_names = [meta['name'] for meta in json.loads(impala_resp.content)['tables_meta']]
        beeswax_names = [meta['name'] for meta in json.loads(beeswax_resp.content)['tables_meta']]
        return impala_names, beeswax_names

    impala_tables, beeswax_tables = tables_from_both()
    assert_equal(
        impala_tables, beeswax_tables,
        "\ntest_invalidate_tables: `%s`\nImpala Tables: %s\nBeeswax Tables: %s"
        % (self.DATABASE, ','.join(impala_tables), ','.join(beeswax_tables)))

    hql = """
    CREATE TABLE new_table (a INT);
    """
    resp = _make_query(self.client, hql, wait=True, local=False, max=180.0, database=self.DATABASE)

    impala_tables, beeswax_tables = tables_from_both()
    # Before invalidation only the metastore side lists the new table.
    assert_true('new_table' in beeswax_tables, beeswax_tables)
    assert_false('new_table' in impala_tables, impala_tables)

    resp = self.client.post(reverse('impala:invalidate'), {'database': self.DATABASE})

    impala_tables, beeswax_tables = tables_from_both()
    # Invalidation makes Impala pick up the new table as well.
    assert_equal(
        impala_tables, beeswax_tables,
        "\ntest_invalidate_tables: `%s`\nImpala Tables: %s\nBeeswax Tables: %s"
        % (self.DATABASE, ','.join(impala_tables), ','.join(beeswax_tables)))
def teardown_class(cls):
    """Drop the fixture table and database, verify the cleanup, undo config overrides."""
    # Tables must be dropped before the database that contains them.
    cleanup_statements = [
        """
        DROP TABLE IF EXISTS %(db)s.tweets;
        """ % {'db': cls.DATABASE},
        """
        DROP DATABASE %(db)s CASCADE;
        """ % {'db': cls.DATABASE},
    ]
    for statement in cleanup_statements:
        resp = _make_query(cls.client, statement, database='default', local=False, server_name='impala')
        resp = wait_for_query_to_finish(cls.client, resp, max=180.0)

    # Neither the scratch database nor its '_other' sibling should remain.
    databases = cls.db.get_databases()
    assert_false(cls.DATABASE in databases)
    assert_false('%(db)s_other' % {'db': cls.DATABASE} in databases)

    for restore in cls.finish:
        restore()
def test_get_exec_summary(self):
    """The exec-summary endpoint serves nodes both while open and after the query closes."""
    hql = """
    SELECT COUNT(1) FROM tweets;
    """
    response = _make_query(self.client, hql, database=self.DATABASE, local=False, server_name='impala')
    query_history = QueryHistory.get(json.loads(response.content)['id'])
    wait_for_query_to_finish(self.client, response, max=180.0)

    def check_summary():
        # The summary payload must report success and contain at least one node.
        resp = self.client.post(reverse('impala:get_exec_summary', kwargs={'query_history_id': query_history.id}))
        data = json.loads(resp.content)
        assert_equal(0, data['status'], data)
        assert_true('nodes' in data['summary'], data)
        assert_true(len(data['summary']['nodes']) > 0, data['summary']['nodes'])

    check_summary()
    # Second call hits a query that has since been closed; it must still work.
    check_summary()
def test_explain(self):
    """EXPLAIN for an ORDER BY scan mentions the merging exchange and the HDFS scan."""
    hql = """
    SELECT * FROM tweets ORDER BY row_num;
    """
    response = _make_query(self.client, hql, database=self.DATABASE, local=False,
                           server_name='impala', submission_type='Explain')
    plan = json.loads(response.content)
    assert_true('MERGING-EXCHANGE' in plan['explanation'], plan)
    assert_true('SCAN HDFS' in plan['explanation'], plan)
def setup_class(cls):
    """Provision the Impala scratch database and load the `tweets` fixture rows."""
    cls.finish = []

    # Skip unless a live cluster is available; these tests hit real services.
    if not is_live_cluster():
        raise SkipTest

    cls.client = make_logged_in_client()
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    cls.db = dbms.get(cls.user, get_query_server_config(name='impala'))
    cls.DATABASE = get_db_prefix(name='impala')

    def submit(statement, database):
        # Execute one statement to completion and assert a zero status.
        resp = _make_query(cls.client, statement, database=database, local=False, server_name='impala')
        resp = wait_for_query_to_finish(cls.client, resp, max=180.0)
        content = json.loads(resp.content)
        assert_true(content['status'] == 0, resp.content)

    # Database teardown/recreation, run against the default database.
    for ddl in (
        """
        DROP TABLE IF EXISTS %(db)s.tweets;
        """ % {'db': cls.DATABASE},
        """
        DROP DATABASE IF EXISTS %(db)s CASCADE;
        """ % {'db': cls.DATABASE},
        """
        CREATE DATABASE %(db)s;
        """ % {'db': cls.DATABASE},
    ):
        submit(ddl, 'default')

    # Fixture table plus five rows, executed inside the scratch database.
    for dml in (
        """
        CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET;
        """,
        """
        INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa");
        """,
        """
        INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner.");
        """,
        """
        INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))");
        """,
        """
        INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx");
        """,
        """
        INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE");
        """,
    ):
        submit(dml, cls.DATABASE)
def setUp(self):
    """Point Hue at a live impalad (if configured) and rebuild the fixture tables."""
    self.finish = []

    # We need a real Impala cluster currently; otherwise skip the test.
    if 'impala' not in sys.argv and not os.environ.get('TEST_IMPALAD_HOST'):
        raise SkipTest

    host_override = os.environ.get('TEST_IMPALAD_HOST')
    if host_override:
        # The setter returns a callback that restores the previous value.
        self.finish.append(SERVER_HOST.set_for_testing(host_override))

    self.client = make_logged_in_client()
    self.user = User.objects.get(username='******')
    add_to_group('test')
    self.db = dbms.get(self.user, get_query_server_config(name='impala'))

    # One script: drop/recreate the scratch database and switch into it.
    recreate_hql = """
    USE default;
    DROP TABLE IF EXISTS %(db)s.tweets;
    DROP DATABASE IF EXISTS %(db)s;
    CREATE DATABASE %(db)s;
    USE %(db)s;
    """ % {'db': self.DATABASE}
    resp = _make_query(self.client, recreate_hql, database='default', local=False, server_name='impala')
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # One script: create the fixture table and insert the five rows.
    load_hql = """
    CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET;
    INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa");
    INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner.");
    INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))");
    INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx");
    INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE");
    """
    resp = _make_query(self.client, load_hql, database=self.DATABASE, local=False, server_name='impala')
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

def tearDown(self):
    """Run every restore callback registered during setUp."""
    for restore in self.finish:
        restore()
def test_basic_flow(self):
    """Run a query, page through the results twice, then cancel via the API."""
    dbs = self.db.get_databases()
    assert_true('_impala_builtins' in dbs, dbs)
    assert_true(self.DATABASE in dbs, dbs)

    tables = self.db.get_tables(database=self.DATABASE)
    assert_true('tweets' in tables, tables)

    hql = """
    SELECT * FROM tweets ORDER BY row_num;
    """
    response = _make_query(self.client, hql, database=self.DATABASE, local=False, server_name='impala')
    submission = json.loads(response.content)
    query_history = QueryHistory.get(submission['id'])
    response = wait_for_query_to_finish(self.client, response, max=180.0)

    # Each fetch typically yields fewer than 5 rows; fetch from the running
    # offset until the full result set is collected.
    rows = []
    while len(rows) < 5:
        page = fetch_query_result_data(self.client, response, n=len(rows), server_name='impala')
        rows += page['results']
    assert_equal([1, 2, 3, 4, 5], [row[0] for row in rows])

    # Fetching again from the start must reproduce the same rows.
    rows_again = []
    while len(rows_again) < 5:
        page = fetch_query_result_data(self.client, response, n=len(rows_again), server_name='impala')
        rows_again += page['results']
    assert_equal(rows_again, rows)

    # Cancelling a finished query should still report success.
    resp = self.client.post(reverse('impala:api_cancel_query', kwargs={'query_history_id': query_history.id}))
    content = json.loads(resp.content)
    assert_equal(0, content['status'])
def test_get_runtime_profile(self):
    """The runtime-profile endpoint returns an execution profile for a finished query."""
    hql = """
    SELECT COUNT(1) FROM tweets;
    """
    response = _make_query(self.client, hql, database=self.DATABASE, local=False, server_name='impala')
    query_history = QueryHistory.get(json.loads(response.content)['id'])
    wait_for_query_to_finish(self.client, response, max=180.0)

    resp = self.client.post(reverse('impala:get_runtime_profile', kwargs={'query_history_id': query_history.id}))
    data = json.loads(resp.content)
    assert_equal(0, data['status'], data)
    assert_true('Execution Profile' in data['profile'], data)
def teardown_class(cls):
    """Drop the fixture table and database, verify cleanup, undo config overrides."""
    # We need to drop tables before dropping the database.
    hql = """
    USE default;
    DROP TABLE IF EXISTS %(db)s.tweets;
    DROP DATABASE %(db)s;
    """ % {'db': cls.DATABASE}
    resp = _make_query(cls.client, hql, database='default', local=False, server_name='impala')
    resp = wait_for_query_to_finish(cls.client, resp, max=30.0)

    # Check the cleanup. Bug fix: the original referenced an undefined bare
    # name `db` and a non-existent `cls.db_name`; the class stores these as
    # `cls.db` and `cls.DATABASE` (as the sibling teardown_class does), so the
    # original would raise NameError before the finish callbacks ran.
    databases = cls.db.get_databases()
    assert_false(cls.DATABASE in databases)
    assert_false('%(db)s_other' % {'db': cls.DATABASE} in databases)

    # Restore any configuration overridden during setup.
    for f in cls.finish:
        f()
def teardown_class(cls):
    """Remove the fixture table and scratch database, then confirm they are gone."""
    # The table has to be dropped before the database holding it.
    for statement in (
        """
        DROP TABLE IF EXISTS %(db)s.tweets;
        """ % {'db': cls.DATABASE},
        """
        DROP DATABASE %(db)s CASCADE;
        """ % {'db': cls.DATABASE},
    ):
        resp = _make_query(cls.client, statement, database='default', local=False, server_name='impala')
        resp = wait_for_query_to_finish(cls.client, resp, max=180.0)

    # Verify the cleanup: neither the database nor its '_other' sibling remains.
    databases = cls.db.get_databases()
    assert_false(cls.DATABASE in databases)
    assert_false('%(db)s_other' % {'db': cls.DATABASE} in databases)

    # Undo configuration overrides registered during setup.
    for restore in cls.finish:
        restore()
def test_basic_flow(self):
    """Query the fixture table, page through results twice, then cancel the query."""
    dbs = self.db.get_databases()
    assert_true("_impala_builtins" in dbs, dbs)
    assert_true(self.DATABASE in dbs, dbs)

    tables = self.db.get_tables(database=self.DATABASE)
    assert_true("tweets" in tables, tables)

    hql = """
    SELECT * FROM tweets ORDER BY row_num;
    """
    response = _make_query(self.client, hql, database=self.DATABASE, local=False, server_name="impala")
    query_history = QueryHistory.get(json.loads(response.content)["id"])
    response = wait_for_query_to_finish(self.client, response, max=180.0)

    def collect_all(expected_count):
        # Fetches usually return fewer rows than asked; keep pulling from the
        # current offset until `expected_count` rows have arrived.
        collected = []
        while len(collected) < expected_count:
            page = fetch_query_result_data(self.client, response, n=len(collected), server_name="impala")
            collected += page["results"]
        return collected

    rows = collect_all(5)
    assert_equal([1, 2, 3, 4, 5], [row[0] for row in rows])

    # A second pass from offset 0 (start over) must return identical rows.
    assert_equal(collect_all(5), rows)

    # Cancelling the (already finished) query must succeed.
    resp = self.client.post(reverse("impala:api_cancel_query", kwargs={"query_history_id": query_history.id}))
    content = json.loads(resp.content)
    assert_equal(0, content["status"])
def teardown_class(cls):
    """Drop the fixture table and database, verify cleanup, undo config overrides."""
    # We need to drop tables before dropping the database.
    hql = """
    USE default;
    DROP TABLE IF EXISTS %(db)s.tweets;
    DROP DATABASE %(db)s;
    """ % {'db': cls.DATABASE}
    resp = _make_query(cls.client, hql, database='default', local=False, server_name='impala')
    resp = wait_for_query_to_finish(cls.client, resp, max=30.0)

    # Check the cleanup. Bug fix: the original used an undefined bare name
    # `db` and a non-existent attribute `cls.db_name`; the class exposes
    # these as `cls.db` and `cls.DATABASE` (matching the other
    # teardown_class in this file), so the original raised NameError here.
    databases = cls.db.get_databases()
    assert_false(cls.DATABASE in databases)
    assert_false('%(db)s_other' % {'db': cls.DATABASE} in databases)

    # Restore any configuration overridden during setup.
    for f in cls.finish:
        f()
def test_get_exec_summary(self):
    """Exec summary is available right after the query finishes and again once closed."""
    hql = """
    SELECT COUNT(1) FROM tweets;
    """
    response = _make_query(self.client, hql, database=self.DATABASE, local=False, server_name='impala')
    submission = json.loads(response.content)
    query_history = QueryHistory.get(submission['id'])
    wait_for_query_to_finish(self.client, response, max=180.0)

    # Request the summary twice: the second request targets a query that has
    # been closed in the meantime and must still be served.
    for _ in range(2):
        resp = self.client.post(reverse('impala:get_exec_summary', kwargs={'query_history_id': query_history.id}))
        data = json.loads(resp.content)
        assert_equal(0, data['status'], data)
        assert_true('nodes' in data['summary'], data)
        assert_true(len(data['summary']['nodes']) > 0, data['summary']['nodes'])