def save_and_verify(select_resp, target_dir, verify=True):
    """Save the query's results into an HDFS directory and, optionally,
    verify the saved data (expects the 256-row sample table)."""
    query_id = select_resp.context['query'].id
    form_data = {
        'save_target': beeswax.forms.SaveResultsForm.SAVE_TYPE_DIR,
        'target_dir': target_dir,
        'save': True
    }
    resp = self.client.post('/beeswax/save_results/%s' % (query_id,), form_data, follow=True)
    wait_for_query_to_finish(self.client, resp, max=60)

    # Check that data is right
    if verify:
        listing = self.cluster.fs.listdir(target_dir)
        assert_true(len(listing) >= 1)
        # Concatenate every part file written under the target directory.
        chunks = []
        for name in listing:
            part = self.cluster.fs.open(target_dir + '/' + name)
            chunks.append(part.read())
            part.close()
        contents = "".join(chunks)
        assert_equal(256, len(contents.strip().split('\n')))
        assert_true('255' in contents)
    return resp
def test_drop_partition(self):
    """Dropping a partition through the UI issues ALTER TABLE ... DROP ... PURGE."""
    partition_spec = "baz='baz_drop',boom='boom_drop'"
    partitions_url = "/metastore/table/%s/test_partitions/partitions" % self.db_name

    # Create the partition first so there is something to drop.
    add_hql = 'ALTER TABLE `%s`.`test_partitions` ADD IF NOT EXISTS PARTITION (%s);' % (
        self.db_name, partition_spec)
    resp = _make_query(self.client, add_hql, database=self.db_name)
    wait_for_query_to_finish(self.client, resp, max=30.0)

    # The partition must now be listed.
    response = self.client.get(partitions_url)
    assert_true("baz_drop" in response.content)

    # Drop it via the drop endpoint.
    self.client.post(
        "/metastore/table/%s/test_partitions/partitions/drop" % self.db_name,
        {'partition_selection': [partition_spec]},
        follow=True)

    # The most recent history entry must be the purge statement.
    query = QueryHistory.objects.latest('id')
    assert_equal_mod_whitespace(
        "ALTER TABLE `%s`.`test_partitions` DROP IF EXISTS PARTITION (%s) PURGE" % (
            self.db_name, partition_spec),
        query.query)

    # And the partition must be gone from the listing.
    response = self.client.get(partitions_url)
    assert_false("baz_drop" in response.content)
def test_drop_multi_databases(self):
    """Several databases (one of them non-empty) can be dropped in one request."""
    databases = ['%s_test_drop_%d' % (self.db_name, i) for i in (1, 2, 3)]
    db1, db2, db3 = databases

    try:
        hql = """
      CREATE DATABASE %(db1)s;
      CREATE DATABASE %(db2)s;
      CREATE DATABASE %(db3)s;
    """ % {'db1': db1, 'db2': db2, 'db3': db3}
        resp = _make_query(self.client, hql)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # Add a table to db1 so at least one database is non-empty.
        resp = _make_query(self.client,
                           "CREATE TABLE " + "`" + db1 + "`." + "`test_drop_1` (a int);",
                           database=db1)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)
        assert_equal(resp.status_code, 200)

        # GET renders the confirmation page; POST performs the drop.
        resp = self.client.get('/metastore/databases/drop', follow=True)
        assert_true('want to delete' in resp.content, resp.content)
        resp = self.client.post('/metastore/databases/drop',
                                {u'database_selection': databases, 'is_embeddable': True})
        assert_equal(resp.status_code, 302)
    finally:
        for db in databases:
            make_query(self.client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db}, wait=True)
def test_drop_multi_databases(self):
    """Several databases (one of them non-empty) can be dropped in one request."""
    databases = ['%s_test_drop_%d' % (self.db_name, i) for i in (1, 2, 3)]
    db1, db2, db3 = databases

    try:
        hql = """
      CREATE DATABASE %(db1)s;
      CREATE DATABASE %(db2)s;
      CREATE DATABASE %(db3)s;
    """ % {'db1': db1, 'db2': db2, 'db3': db3}
        resp = _make_query(self.client, hql)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # Add a table to db1 so at least one database is non-empty.
        resp = _make_query(self.client,
                           "CREATE TABLE " + "`" + db1 + "`." + "`test_drop_1` (a int);",
                           database=db1)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)
        assert_equal(resp.status_code, 200)

        # GET renders the confirmation page; POST performs the drop.
        resp = self.client.get('/metastore/databases/drop', follow=True)
        assert_true('want to delete' in resp.content, resp.content)
        resp = self.client.post('/metastore/databases/drop',
                                {u'database_selection': databases})
        assert_equal(resp.status_code, 302)
    finally:
        for db in databases:
            make_query(self.client, 'DROP DATABASE IF EXISTS %(db)s' % {'db': db}, wait=True)
def test_get_exec_summary(self):
    """The exec-summary endpoint returns a populated plan summary, both
    right after the query finishes and on a subsequent call.

    The two identical assertion sequences were factored into a local helper.
    """
    query = """
      SELECT COUNT(1) FROM tweets;
    """
    response = _make_query(self.client, query, database=self.DATABASE,
                           local=False, server_name='impala')
    content = json.loads(response.content)
    query_history = QueryHistory.get(content['id'])
    wait_for_query_to_finish(self.client, response, max=180.0)

    def check_summary():
        # The endpoint must succeed (status 0) and report at least one plan node.
        resp = self.client.post(
            reverse('impala:get_exec_summary',
                    kwargs={'query_history_id': query_history.id}))
        data = json.loads(resp.content)
        assert_equal(0, data['status'], data)
        assert_true('nodes' in data['summary'], data)
        assert_true(len(data['summary']['nodes']) > 0, data['summary']['nodes'])

    check_summary()
    # Attempt to call get_exec_summary again on the finished (closed) query.
    check_summary()
def setup_class(cls):
    # Impala end-to-end fixture: requires a live cluster, then provisions a
    # scratch database containing a small parquet-backed `tweets` table.
    cls.finish = []

    if not is_live_cluster():
        raise SkipTest

    cls.client = make_logged_in_client()
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    cls.db = dbms.get(cls.user, get_query_server_config(name='impala'))
    cls.DATABASE = get_db_prefix(name='impala')

    # Start from a clean slate: drop leftovers from previous runs, then
    # recreate the scratch database. Each statement is run and checked.
    queries = [
        'DROP TABLE IF EXISTS %(db)s.tweets;' % {
            'db': cls.DATABASE
        },
        'DROP DATABASE IF EXISTS %(db)s CASCADE;' % {
            'db': cls.DATABASE
        },
        'CREATE DATABASE %(db)s;' % {
            'db': cls.DATABASE
        }
    ]

    for query in queries:
        resp = _make_query(cls.client, query, database='default',
                           local=False, server_name='impala')
        resp = wait_for_query_to_finish(cls.client, resp, max=180.0)
        content = json.loads(resp.content)
        # status 0 means the statement succeeded.
        assert_true(content['status'] == 0, resp.content)

    # Seed the table, one INSERT per statement.
    queries = ["""
      CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET;
    """, """
      INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa");
    """, """
      INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner.");
    """, """
      INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))");
    """, """
      INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx");
    """, """
      INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE");
    """]

    for query in queries:
        resp = _make_query(cls.client, query, database=cls.DATABASE,
                           local=False, server_name='impala')
        resp = wait_for_query_to_finish(cls.client, resp, max=180.0)
        content = json.loads(resp.content)
        assert_true(content['status'] == 0, resp.content)
def test_show_tables(self):
    # With HS2_GET_TABLES_MAX lowered to 3, listings of more than 3 tables
    # skip per-table metadata (comment/type) and return names only.
    # Set max limit to 3
    resets = [HS2_GET_TABLES_MAX.set_for_testing(3)]

    try:
        hql = """
        CREATE TABLE test_show_tables_1 (a int) COMMENT 'Test for show_tables';
        CREATE TABLE test_show_tables_2 (a int) COMMENT 'Test for show_tables';
        CREATE TABLE test_show_tables_3 (a int) COMMENT 'Test for show_tables';
      """
        resp = _make_query(self.client, hql, database=self.db_name)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # Table should have been created.
        # 3 tables == limit: full metadata is expected.
        response = self.client.get(
            "/metastore/tables/%s?filter=show_tables" % self.db_name)
        assert_equal(200, response.status_code)
        assert_equal(len(response.context['tables']), 3)
        assert_equal(response.context['has_metadata'], True)
        assert_true('name' in response.context["tables"][0])
        assert_true('comment' in response.context["tables"][0])
        assert_true('type' in response.context["tables"][0])

        hql = """
        CREATE TABLE test_show_tables_4 (a int) COMMENT 'Test for show_tables';
        CREATE TABLE test_show_tables_5 (a int) COMMENT 'Test for show_tables';
      """
        resp = _make_query(self.client, hql, database=self.db_name)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # Table should have been created.
        # 5 tables > limit: names only, no metadata.
        response = self.client.get(
            "/metastore/tables/%s?filter=show_tables" % self.db_name)
        assert_equal(200, response.status_code)
        assert_equal(len(response.context['tables']), 5)
        assert_equal(response.context['has_metadata'], False)
        assert_true('name' in response.context["tables"][0])
        assert_false('comment' in response.context["tables"][0], response.context["tables"])
        assert_false('type' in response.context["tables"][0])

        hql = """
        CREATE INDEX test_index ON TABLE test_show_tables_1 (a) AS 'COMPACT' WITH DEFERRED REBUILD;
      """
        resp = _make_query(self.client, hql, wait=True, local=False,
                           max=30.0, database=self.db_name)

        # By default, index table should not appear in show tables view
        response = self.client.get("/metastore/tables/%s" % self.db_name)
        assert_equal(200, response.status_code)
        assert_false('test_index' in response.context['tables'])
    finally:
        # Restore HS2_GET_TABLES_MAX.
        for reset in resets:
            reset()
def setUp(self):
    """Provision a scratch Impala database with a small `tweets` table.

    Skips unless an Impala cluster is reachable: either 'impala' was passed
    on the command line or TEST_IMPALAD_HOST is set in the environment.
    """
    self.finish = []

    # We need a real Impala cluster currently.
    # (Fixed idiom: `'impala' not in ...` instead of `not 'impala' in ...`.)
    if 'impala' not in sys.argv and not os.environ.get('TEST_IMPALAD_HOST'):
        raise SkipTest

    if os.environ.get('TEST_IMPALAD_HOST'):
        self.finish.append(
            SERVER_HOST.set_for_testing(os.environ.get('TEST_IMPALAD_HOST')))

    self.client = make_logged_in_client()
    self.user = User.objects.get(username='******')
    add_to_group('test')
    self.db = dbms.get(self.user, get_query_server_config(name='impala'))

    # Recreate the scratch database from scratch, then switch to it.
    hql = """
      USE default;
      DROP TABLE IF EXISTS %(db)s.tweets;
      DROP DATABASE IF EXISTS %(db)s;
      CREATE DATABASE %(db)s;

      USE %(db)s;
    """ % {
        'db': self.DATABASE
    }

    resp = _make_query(self.client, hql, database='default', local=False,
                       server_name='impala')
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Seed the table with a handful of rows.
    hql = """
      CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET;

      INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa");
      INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner.");
      INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))");
      INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx");
      INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE");
    """

    resp = _make_query(self.client, hql, database=self.DATABASE, local=False,
                       server_name='impala')
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)


def tearDown(self):
    # Undo any configuration overrides installed in setUp.
    for f in self.finish:
        f()
def test_explain_query(self):
    """EXPLAIN surfaces parser errors and, for valid SQL, an explanation."""
    client = self.client

    # Nonsense SQL: the Hive parser error must be reported back.
    response = _make_query(client, "SELECT KITTENS ARE TASTY", submission_type="Explain")
    assert_true("ParseException" in response.context["error_message"])

    create_table = "CREATE TABLE test_explain (foo INT, bar STRING);"
    response = _make_query(client, create_table)
    wait_for_query_to_finish(client, response)

    # A valid query over the new table yields a non-empty explanation.
    response = _make_query(client, "SELECT SUM(foo) FROM test_explain", submission_type="Explain")
    assert_true(response.context["explanation"])
def test_explain_query(self):
    """EXPLAIN surfaces parser errors and, for valid SQL, an explanation."""
    client = self.client

    # Nonsense SQL: the parser error must be reported back.
    response = _make_query(client, "SELECT KITTENS ARE TASTY", submission_type="Explain")
    assert_true("Parse Error" in response.context["error_message"])

    create_table = "CREATE TABLE test_explain (foo INT, bar STRING);"
    response = _make_query(client, create_table)
    wait_for_query_to_finish(client, response)

    # A valid query over the new table yields a non-empty explanation.
    response = _make_query(client, "SELECT SUM(foo) FROM test_explain", submission_type="Explain")
    assert_true(response.context["explanation"])
def test_show_tables(self):
    # JSON table listing: every entry carries name/comment/type metadata,
    # and index backing tables are hidden from the default view.
    hql = """
    CREATE TABLE test_show_tables_1 (a int) COMMENT 'Test for show_tables';
    CREATE TABLE test_show_tables_2 (a int) COMMENT 'Test for show_tables';
    CREATE TABLE test_show_tables_3 (a int) COMMENT 'Test for show_tables';
  """
    resp = _make_query(self.client, hql, database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Table should have been created
    response = self.client.get(
        "/metastore/tables/%s?filter=show_tables&format=json" % self.db_name)
    assert_equal(200, response.status_code)
    data = json.loads(response.content)
    assert_equal(len(data['tables']), 3)
    assert_true('name' in data["tables"][0])
    assert_true('comment' in data["tables"][0])
    assert_true('type' in data["tables"][0])

    hql = """
    CREATE TABLE test_show_tables_4 (a int) COMMENT 'Test for show_tables';
    CREATE TABLE test_show_tables_5 (a int) COMMENT 'Test for show_tables';
  """
    resp = _make_query(self.client, hql, database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Table should have been created; all 5 tables still expose metadata.
    response = self.client.get(
        "/metastore/tables/%s?filter=show_tables&format=json" % self.db_name)
    assert_equal(200, response.status_code)
    data = json.loads(response.content)
    assert_equal(len(data['tables']), 5)
    assert_true('name' in data["tables"][0])
    assert_true('comment' in data["tables"][0])
    assert_true('type' in data["tables"][0])

    hql = """
    CREATE INDEX test_index ON TABLE test_show_tables_1 (a) AS 'COMPACT' WITH DEFERRED REBUILD;
  """
    resp = _make_query(self.client, hql, wait=True, local=False, max=30.0,
                       database=self.db_name)

    # By default, index table should not appear in show tables view
    response = self.client.get("/metastore/tables/%s?format=json" % self.db_name)
    assert_equal(200, response.status_code)
    data = json.loads(response.content)
    assert_false('test_index' in data['tables'])
def test_select_multi_db(self):
    """Queries run against whichever database was selected; unknown databases error."""
    cases = [
        # (statement, database, expected fragment in the response)
        ('SELECT * FROM test LIMIT 5', 'default', 'Query Results'),
        ('SHOW TABLES', 'other_db', 'Query Results'),
        ('SELECT * FROM test LIMIT 5', 'not_there', 'Error'),
    ]
    for statement, database, expected in cases:
        response = _make_query(self.client, statement, local=False, database=database)
        response = wait_for_query_to_finish(self.client, response)
        assert_true(expected in response.content, response.content)
def test_show_tables(self):
    # With HS2_GET_TABLES_MAX lowered to 3, listings of more than 3 tables
    # skip per-table metadata (comment/type) and return names only.
    # Set max limit to 3
    resets = [
        HS2_GET_TABLES_MAX.set_for_testing(3)
    ]

    try:
        hql = """
      CREATE TABLE test_show_tables_1 (a int) COMMENT 'Test for show_tables';
      CREATE TABLE test_show_tables_2 (a int) COMMENT 'Test for show_tables';
      CREATE TABLE test_show_tables_3 (a int) COMMENT 'Test for show_tables';
    """
        resp = _make_query(self.client, hql, database=self.db_name)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # Table should have been created.
        # 3 tables == limit: full metadata is expected.
        response = self.client.get("/metastore/tables/%s?filter=show_tables" % self.db_name)
        assert_equal(200, response.status_code)
        assert_equal(len(response.context['tables']), 3)
        assert_equal(response.context['has_metadata'], True)
        assert_true('name' in response.context["tables"][0])
        assert_true('comment' in response.context["tables"][0])
        assert_true('type' in response.context["tables"][0])

        hql = """
      CREATE TABLE test_show_tables_4 (a int) COMMENT 'Test for show_tables';
      CREATE TABLE test_show_tables_5 (a int) COMMENT 'Test for show_tables';
    """
        resp = _make_query(self.client, hql, database=self.db_name)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # Table should have been created.
        # 5 tables > limit: names only, no metadata.
        response = self.client.get("/metastore/tables/%s?filter=show_tables" % self.db_name)
        assert_equal(200, response.status_code)
        assert_equal(len(response.context['tables']), 5)
        assert_equal(response.context['has_metadata'], False)
        assert_true('name' in response.context["tables"][0])
        assert_false('comment' in response.context["tables"][0], response.context["tables"])
        assert_false('type' in response.context["tables"][0])

        hql = """
      CREATE INDEX test_index ON TABLE test_show_tables_1 (a) AS 'COMPACT' WITH DEFERRED REBUILD;
    """
        resp = _make_query(self.client, hql, wait=True, local=False, max=30.0,
                           database=self.db_name)

        # By default, index table should not appear in show tables view
        response = self.client.get("/metastore/tables/%s" % self.db_name)
        assert_equal(200, response.status_code)
        assert_false('test_index' in response.context['tables'])
    finally:
        # Restore HS2_GET_TABLES_MAX.
        for reset in resets:
            reset()
def test_get_runtime_profile(self):
    """The runtime-profile endpoint returns the Impala execution profile."""
    query = """
      SELECT COUNT(1) FROM tweets;
    """
    response = _make_query(self.client, query, database=self.DATABASE,
                           local=False, server_name='impala')
    query_history = QueryHistory.get(json.loads(response.content)['id'])
    wait_for_query_to_finish(self.client, response, max=180.0)

    profile_url = reverse('impala:get_runtime_profile',
                          kwargs={'query_history_id': query_history.id})
    data = json.loads(self.client.post(profile_url).content)
    assert_equal(0, data['status'], data)
    assert_true('Execution Profile' in data['profile'], data)
def save_and_verify(select_resp, target_tbl):
    """Check that saving to table works"""
    query_id = select_resp.context['query'].id
    form_data = {
        'save_target': beeswax.forms.SaveResultsForm.SAVE_TYPE_TBL,
        'target_table': target_tbl,
        'save': True
    }
    resp = self.client.post('/beeswax/save_results/%s' % (query_id,), form_data, follow=True)
    wait_for_query_to_finish(self.client, resp, max=60)

    # Check that data is right. The SELECT may not give us the whole table.
    resp = _make_query(self.client, 'SELECT * FROM %s' % (target_tbl,), wait=True)
    for row_num in xrange(90):
        assert_equal([str(row_num), '0x%x' % (row_num,)], resp.context['results'][row_num])
def test_has_write_access_backend(self):
    """Metastore drop endpoints reject users lacking the 'write' permission."""
    if is_live_cluster():
        raise SkipTest('HUE-2900: Needs debugging on live cluster')

    client = make_logged_in_client(username='******',
                                   groupname='write_access_backend',
                                   is_superuser=False)
    for app in ("metastore", "beeswax"):
        grant_access("write_access_backend", "write_access_backend", app)
    user = User.objects.get(username='******')

    resp = _make_query(client, 'CREATE TABLE test_perm_1 (a int);', database=self.db_name)
    # Only fails if we were using Sentry and won't allow SELECT to user
    resp = wait_for_query_to_finish(client, resp, max=30.0)

    def expect_status(client, http_codes):
        # Both the confirmation GET and the drop POST must respond with one
        # of the expected status codes.
        drop_url = '/metastore/tables/drop/%s' % self.db_name
        resp = client.get(drop_url)
        assert_true(resp.status_code in http_codes, resp.content)
        resp = client.post(drop_url, {u'table_selection': [u'test_perm_1']})
        assert_true(resp.status_code in http_codes, resp.content)

    expect_status(client, [301])  # Denied

    # Grant the group the metastore write permission and retry.
    group, _ = Group.objects.get_or_create(name='write_access_backend')
    perm, _ = HuePermission.objects.get_or_create(app='metastore', action='write')
    GroupPermission.objects.get_or_create(group=group, hue_permission=perm)

    expect_status(client, [200, 302])  # Ok
def test_alter_table(self):
    """Rename a table and change its comment via the alter_table endpoint;
    an invalid new name must produce an error payload."""
    resp = _make_query(self.client,
                       "CREATE TABLE test_alter_table (a int) COMMENT 'Before Alter';",
                       database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    resp = self.client.get('/metastore/table/%s/test_alter_table' % self.db_name)
    # Fixed: these were assert_true('literal', resp.content), which is always
    # true -- the literal was the truth value and resp.content only the message.
    assert_true('test_alter_table' in resp.content, resp.content)
    assert_true('Before Alter' in resp.content, resp.content)

    # Alter name
    resp = self.client.post(
        reverse("metastore:alter_table",
                kwargs={'database': self.db_name, 'table': 'test_alter_table'}),
        {'new_table_name': 'table_altered'})
    json_resp = json.loads(resp.content)
    assert_equal('table_altered', json_resp['data']['name'], json_resp)

    # Alter comment
    resp = self.client.post(
        reverse("metastore:alter_table",
                kwargs={'database': self.db_name, 'table': 'table_altered'}),
        {'comment': 'After Alter'})
    json_resp = json.loads(resp.content)
    assert_equal('After Alter', json_resp['data']['comment'], json_resp)

    # Invalid table name returns error response
    resp = self.client.post(
        reverse("metastore:alter_table",
                kwargs={'database': self.db_name, 'table': 'table_altered'}),
        {'new_table_name': 'bad name'})
    json_resp = json.loads(resp.content)
    assert_equal(1, json_resp['status'], json_resp)
    assert_true('Failed to alter table' in json_resp['data'], json_resp)
def test_basic_flow(self):
    """SELECT over Impala: page through the full result set, then start over."""
    dbs = self.db.get_databases()
    assert_true('_impala_builtins' in dbs, dbs)
    assert_true(self.DATABASE in dbs, dbs)

    tables = self.db.get_tables(database=self.DATABASE)
    assert_true('tweets' in tables, tables)

    QUERY = """
      SELECT * FROM tweets ORDER BY row_num;
    """
    response = _make_query(self.client, QUERY, database=self.DATABASE,
                           local=False, server_name='impala')
    response = wait_for_query_to_finish(self.client, response, max=180.0)

    def fetch_all_rows():
        # Each fetch may return fewer than 5 rows; keep fetching from the
        # current offset until the whole result set has been read.
        rows = []
        while len(rows) < 5:
            content = fetch_query_result_data(self.client, response,
                                              n=len(rows), server_name='impala')
            rows += content['results']
        return rows

    results = fetch_all_rows()
    assert_equal([1, 2, 3, 4, 5], [col[0] for col in results])

    # Check start over: fetching from the beginning gives the same rows.
    assert_equal(fetch_all_rows(), results)
def save_and_verify(select_resp, target_tbl):
    """Check that saving to table works"""
    query_id = select_resp.context['query'].id
    form_data = {
        'save_target': beeswax.forms.SaveResultsForm.SAVE_TYPE_TBL,
        'target_table': target_tbl,
        'save': True
    }
    resp = self.client.post('/beeswax/save_results/%s' % (query_id,), form_data, follow=True)
    wait_for_query_to_finish(self.client, resp, max=120)

    # Check that data is right. The SELECT may not give us the whole table.
    resp = _make_query(self.client, 'SELECT * FROM %s' % (target_tbl,),
                       wait=True, local=False)
    for row_num in xrange(90):
        assert_equal([str(row_num), '0x%x' % (row_num,)], resp.context['results'][row_num])
def test_alter_column(self):
    """Alter a column's name, type and comment via the alter_column endpoint;
    an invalid request must produce an error payload."""
    resp = _make_query(self.client,
                       'CREATE TABLE test_alter_column (before_alter int);',
                       database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    resp = self.client.get('/metastore/table/%s/test_alter_column' % self.db_name)
    # Fixed: these were assert_true('literal', resp.content), which is always
    # true -- the literal was the truth value and resp.content only the message.
    assert_true('before_alter' in resp.content, resp.content)
    assert_true('int' in resp.content, resp.content)

    # Alter name, type and comment
    resp = self.client.post(
        reverse("metastore:alter_column",
                kwargs={'database': self.db_name, 'table': 'test_alter_column'}),
        {'column': 'before_alter', 'new_column_name': 'after_alter',
         'new_column_type': 'string', 'comment': 'alter comment'}
    )
    json_resp = json.loads(resp.content)
    assert_equal('after_alter', json_resp['data']['name'], json_resp)
    assert_equal('string', json_resp['data']['type'], json_resp)
    assert_equal('alter comment', json_resp['data']['comment'], json_resp)

    # Invalid column type returns error response
    resp = self.client.post(
        reverse("metastore:alter_column",
                kwargs={'database': self.db_name, 'table': 'test_alter_column'}),
        {'column': 'before_alter', 'new_column_name': 'foo'})
    json_resp = json.loads(resp.content)
    assert_equal(1, json_resp['status'], json_resp)
    assert_true('Failed to alter column' in json_resp['message'], json_resp)
def test_alter_column(self):
    """Alter a column's name, type and comment via the alter_column endpoint;
    an invalid request must produce an error payload."""
    resp = _make_query(self.client,
                       "CREATE TABLE test_alter_column (before_alter int);",
                       database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    resp = self.client.get("/metastore/table/%s/test_alter_column" % self.db_name)
    # Fixed: these were assert_true("literal", resp.content), which is always
    # true -- the literal was the truth value and resp.content only the message.
    assert_true("before_alter" in resp.content, resp.content)
    assert_true("int" in resp.content, resp.content)

    # Alter name, type and comment
    resp = self.client.post(
        reverse("metastore:alter_column",
                kwargs={"database": self.db_name, "table": "test_alter_column"}),
        {
            "column": "before_alter",
            "new_column_name": "after_alter",
            "new_column_type": "string",
            "comment": "alter comment",
        },
    )
    json_resp = json.loads(resp.content)
    assert_equal("after_alter", json_resp["data"]["name"], json_resp)
    assert_equal("string", json_resp["data"]["type"], json_resp)
    assert_equal("alter comment", json_resp["data"]["comment"], json_resp)

    # Invalid column type returns error response
    resp = self.client.post(
        reverse("metastore:alter_column",
                kwargs={"database": self.db_name, "table": "test_alter_column"}),
        {"column": "before_alter", "new_column_name": "foo"},
    )
    json_resp = json.loads(resp.content)
    assert_equal(1, json_resp["status"], json_resp)
    # NOTE(review): a sibling version of this test checks json_resp['message']
    # here instead of 'data' -- confirm which key the endpoint actually uses.
    assert_true("Failed to alter column" in json_resp["data"], json_resp)
def test_alter_table(self):
    """Rename a table and change its comment via the alter_table endpoint;
    an invalid new name must produce an error payload."""
    resp = _make_query(
        self.client,
        "CREATE TABLE test_alter_table (a int) COMMENT 'Before Alter';",
        database=self.db_name
    )
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    resp = self.client.get("/metastore/table/%s/test_alter_table" % self.db_name)
    # Fixed: these were assert_true("literal", resp.content), which is always
    # true -- the literal was the truth value and resp.content only the message.
    assert_true("test_alter_table" in resp.content, resp.content)
    assert_true("Before Alter" in resp.content, resp.content)

    # Alter name
    resp = self.client.post(
        reverse("metastore:alter_table",
                kwargs={"database": self.db_name, "table": "test_alter_table"}),
        {"new_table_name": "table_altered"},
    )
    json_resp = json.loads(resp.content)
    assert_equal("table_altered", json_resp["data"]["name"], json_resp)

    # Alter comment
    resp = self.client.post(
        reverse("metastore:alter_table",
                kwargs={"database": self.db_name, "table": "table_altered"}),
        {"comment": "After Alter"},
    )
    json_resp = json.loads(resp.content)
    assert_equal("After Alter", json_resp["data"]["comment"], json_resp)

    # Invalid table name returns error response
    resp = self.client.post(
        reverse("metastore:alter_table",
                kwargs={"database": self.db_name, "table": "table_altered"}),
        {"new_table_name": "bad name"},
    )
    json_resp = json.loads(resp.content)
    assert_equal(1, json_resp["status"], json_resp)
    assert_true("Failed to alter table" in json_resp["data"], json_resp)
def test_has_write_access_backend(self):
    """Metastore drop endpoints reject users lacking the 'write' permission."""
    client = make_logged_in_client(
        username="******", groupname="write_access_backend", is_superuser=False
    )
    for app in ("metastore", "beeswax"):
        grant_access("write_access_backend", "write_access_backend", app)
    user = User.objects.get(username="******")

    resp = _make_query(
        client, "CREATE TABLE test_perm_1 (a int);", database=self.db_name
    )  # Only fails if we were using Sentry and won't allow SELECT to user
    resp = wait_for_query_to_finish(client, resp, max=30.0)

    def expect_status(client, http_codes):
        # Both the confirmation GET and the drop POST must respond with one
        # of the expected status codes.
        drop_url = "/metastore/tables/drop/%s" % self.db_name
        resp = client.get(drop_url)
        assert_true(resp.status_code in http_codes, resp.content)
        resp = client.post(drop_url, {u"table_selection": [u"test_perm_1"]})
        assert_true(resp.status_code in http_codes, resp.content)

    expect_status(client, [301])  # Denied

    # Grant the group the metastore write permission and retry.
    group, _ = Group.objects.get_or_create(name="write_access_backend")
    perm, _ = HuePermission.objects.get_or_create(app="metastore", action="write")
    GroupPermission.objects.get_or_create(group=group, hue_permission=perm)

    expect_status(client, [200, 302])  # Ok
def test_drop_multi_databases(self):
    """Several databases can be dropped in a single request."""
    databases = ["%s_test_drop_%d" % (self.db_name, i) for i in (1, 2, 3)]

    try:
        hql = """
      CREATE DATABASE %(db1)s;
      CREATE DATABASE %(db2)s;
      CREATE DATABASE %(db3)s;
    """ % {
            "db1": databases[0],
            "db2": databases[1],
            "db3": databases[2],
        }
        resp = _make_query(self.client, hql)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # GET renders the confirmation page; POST performs the drop.
        resp = self.client.get("/metastore/databases/drop", follow=True)
        assert_true("want to delete" in resp.content, resp.content)
        resp = self.client.post("/metastore/databases/drop",
                                {u"database_selection": databases})
        assert_equal(resp.status_code, 302)
    finally:
        for db in databases:
            make_query(self.client, "DROP DATABASE IF EXISTS %(db)s" % {"db": db}, wait=True)
def test_basic_flow(self):
    # Browses databases -> tables -> table detail -> sample data, checking
    # that the implicit sample-data query stays out of the query history.

    # Default database should exist
    response = self.client.get("/metastore/databases")
    assert_true(self.db_name in response.context["databases"])

    # Table should have been created
    response = self.client.get("/metastore/tables/")
    assert_equal(200, response.status_code)

    # Switch databases
    response = self.client.get("/metastore/tables/%s" % self.db_name)
    assert_true('name' in response.context["tables"][0])
    assert_true("test" in response.context["table_names"])

    # Should default to "default" database
    response = self.client.get("/metastore/tables/not_there")
    assert_equal(200, response.status_code)

    # And have detail
    response = self.client.get("/metastore/table/%s/test" % self.db_name)
    assert_true("foo" in response.content)
    assert_true("SerDe Library" in response.content, response.content)

    # Remember the number of history items. Use a generic fragment 'test' to pass verification.
    history_cnt = verify_history(self.client, fragment='test')

    # Show table data.
    response = self.client.get("/metastore/table/%s/test/read" % self.db_name, follow=True)
    # Poll the generated sample-data query until it finishes.
    response = self.client.get(reverse("beeswax:api_watch_query_refresh_json",
                                       kwargs={'id': response.context['query'].id}),
                               follow=True)
    response = wait_for_query_to_finish(self.client, response, max=30.0)
    # Note that it may not return all rows at once. But we expect at least 10.
    results = fetch_query_result_data(self.client, response)
    assert_true(len(results['results']) > 0)
    # This should NOT go into the query history.
    assert_equal(verify_history(self.client, fragment='test'), history_cnt,
                 'Implicit queries should not be saved in the history')
def test_basic_flow(self):
    # Browses databases -> tables -> table detail -> sample data against the
    # 'default' database, checking that the implicit sample-data query stays
    # out of the query history.

    # Default database should exist
    response = self.client.get("/metastore/databases")
    assert_true("default" in response.context["databases"])

    # Table should have been created
    response = self.client.get("/metastore/tables/")
    assert_true("test" in response.context["tables"])

    # Switch databases
    response = self.client.get("/metastore/tables/default")
    assert_true("test" in response.context["tables"])

    # Should default to "default" database
    response = self.client.get("/metastore/tables/not_there")
    assert_true("test" in response.context["tables"])

    # And have detail
    response = self.client.get("/metastore/table/default/test")
    assert_true("foo" in response.content)
    assert_true("SerDe Library" in response.content, response.content)

    # Remember the number of history items. Use a generic fragment 'test' to pass verification.
    history_cnt = verify_history(self.client, fragment='test')

    # Show table data.
    response = self.client.get("/metastore/table/default/test/read", follow=True)
    # Poll the generated sample-data query until it finishes.
    response = self.client.get(reverse("beeswax:api_watch_query_refresh_json",
                                       kwargs={'id': response.context['query'].id}),
                               follow=True)
    response = wait_for_query_to_finish(self.client, response, max=30.0)
    # Note that it may not return all rows at once. But we expect at least 10.
    results = fetch_query_result_data(self.client, response)
    assert_true(len(results['results']) > 0)
    # This should NOT go into the query history.
    assert_equal(verify_history(self.client, fragment='test'), history_cnt,
                 'Implicit queries should not be saved in the history')
def test_drop_multi_tables_with_skip_trash(self):
    """Drop several tables at once with the 'skip trash' option enabled."""
    tables = [
        u'test_drop_multi_tables_with_skip_trash_1',
        u'test_drop_multi_tables_with_skip_trash_2',
        u'test_drop_multi_tables_with_skip_trash_3',
    ]
    hql = """
      CREATE TABLE test_drop_multi_tables_with_skip_trash_1 (a int);
      CREATE TABLE test_drop_multi_tables_with_skip_trash_2 (a int);
      CREATE TABLE test_drop_multi_tables_with_skip_trash_3 (a int);
    """
    resp = _make_query(self.client, hql, database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # GET renders the confirmation page; POST drops with skip_trash checked.
    resp = self.client.get('/metastore/tables/drop/%s' % self.db_name, follow=True)
    assert_true('want to delete' in resp.content, resp.content)
    resp = self.client.post(
        '/metastore/tables/drop/%s' % self.db_name,
        {u'table_selection': tables, u'skip_trash': u'on'})
    assert_equal(resp.status_code, 302)

    # None of the tables should remain in the JSON listing.
    response = self.client.get("/metastore/tables/%s?format=json" % self.db_name)
    assert_equal(200, response.status_code)
    data = json.loads(response.content)
    for table in tables:
        assert_false(table in data['tables'])
def test_read_partitions(self):
    """Reading a single partition returns at least one row."""
    partition_spec = "baz='baz_one',boom='boom_two'"
    read_url = "/metastore/table/%s/test_partitions/partitions/%s/read" % (
        self.db_name, partition_spec)
    response = self.client.get(read_url, follow=True)
    # Poll the generated read query until it finishes.
    watch_url = reverse("beeswax:api_watch_query_refresh_json",
                        kwargs={'id': response.context['query'].id})
    response = self.client.get(watch_url, follow=True)
    response = wait_for_query_to_finish(self.client, response, max=30.0)
    results = fetch_query_result_data(self.client, response)
    assert_true(len(results['results']) > 0, results)
def test_basic_flow(self):
    """Catalog browse flow: tables list, table detail, and table read."""
    # Table should have been created
    resp = self.client.get("/catalog/tables/")
    assert_true("test" in resp.context["tables"])

    # Switch databases
    resp = self.client.get("/catalog/tables/default")
    assert_true("test" in resp.context["tables"])
    resp = self.client.get("/catalog/tables/not_there")
    assert_false("test" in resp.context["tables"])

    # And have detail
    resp = self.client.get("/catalog/table/default/test")
    assert_true("foo" in resp.content)

    # Remember the number of history items. Use a generic fragment 'test' to pass verification.
    history_cnt = verify_history(self.client, fragment='test')

    # Show table data.
    resp = self.client.get("/catalog/table/default/test/read", follow=True)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Note that it may not return all rows at once. But we expect at least 10.
    assert_true(len(resp.context['results']) > 10)

    # Column names
    assert_true("<td>foo</td>" in resp.content)
    assert_true("<td>bar</td>" in resp.content)

    # This should NOT go into the query history.
    assert_equal(verify_history(self.client, fragment='test'), history_cnt,
                 'Implicit queries should not be saved in the history')

    assert_equal(str(resp.context['query_context'][0]), 'table')
    assert_equal(str(resp.context['query_context'][1]), 'test:default')
def test_has_write_access_backend(self):
    """Drop-table endpoints should be denied until the group gets the metastore write permission."""
    client = make_logged_in_client(
        username='******', groupname='write_access_backend', is_superuser=False)
    grant_access("write_access_backend", "write_access_backend", "metastore")
    grant_access("write_access_backend", "write_access_backend", "beeswax")
    user = User.objects.get(username='******')

    # Only fails if we were using Sentry and won't allow SELECT to user
    resp = _make_query(client, 'CREATE TABLE test_perm_1 (a int);', database=self.db_name)
    resp = wait_for_query_to_finish(client, resp, max=30.0)

    def check(client, http_codes):
        # Both the GET (confirmation) and POST (drop) must answer with one of http_codes.
        resp = client.get('/metastore/tables/drop/%s' % self.db_name)
        assert_true(resp.status_code in http_codes, resp.content)
        resp = client.post('/metastore/tables/drop/%s' % self.db_name,
                           {u'table_selection': [u'test_perm_1']})
        assert_true(resp.status_code in http_codes, resp.content)

    check(client, [301])  # Denied

    # Add access
    group, created = Group.objects.get_or_create(name='write_access_backend')
    perm, created = HuePermission.objects.get_or_create(app='metastore', action='write')
    GroupPermission.objects.get_or_create(group=group, hue_permission=perm)

    check(client, [200, 302])  # Ok
def teardown_class(cls):
    """Drop the test database (tables first), verify cleanup, run deferred resets."""
    # We need to drop tables before dropping the database
    cleanup_queries = (
        """
        DROP TABLE IF EXISTS %(db)s.tweets;
        """ % {'db': cls.DATABASE},
        """
        DROP DATABASE %(db)s CASCADE;
        """ % {'db': cls.DATABASE},
    )
    for hql in cleanup_queries:
        resp = _make_query(cls.client, hql, database='default', local=False, server_name='impala')
        resp = wait_for_query_to_finish(cls.client, resp, max=180.0)

    # Check the cleanup
    databases = cls.db.get_databases()
    assert_false(cls.DATABASE in databases)
    assert_false('%(db)s_other' % {'db': cls.DATABASE} in databases)

    for f in cls.finish:
        f()
def test_drop_partition(self):
    """Add a partition, drop it through the UI, and check the generated HQL."""
    # Create partition first
    spec = "baz='baz_drop',boom='boom_drop'"
    hql = 'ALTER TABLE `%s`.`test_partitions` ADD IF NOT EXISTS PARTITION (%s);' % (self.db_name, spec)
    resp = _make_query(self.client, hql, database=self.db_name)
    wait_for_query_to_finish(self.client, resp, max=30.0)

    partitions_url = "/metastore/table/%s/test_partitions/partitions" % self.db_name

    # Assert partition exists
    resp = self.client.get(partitions_url)
    assert_true("baz_drop" in resp.content)

    # Drop partition
    self.client.post(partitions_url + "/drop", {'partition_selection': [spec]}, follow=True)
    query = QueryHistory.objects.latest('id')
    assert_equal_mod_whitespace(
        "ALTER TABLE `%s`.`test_partitions` DROP IF EXISTS PARTITION (%s) PURGE" % (self.db_name, spec),
        query.query)

    resp = self.client.get(partitions_url)
    assert_false("baz_drop" in resp.content)
def test_show_tables(self):
    """Table metadata is returned while under HS2_GET_TABLES_MAX, dropped once over it."""
    if is_live_cluster():
        raise SkipTest('HUE-2902: Test is not re-entrant')

    # Set max limit to 3
    resets = [HS2_GET_TABLES_MAX.set_for_testing(3)]

    try:
        hql = """
          CREATE TABLE test_show_tables_1 (a int) COMMENT 'Test for show_tables';
          CREATE TABLE test_show_tables_2 (a int) COMMENT 'Test for show_tables';
          CREATE TABLE test_show_tables_3 (a int) COMMENT 'Test for show_tables';
        """
        resp = _make_query(self.client, hql, database=self.db_name)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # Table should have been created
        resp = self.client.get("/metastore/tables/%s?filter=show_tables" % self.db_name)
        assert_equal(200, resp.status_code)
        assert_equal(len(resp.context['tables']), 3)
        assert_equal(resp.context['has_metadata'], True)
        first = resp.context["tables"][0]
        assert_true('name' in first)
        assert_true('comment' in first)
        assert_true('type' in first)

        hql = """
          CREATE TABLE test_show_tables_4 (a int) COMMENT 'Test for show_tables';
          CREATE TABLE test_show_tables_5 (a int) COMMENT 'Test for show_tables';
        """
        resp = _make_query(self.client, hql, database=self.db_name)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # Table should have been created
        resp = self.client.get("/metastore/tables/%s?filter=show_tables" % self.db_name)
        assert_equal(200, resp.status_code)
        assert_equal(len(resp.context['tables']), 5)
        assert_equal(resp.context['has_metadata'], False)
        first = resp.context["tables"][0]
        assert_true('name' in first)
        assert_false('comment' in first, resp.context["tables"])
        assert_false('type' in first)
    finally:
        for reset in resets:
            reset()
def setup_class(cls):
    """Create the Impala test database and seed the tweets table (live cluster only)."""
    cls.finish = []

    if not is_live_cluster():
        raise SkipTest

    cls.client = make_logged_in_client()
    cls.user = User.objects.get(username='******')
    add_to_group('test')
    cls.db = dbms.get(cls.user, get_query_server_config(name='impala'))
    cls.DATABASE = get_db_prefix(name='impala')

    def _run(query, database):
        resp = _make_query(cls.client, query, database=database, local=False, server_name='impala')
        resp = wait_for_query_to_finish(cls.client, resp, max=180.0)
        content = json.loads(resp.content)
        assert_true(content['status'] == 0, resp.content)

    # Recreate the database from scratch.
    for query in ["""
      DROP TABLE IF EXISTS %(db)s.tweets;
    """ % {'db': cls.DATABASE}, """
      DROP DATABASE IF EXISTS %(db)s CASCADE;
    """ % {'db': cls.DATABASE}, """
      CREATE DATABASE %(db)s;
    """ % {'db': cls.DATABASE}]:
        _run(query, 'default')

    # Create and populate the tweets table inside the test database.
    for query in ["""
      CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET;
    """, """
      INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa");
    """, """
      INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner.");
    """, """
      INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))");
    """, """
      INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx");
    """, """
      INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE");
    """]:
        _run(query, cls.DATABASE)
def test_show_tables(self):
    """With HS2_GET_TABLES_MAX=3, metadata is present for 3 tables but not for 5."""
    if is_live_cluster():
        raise SkipTest('HUE-2902: Test is not re-entrant')

    # Set max limit to 3
    resets = [HS2_GET_TABLES_MAX.set_for_testing(3)]

    try:
        hql = """
          CREATE TABLE test_show_tables_1 (a int) COMMENT 'Test for show_tables';
          CREATE TABLE test_show_tables_2 (a int) COMMENT 'Test for show_tables';
          CREATE TABLE test_show_tables_3 (a int) COMMENT 'Test for show_tables';
        """
        resp = _make_query(self.client, hql, database=self.db_name)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # Table should have been created
        listing_url = "/metastore/tables/%s?filter=show_tables" % self.db_name
        resp = self.client.get(listing_url)
        assert_equal(200, resp.status_code)
        assert_equal(len(resp.context['tables']), 3)
        assert_equal(resp.context['has_metadata'], True)
        head = resp.context["tables"][0]
        assert_true('name' in head)
        assert_true('comment' in head)
        assert_true('type' in head)

        hql = """
          CREATE TABLE test_show_tables_4 (a int) COMMENT 'Test for show_tables';
          CREATE TABLE test_show_tables_5 (a int) COMMENT 'Test for show_tables';
        """
        resp = _make_query(self.client, hql, database=self.db_name)
        resp = wait_for_query_to_finish(self.client, resp, max=30.0)

        # Table should have been created
        resp = self.client.get(listing_url)
        assert_equal(200, resp.status_code)
        assert_equal(len(resp.context['tables']), 5)
        assert_equal(resp.context['has_metadata'], False)
        head = resp.context["tables"][0]
        assert_true('name' in head)
        assert_false('comment' in head, resp.context["tables"])
        assert_false('type' in head)
    finally:
        for reset in resets:
            reset()
def test_browse_partitions(self):
    """Browse the first partition and expect more than 10 result rows in the page context."""
    resp = self.client.get("/metastore/table/default/test_partitions/partitions/0", follow=True)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)
    # The response context is a list of contexts; find the one carrying results.
    page_context = next(ctx for ctx in resp.context if 'results' in ctx)
    assert_true(len(page_context['results']) > 10)
def test_show_tables(self):
    """JSON table listing grows as tables are created; index tables stay hidden."""
    def _create_and_list(hql):
        # Run DDL, then return the parsed JSON table listing.
        resp = _make_query(self.client, hql, database=self.db_name)
        wait_for_query_to_finish(self.client, resp, max=30.0)
        resp = self.client.get("/metastore/tables/%s?filter=show_tables&format=json" % self.db_name)
        assert_equal(200, resp.status_code)
        return json.loads(resp.content)

    data = _create_and_list("""
      CREATE TABLE test_show_tables_1 (a int) COMMENT 'Test for show_tables';
      CREATE TABLE test_show_tables_2 (a int) COMMENT 'Test for show_tables';
      CREATE TABLE test_show_tables_3 (a int) COMMENT 'Test for show_tables';
    """)
    assert_equal(len(data['tables']), 3)
    assert_true('name' in data["tables"][0])
    assert_true('comment' in data["tables"][0])
    assert_true('type' in data["tables"][0])

    data = _create_and_list("""
      CREATE TABLE test_show_tables_4 (a int) COMMENT 'Test for show_tables';
      CREATE TABLE test_show_tables_5 (a int) COMMENT 'Test for show_tables';
    """)
    assert_equal(len(data['tables']), 5)
    assert_true('name' in data["tables"][0])
    assert_true('comment' in data["tables"][0])
    assert_true('type' in data["tables"][0])

    hql = """
      CREATE INDEX test_index ON TABLE test_show_tables_1 (a) AS 'COMPACT' WITH DEFERRED REBUILD;
    """
    resp = _make_query(self.client, hql, wait=True, local=False, max=30.0, database=self.db_name)

    # By default, index table should not appear in show tables view
    resp = self.client.get("/metastore/tables/%s?format=json" % self.db_name)
    assert_equal(200, resp.status_code)
    data = json.loads(resp.content)
    assert_false('test_index' in data['tables'])
def setUp(self):
    """Skip unless a real Impala cluster is available, then (re)create the test DB.

    Requires either 'impala' on the command line or TEST_IMPALAD_HOST in the
    environment; in the latter case SERVER_HOST is pointed at that host for
    the duration of the test (undone in tearDown).
    """
    self.finish = []

    # We need a real Impala cluster currently.
    # Fixed: `not 'impala' in sys.argv` -> idiomatic `not in`; the env var is
    # now read once instead of twice.
    impalad_host = os.environ.get('TEST_IMPALAD_HOST')
    if 'impala' not in sys.argv and not impalad_host:
        raise SkipTest

    if impalad_host:
        self.finish.append(SERVER_HOST.set_for_testing(impalad_host))

    self.client = make_logged_in_client()
    self.user = User.objects.get(username='******')
    add_to_group('test')
    self.db = dbms.get(self.user, get_query_server_config(name='impala'))

    # Recreate the test database from scratch.
    hql = """
      USE default;
      DROP TABLE IF EXISTS %(db)s.tweets;
      DROP DATABASE IF EXISTS %(db)s;
      CREATE DATABASE %(db)s;
      USE %(db)s;
    """ % {'db': self.DATABASE}
    resp = _make_query(self.client, hql, database='default', local=False, server_name='impala')
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Create and populate the tweets fixture table.
    hql = """
      CREATE TABLE tweets (row_num INTEGER, id_str STRING, text STRING) STORED AS PARQUET;
      INSERT INTO TABLE tweets VALUES (1, "531091827395682000", "My dad looks younger than costa");
      INSERT INTO TABLE tweets VALUES (2, "531091827781550000", "There is a thin line between your partner being vengeful and you reaping the consequences of your bad actions towards your partner.");
      INSERT INTO TABLE tweets VALUES (3, "531091827768979000", "@Mustang_Sally83 and they need to get into you :))))");
      INSERT INTO TABLE tweets VALUES (4, "531091827114668000", "@RachelZJohnson thank you rach!xxx");
      INSERT INTO TABLE tweets VALUES (5, "531091827949309000", "i think @WWERollins was robbed of the IC title match this week on RAW also i wonder if he will get a rematch i hope so @WWE");
    """
    resp = _make_query(self.client, hql, database=self.DATABASE, local=False, server_name='impala')
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

def tearDown(self):
    """Undo every config override registered in setUp."""
    for f in self.finish:
        f()
def check(client, http_code):
    """Create test_perm_1, then assert the drop-table GET and POST both answer http_code."""
    resp = _make_query(client, 'CREATE TABLE test_perm_1 (a int);')
    resp = wait_for_query_to_finish(client, resp, max=30.0)

    drop_url = '/metastore/tables/drop/default'
    resp = client.get(drop_url, follow=True)
    #assert_true('want to delete' in resp.content, resp.content)
    assert_equal(resp.status_code, http_code, resp.content)

    resp = client.post(drop_url, {u'table_selection': [u'test_perm_1']}, follow=True)
    assert_equal(resp.status_code, http_code, resp.content)
def test_browse_partitions(self):
    """Browse the first partition via the watch-query API and expect some rows."""
    resp = self.client.get("/metastore/table/default/test_partitions/partitions/0", follow=True)
    watch_url = reverse("beeswax:api_watch_query_refresh_json",
                        kwargs={'id': resp.context['query'].id})
    resp = self.client.get(watch_url, follow=True)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)
    data = fetch_query_result_data(self.client, resp)
    assert_true(len(data['results']) > 0, data)
def save_and_verify(select_resp, target_dir, verify=True):
    """Save the results of select_resp's query into target_dir; optionally verify contents.

    Expects exactly one output file containing 256 lines, one of them holding '255'.
    """
    query_id = select_resp.context['query'].id
    form_data = {
        'save_target': beeswax.forms.SaveResultsForm.SAVE_TYPE_DIR,
        'target_dir': target_dir,
        'save': True
    }
    resp = self.client.post('/beeswax/save_results/%s' % (query_id,), form_data, follow=True)
    wait_for_query_to_finish(self.client, resp, max=60)

    # Check that data is right
    if verify:
        listing = self.cluster.fs.listdir(target_dir)
        assert_equal(1, len(listing))
        out = self.cluster.fs.open(target_dir + '/' + listing[0])
        contents = out.read()
        out.close()
        assert_equal(256, len(contents.strip().split('\n')))
        assert_true('255' in contents)

    return resp
def test_query_with_udf(self):
    """Run a SELECT using two registered Hive UDFs and inspect results, logs and jobs."""
    resp = _make_query(
        self.client,
        "SELECT my_sqrt(foo), my_power(foo, foo) FROM test WHERE foo=4",
        udfs=[('my_sqrt', 'org.apache.hadoop.hive.ql.udf.UDFSqrt'),
              ('my_power', 'org.apache.hadoop.hive.ql.udf.UDFPower')],
        local=False)
    resp = wait_for_query_to_finish(self.client, resp, max=60.0)

    assert_equal(["2.0", "256.0"], resp.context["results"][0])

    log = resp.context['log']
    assert_true('ql.Driver: Total MapReduce jobs' in log, 'Captured log from Driver')
    assert_true('exec.ExecDriver: Starting Job = job_' in log, 'Captured log from MapRedTask')

    # Test job extraction while we're at it
    assert_equal(1, len(resp.context["hadoop_jobs"]),
                 "Should have started 1 job and extracted it.")
def test_basic_flow(self):
    """Impala flow: list DBs/tables, run a query, fetch all rows, start over, cancel."""
    dbs = self.db.get_databases()
    assert_true('_impala_builtins' in dbs, dbs)
    assert_true(self.DATABASE in dbs, dbs)

    tables = self.db.get_tables(database=self.DATABASE)
    assert_true('tweets' in tables, tables)

    QUERY = """
      SELECT * FROM tweets ORDER BY row_num;
    """
    resp = _make_query(self.client, QUERY, database=self.DATABASE, local=False, server_name='impala')
    content = json.loads(resp.content)
    query_history = QueryHistory.get(content['id'])

    resp = wait_for_query_to_finish(self.client, resp, max=180.0)

    def _fetch_all():
        # We get less than 5 results most of the time, so keep fetching with
        # an increasing offset until the whole result set is in.
        rows = []
        while len(rows) < 5:
            content = fetch_query_result_data(self.client, resp, n=len(rows), server_name='impala')
            rows += content['results']
        return rows

    # Check that we multiple fetches get all the result set
    results = _fetch_all()
    assert_equal([1, 2, 3, 4, 5], [col[0] for col in results])

    # Check start over
    assert_equal(_fetch_all(), results)

    # Check cancel query
    cancel = self.client.post(
        reverse('impala:api_cancel_query', kwargs={'query_history_id': query_history.id}))
    content = json.loads(cancel.content)
    assert_equal(0, content['status'])
def _parallel_query_helper(self, i, result_holder, lock, num_tasks):
    """Worker for parallel-query tests: run one query and record its response.

    Errors are logged rather than raised so one failed worker does not mask
    the others; the caller inspects result_holder afterwards.

    Fixes:
    - bare ``except:`` narrowed to ``except Exception:`` so SystemExit /
      KeyboardInterrupt are no longer swallowed;
    - ``lock.acquire()``/``release()`` replaced with ``with lock:`` so the
      lock is released even if the assignment raises.
    """
    client = make_logged_in_client()
    try:
        q = "SELECT foo+" + str(i + 1) + " FROM test WHERE foo < 2"
        LOG.info("Starting " + str(i) + ": " + q)
        response = _make_query(client, q)
        response = wait_for_query_to_finish(client, response, max=(240.0 * num_tasks))
        with lock:
            result_holder[i] = response
        LOG.info("Finished: " + str(i))
    except Exception:
        LOG.exception("Saw exception in child thread.")
def test_get_exec_summary(self):
    """Exec summary is available both for a running/finished query and after it is closed."""
    query = """
      SELECT COUNT(1) FROM tweets;
    """
    resp = _make_query(self.client, query, database=self.DATABASE, local=False, server_name='impala')
    content = json.loads(resp.content)
    query_history = QueryHistory.get(content['id'])

    wait_for_query_to_finish(self.client, resp, max=180.0)

    summary_url = reverse('impala:get_exec_summary', kwargs={'query_history_id': query_history.id})

    def _check_summary():
        data = json.loads(self.client.post(summary_url).content)
        assert_equal(0, data['status'], data)
        assert_true('nodes' in data['summary'], data)
        assert_true(len(data['summary']['nodes']) > 0, data['summary']['nodes'])

    _check_summary()

    # Attempt to call get_exec_summary on a closed query
    _check_summary()
def test_drop_multi_databases(self):
    """Create three databases and drop them all through the multi-drop endpoint."""
    hql = """
      CREATE DATABASE test_drop_1;
      CREATE DATABASE test_drop_2;
      CREATE DATABASE test_drop_3;
    """
    resp = _make_query(self.client, hql)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Drop them
    resp = self.client.get('/metastore/databases/drop', follow=True)
    assert_true('want to delete' in resp.content, resp.content)
    resp = self.client.post(
        '/metastore/databases/drop',
        {u'database_selection': [u'test_drop_1', u'test_drop_2', u'test_drop_3']})
    assert_equal(resp.status_code, 302)
def test_query_with_udf(self):
    """
    Testing query with udf
    """
    resp = _make_query(
        self.client,
        "SELECT my_sqrt(foo), my_power(foo, foo) FROM test WHERE foo=4",
        udfs=[('my_sqrt', 'org.apache.hadoop.hive.ql.udf.UDFSqrt'),
              ('my_power', 'org.apache.hadoop.hive.ql.udf.UDFPower')],
        local=False)
    resp = wait_for_query_to_finish(self.client, resp, max=60.0)

    assert_equal(["2.0", "256.0"], resp.context["results"][0])

    log = resp.context['log']
    assert_true(search_log_line('ql.Driver', 'Total MapReduce jobs', log),
                'Captured log from Driver in %s' % log)
    assert_true(search_log_line('exec.Task', 'Starting Job = job_', log),
                'Captured log from MapRedTask in %s' % log)

    # Test job extraction while we're at it
    assert_equal(1, len(resp.context["hadoop_jobs"]),
                 "Should have started 1 job and extracted it.")
def test_drop_multi_tables(self):
    """Create three tables and drop them all through the multi-drop endpoint."""
    hql = """
      CREATE TABLE test_drop_1 (a int);
      CREATE TABLE test_drop_2 (a int);
      CREATE TABLE test_drop_3 (a int);
    """
    resp = _make_query(self.client, hql, database=self.db_name)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Drop them
    drop_url = '/metastore/tables/drop/%s' % self.db_name
    resp = self.client.get(drop_url, follow=True)
    assert_true('want to delete' in resp.content, resp.content)
    resp = self.client.post(
        drop_url, {u'table_selection': [u'test_drop_1', u'test_drop_2', u'test_drop_3']})
    assert_equal(resp.status_code, 302)
def test_query_with_resource(self):
    """Run a TRANSFORM query that uses an uploaded Python script as a FILE resource."""
    # Upload the transform script to HDFS first.
    script = self.cluster.fs.open("/square.py", "w")
    script.write("""#!/usr/bin/python
import sys
for x in sys.stdin:
  val = int(x)
  print val*val
""")
    script.close()

    resp = _make_query(
        self.client,
        "SELECT TRANSFORM (foo) USING 'python square.py' AS b FROM test",
        resources=[("FILE", "/square.py")],
        local=False)
    resp = wait_for_query_to_finish(self.client, resp, max=180.0)
    assert_equal([['0'], ['1'], ['4'], ['9']], resp.context["results"][0:4])
def test_query_with_setting(self):
    """CTAS with per-query settings: verify compression, job name and job user."""
    # Run on MR, because that's how we check it worked.
    resp = _make_query(
        self.client,
        "CREATE TABLE test2 AS SELECT foo+1 FROM test WHERE foo=4",
        settings=[("mapred.job.name", "test_query_with_setting"),
                  ("hive.exec.compress.output", "true")],
        local=False)
    resp = wait_for_query_to_finish(self.client, resp, max=180.0)

    # Check that we actually got a compressed output
    files = self.cluster.fs.listdir("/user/hive/warehouse/test2")
    assert_true(len(files) >= 1)
    assert_true(files[0].endswith(".deflate"))

    # And check that the name is right...
    jobs = self.cluster.jt.all_jobs().jobs
    assert_true("test_query_with_setting" in [job.profile.name for job in jobs])

    # While we're at it, check that we're running jobs as the correct user on MR.
    matching = [job.profile for job in jobs if job.profile.name == "test_query_with_setting"]
    assert_equal("test", matching[0].user)