def test_get_sample(self):
  """Check that table- and column-level sample endpoints return headers plus rows."""
  doc = self.create_query_document(owner=self.user, statement=self.statement)
  notebook = Notebook(document=doc)
  snippet = self.get_snippet(notebook, snippet_idx=0)
  post_body = {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}

  # Table-level sample: expect a non-empty result set with headers.
  table_url = reverse('notebook:api_sample_data',
                      kwargs={'database': 'default', 'table': 'sample_07'})
  payload = json.loads(self.client.post(table_url, post_body).content)
  assert_equal(0, payload['status'], payload)
  assert_true('headers' in payload)
  assert_true('rows' in payload)
  assert_true(len(payload['rows']) > 0)

  # Column-level sample: same shape, but headers narrowed to the one column.
  column_url = reverse('notebook:api_sample_data_column',
                       kwargs={'database': 'default', 'table': 'sample_07', 'column': 'code'})
  payload = json.loads(self.client.post(column_url, post_body).content)
  assert_equal(0, payload['status'], payload)
  assert_true('headers' in payload)
  assert_equal(['code'], payload['headers'])
  assert_true('rows' in payload)
  assert_true(len(payload['rows']) > 0)
def test_fetch_result_abbreviated(self):
  """Rows reported with abbreviated counts (e.g. 1.00K) must resolve to real totals."""
  if not is_live_cluster():
    raise SkipTest

  # Create session so that session object is saved to DB for server URL lookup
  session = self.api.create_session(lang='impala')
  try:
    statement = "SELECT * FROM web_logs;"
    doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    # Pull some data first so the result-size lookup has something to report on.
    self.client.post(
        reverse('notebook:fetch_result_data'),
        {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet),
         'rows': 100, 'startOver': 'false'})

    resp = self.client.post(
        reverse('notebook:fetch_result_size'),
        {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
    payload = json.loads(resp.content)
    assert_equal(0, payload['status'], payload)
    assert_true('result' in payload)
    assert_true('rows' in payload['result'])
    assert_equal(1000, payload['result']['rows'])
  finally:
    self.api.close_session(session)
def test_get_current_statement(self):
  """Executing a multi-statement snippet advances the handle through each statement."""
  multi_statement = "SELECT description, salary FROM sample_07 LIMIT 20;\r\nSELECT AVG(salary) FROM sample_07;"
  doc = self.create_query_document(owner=self.user, statement=multi_statement)
  notebook = Notebook(document=doc)
  snippet = self.get_snippet(notebook, snippet_idx=0)

  # First execute: handle points at statement 0 of 2, with more to come.
  resp = self.client.post(
      reverse('notebook:execute'),
      {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
  payload = json.loads(resp.content)
  assert_equal(0, payload['status'], payload)
  handle = payload['handle']
  assert_equal(0, handle['statement_id'], payload)
  assert_equal(2, handle['statements_count'], payload)
  assert_equal(True, handle['has_more_statements'], payload)
  assert_equal({'row': 0, 'column': 0}, handle['start'], payload)
  assert_equal({'row': 0, 'column': 51}, handle['end'], payload)

  # Re-execute with the returned handle: advances to the final statement.
  snippet['result']['handle'] = handle
  resp = self.client.post(
      reverse('notebook:execute'),
      {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
  payload = json.loads(resp.content)
  assert_equal(0, payload['status'], payload)
  handle = payload['handle']
  assert_equal(1, handle['statement_id'], payload)
  assert_equal(2, handle['statements_count'], payload)
  assert_equal(False, handle['has_more_statements'], payload)
  assert_equal({'row': 1, 'column': 0}, handle['start'], payload)
  assert_equal({'row': 1, 'column': 33}, handle['end'], payload)
def test_get_current_statement(self):
  """Stepping through a two-statement query updates statement_id, flags and positions."""
  multi_statement = "SELECT description, salary FROM sample_07 LIMIT 20;\r\nSELECT AVG(salary) FROM sample_07;"
  doc = self.create_query_document(owner=self.user, statement=multi_statement)
  notebook = Notebook(document=doc)
  snippet = self.get_snippet(notebook, snippet_idx=0)
  body = {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}

  # Statement 0: two statements total, more remaining.
  data = json.loads(self.client.post(reverse('notebook:execute'), body).content)
  assert_equal(0, data['status'], data)
  assert_equal(0, data['handle']['statement_id'], data)
  assert_equal(2, data['handle']['statements_count'], data)
  assert_equal(True, data['handle']['has_more_statements'], data)
  assert_equal({'row': 0, 'column': 0}, data['handle']['start'], data)
  assert_equal({'row': 0, 'column': 51}, data['handle']['end'], data)

  # Feed the handle back in; statement 1 is the last one.
  snippet['result']['handle'] = data['handle']
  body = {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}
  data = json.loads(self.client.post(reverse('notebook:execute'), body).content)
  assert_equal(0, data['status'], data)
  assert_equal(1, data['handle']['statement_id'], data)
  assert_equal(2, data['handle']['statements_count'], data)
  assert_equal(False, data['handle']['has_more_statements'], data)
  assert_equal({'row': 1, 'column': 0}, data['handle']['start'], data)
  assert_equal({'row': 1, 'column': 33}, data['handle']['end'], data)
def test_fetch_result_size_mr(self):
  """fetch_result_size behavior on the MR engine: no job, single job, multi-job."""
  if not is_live_cluster():  # Mini-cluster does not have JHS
    raise SkipTest

  size_url = reverse('notebook:fetch_result_size')
  settings = [{'key': 'hive.execution.engine', 'value': 'mr'}]

  # A query that launches no job reports neither rows nor size.
  statement = "SELECT 'hello world';"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0)
  payload = json.loads(self.client.post(
      size_url, {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}).content)
  assert_equal(0, payload['status'], payload)
  assert_true('result' in payload)
  assert_true('rows' in payload['result'])
  assert_true('size' in payload['result'])
  assert_equal(None, payload['result']['rows'])
  assert_equal(None, payload['result']['size'])

  # A map+reduce query reports the exact row count and a positive size.
  statement = "SELECT DISTINCT code FROM sample_07;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)
  payload = json.loads(self.client.post(
      size_url, {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}).content)
  assert_equal(0, payload['status'], payload)
  assert_true('result' in payload)
  assert_true('rows' in payload['result'])
  assert_true('size' in payload['result'])
  assert_equal(823, payload['result']['rows'])
  assert_true(payload['result']['size'] > 0, payload['result'])

  # A multi-job query still reports rows and size.
  statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)
  payload = json.loads(self.client.post(
      size_url, {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}).content)
  assert_equal(0, payload['status'], payload)
  assert_true('result' in payload)
  assert_true('rows' in payload['result'])
  assert_equal(23, payload['result']['rows'])
  assert_true(payload['result']['size'] > 0, payload['result'])
def execute_and_wait(self, query_doc, snippet_idx=0, timeout=30.0, wait=1.0):
  """Execute the given query document's snippet and poll until results are available.

  Args:
    query_doc: saved query document to execute.
    snippet_idx: index of the snippet within the document.
    timeout: maximum seconds to wait for the query to reach 'available'.
    wait: seconds to sleep between status polls.

  Returns:
    The snippet dict with its result handle and final status attached.

  Raises:
    Exception: if the execute call fails or the query does not become
      'available' within the timeout.
  """
  notebook = Notebook(document=query_doc)
  snippet = self.get_snippet(notebook, snippet_idx=snippet_idx)
  deadline = time.time() + timeout

  response = self.client.post(reverse('notebook:execute'),
                              {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
  data = json.loads(response.content)
  # Fail loudly on a bad execute instead of letting data['handle'] raise an
  # opaque KeyError below; the server payload carries the actual error.
  if data.get('status') != 0 or 'handle' not in data:
    raise Exception('Failed to execute query: %s' % data)

  # Re-fetch a fresh snippet and attach the execution handle for polling.
  notebook = Notebook(document=query_doc)
  snippet = self.get_snippet(notebook, snippet_idx=snippet_idx)
  snippet['result']['handle'] = data['handle']

  status = 'ready'
  while status != 'available' and time.time() <= deadline:
    response = self.client.post(reverse('notebook:check_status'),
                                {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
    data = json.loads(response.content)
    status = data['query_status']['status']
    snippet['status'] = status
    if status != 'available':
      # Only sleep when we actually need another poll; the original slept
      # a full `wait` period even after the query was already done.
      time.sleep(wait)

  if status != 'available':
    raise Exception('Query failed to complete or return results.')

  return snippet
def test_fetch_result_size_spark(self):
  """fetch_result_size on Hive-on-Spark: jobless query yields no stats, real job does."""
  if not is_live_cluster() or not is_hive_on_spark():
    raise SkipTest

  # TODO: Add session cleanup here so we don't have orphan spark sessions
  settings = [{'key': 'hive.execution.engine', 'value': 'spark'}]
  size_url = reverse('notebook:fetch_result_size')

  # No job launched => rows and size are both None.
  statement = "SELECT 'hello world';"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0)
  payload = json.loads(self.client.post(
      size_url,
      {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}).content)
  assert_equal(0, payload['status'], payload)
  assert_true('result' in payload)
  assert_true('rows' in payload['result'])
  assert_true('size' in payload['result'])
  assert_equal(None, payload['result']['rows'])
  assert_equal(None, payload['result']['size'])

  # A query that runs a job reports positive rows and size.
  statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)
  payload = json.loads(self.client.post(
      size_url,
      {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}).content)
  assert_equal(0, payload['status'], payload)
  assert_true('result' in payload)
  assert_true('rows' in payload['result'])
  assert_true('size' in payload['result'])
  assert_true(payload['result']['rows'] > 0)
  assert_true(payload['result']['size'] > 0)
def test_fetch_result_size_impala(self):
  """Impala result-size: aggregation reports exact rows (no size); full partition scan works."""
  if not is_live_cluster():
    raise SkipTest

  # Create session so that session object is saved to DB for server URL lookup
  session = self.api.create_session(lang='impala')
  try:
    data_url = reverse('notebook:fetch_result_data')
    size_url = reverse('notebook:fetch_result_size')

    # Aggregation query: exact row count, but Impala gives no byte size.
    statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
    doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)
    self.client.post(data_url, {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet),
                                'rows': 100, 'startOver': 'false'})
    payload = json.loads(self.client.post(
        size_url, {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}).content)
    assert_equal(0, payload['status'], payload)
    assert_true('result' in payload)
    assert_true('rows' in payload['result'])
    assert_true('size' in payload['result'])
    assert_equal(23, payload['result']['rows'])
    assert_equal(None, payload['result']['size'])

    # Selecting everything from a partitioned table still reports rows.
    statement = "SELECT * FROM web_logs;"
    doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)
    self.client.post(data_url, {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet),
                                'rows': 100, 'startOver': 'false'})
    payload = json.loads(self.client.post(
        size_url, {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}).content)
    assert_equal(0, payload['status'], payload)
    assert_true('result' in payload)
    assert_true('rows' in payload['result'])
    assert_equal(1000, payload['result']['rows'])
  finally:
    self.api.close_session(session)
def test_fetch_result_size_spark(self):
  """Spark engine result sizes: None/None without a job; 23 rows plus size with one."""
  if not is_live_cluster() or not is_hive_on_spark():
    raise SkipTest

  # TODO: Add session cleanup here so we don't have orphan spark sessions
  settings = [{'key': 'hive.execution.engine', 'value': 'spark'}]

  # Jobless query: both counters come back empty.
  statement = "SELECT 'hello world';"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0)
  resp = self.client.post(reverse('notebook:fetch_result_size'),
                          {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
  data = json.loads(resp.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true('size' in data['result'])
  assert_equal(None, data['result']['rows'])
  assert_equal(None, data['result']['size'])

  # Query that launches a job: fixed row count and non-zero size.
  statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)
  resp = self.client.post(reverse('notebook:fetch_result_size'),
                          {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
  data = json.loads(resp.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true('size' in data['result'])
  assert_equal(23, data['result']['rows'])
  assert_true(data['result']['size'] > 0)
def test_query_with_unicode(self):
  """A statement containing non-ASCII text executes and shows up intact in the logs."""
  statement = "SELECT * FROM sample_07 WHERE code='한';"
  doc = self.create_query_document(owner=self.user, statement=statement)
  notebook = Notebook(document=doc)
  snippet = self.get_snippet(notebook, snippet_idx=0)

  resp = self.client.post(reverse('notebook:execute'),
                          {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
  payload = json.loads(resp.content)
  assert_equal(0, payload['status'], payload)

  # Attach the handle, then confirm the unicode statement round-trips in the logs.
  snippet['result']['handle'] = payload['handle']
  resp = self.client.post(reverse('notebook:get_logs'),
                          {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
  payload = json.loads(resp.content)
  assert_equal(0, payload['status'], payload)
  assert_true("SELECT * FROM sample_07 WHERE code='한'" in smart_str(payload['logs']))
def test_fetch_result_size_impala(self):
  """Impala job query reports an exact row count and no byte size."""
  if not is_live_cluster():
    raise SkipTest

  # Create session so that session object is saved to DB for server URL lookup
  session = self.api.create_session(lang='impala')
  try:
    statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
    doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)

    # Fetch some data first so the size endpoint has results to inspect.
    self.client.post(
        reverse('notebook:fetch_result_data'),
        {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet),
         'rows': 100, 'startOver': 'false'})

    resp = self.client.post(
        reverse('notebook:fetch_result_size'),
        {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
    payload = json.loads(resp.content)
    assert_equal(0, payload['status'], payload)
    assert_true('result' in payload)
    assert_true('rows' in payload['result'])
    assert_true('size' in payload['result'])
    assert_equal(23, payload['result']['rows'])
    assert_equal(None, payload['result']['size'])
  finally:
    self.api.close_session(session)
def test_fetch_result_abbreviated(self):
  """An abbreviated row estimate (e.g. 1.00K) must still resolve to the true count."""
  if not is_live_cluster():
    raise SkipTest

  # Create session so that session object is saved to DB for server URL lookup
  session = self.api.create_session(lang='impala')
  try:
    statement = "SELECT * FROM web_logs;"
    doc = self.create_query_document(owner=self.user, query_type='impala', statement=statement)
    notebook = Notebook(document=doc)
    snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=5.0)

    # Read a page of data so the backend has materialized results.
    self.client.post(
        reverse('notebook:fetch_result_data'),
        {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet),
         'rows': 100, 'startOver': 'false'})

    resp = self.client.post(
        reverse('notebook:fetch_result_size'),
        {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
    payload = json.loads(resp.content)
    assert_equal(0, payload['status'], payload)
    assert_true('result' in payload)
    assert_true('rows' in payload['result'])
    assert_equal(1000, payload['result']['rows'])
  finally:
    self.api.close_session(session)
def test_download(self):
  """Downloading a finished query as CSV returns 200 with an attachment header."""
  statement = "SELECT 'hello world';"
  doc = self.create_query_document(owner=self.user, statement=statement)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0)

  resp = self.client.post(
      reverse('notebook:download'),
      {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet), 'format': 'csv'})

  assert_equal(200, resp.status_code)
  # NOTE(review): response._headers is a private Django attribute (removed in
  # Django 3.0) — consider response['Content-Disposition'] if Django is upgraded.
  assert_equal(('Content-Disposition', 'attachment; filename=Test Query.csv'),
               resp._headers['content-disposition'])
def test_get_sample(self):
  """Sample endpoints for a table and a single column both return usable rows."""
  doc = self.create_query_document(owner=self.user, statement=self.statement)
  notebook = Notebook(document=doc)
  snippet = self.get_snippet(notebook, snippet_idx=0)

  # Whole-table sample.
  resp = self.client.post(
      reverse('notebook:api_sample_data',
              kwargs={'database': 'default', 'table': 'sample_07'}),
      {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
  data = json.loads(resp.content)
  assert_equal(0, data['status'], data)
  assert_true('headers' in data)
  assert_true('rows' in data)
  assert_true(len(data['rows']) > 0)

  # Single-column sample restricted to 'code'.
  resp = self.client.post(
      reverse('notebook:api_sample_data_column',
              kwargs={'database': 'default', 'table': 'sample_07', 'column': 'code'}),
      {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
  data = json.loads(resp.content)
  assert_equal(0, data['status'], data)
  assert_true('headers' in data)
  assert_equal(['code'], data['headers'])
  assert_true('rows' in data)
  assert_true(len(data['rows']) > 0)
def test_query_with_unicode(self):
  """Execute a unicode statement and verify it appears verbatim in the query logs."""
  statement = "SELECT * FROM sample_07 WHERE code='한';"
  doc = self.create_query_document(owner=self.user, statement=statement)
  notebook = Notebook(document=doc)
  snippet = self.get_snippet(notebook, snippet_idx=0)

  body = {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}
  data = json.loads(self.client.post(reverse('notebook:execute'), body).content)
  assert_equal(0, data['status'], data)

  snippet['result']['handle'] = data['handle']
  body = {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}
  data = json.loads(self.client.post(reverse('notebook:get_logs'), body).content)
  assert_equal(0, data['status'], data)
  assert_true("SELECT * FROM sample_07 WHERE code='한'" in smart_str(data['logs']))
def execute_and_wait(self, query_doc, snippet_idx=0, timeout=30.0, wait=1.0):
  """Execute the document's snippet and block until the query is 'available'.

  Args:
    query_doc: saved query document to execute.
    snippet_idx: index of the snippet within the document.
    timeout: maximum seconds to wait for completion.
    wait: seconds to sleep between status polls.

  Returns:
    The snippet dict with its result handle and final status attached.

  Raises:
    Exception: on a failed execute call, or if the query never reaches
      'available' before the timeout.
  """
  notebook = Notebook(document=query_doc)
  snippet = self.get_snippet(notebook, snippet_idx=snippet_idx)
  deadline = time.time() + timeout

  response = self.client.post(reverse('notebook:execute'), {
      'notebook': notebook.get_json(),
      'snippet': json.dumps(snippet)
  })
  data = json.loads(response.content)
  # Surface the server's error payload instead of a bare KeyError('handle')
  # when the execute request itself failed.
  if data.get('status') != 0 or 'handle' not in data:
    raise Exception('Failed to execute query: %s' % data)

  notebook = Notebook(document=query_doc)
  snippet = self.get_snippet(notebook, snippet_idx=snippet_idx)
  snippet['result']['handle'] = data['handle']

  status = 'ready'
  while status != 'available' and time.time() <= deadline:
    response = self.client.post(reverse('notebook:check_status'), {
        'notebook': notebook.get_json(),
        'snippet': json.dumps(snippet)
    })
    data = json.loads(response.content)
    status = data['query_status']['status']
    snippet['status'] = status
    if status != 'available':
      # Skip the pointless final sleep once the query is already done.
      time.sleep(wait)

  if status != 'available':
    raise Exception('Query failed to complete or return results.')

  return snippet
def test_explain(self):
  """EXPLAIN output should contain the stage plan and echo back the statement."""
  # Hive 2 with Tez set hive.explain.user to true by default, but this test is expecting output when this setting
  # is set to false.
  doc = self.create_query_document(owner=self.user, statement=self.statement)
  notebook = Notebook(document=doc)
  snippet = self.get_snippet(notebook, snippet_idx=0)
  snippet['properties']['settings'].append({"key": "hive.explain.user", "value": "false"})

  resp = self.client.post(
      reverse('notebook:explain'),
      {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)})
  payload = json.loads(resp.content)

  assert_equal(0, payload['status'], payload)
  assert_true('STAGE DEPENDENCIES' in payload['explanation'], payload)
  assert_equal(self.statement, payload['statement'], payload)
def test_download(self):
  """CSV download of a completed query succeeds and names the file after the query."""
  statement = "SELECT 'hello world';"
  doc = self.create_query_document(owner=self.user, statement=statement)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0)

  download_body = {
      'notebook': notebook.get_json(),
      'snippet': json.dumps(snippet),
      'format': 'csv',
  }
  resp = self.client.post(reverse('notebook:download'), download_body)

  assert_equal(200, resp.status_code)
  # NOTE(review): _headers is private Django API (gone in Django 3.0);
  # resp['Content-Disposition'] would be the portable accessor.
  assert_equal(
      ('Content-Disposition', 'attachment; filename=Test Query.csv'),
      resp._headers['content-disposition'])
def test_explain(self):
  """Explain endpoint returns the stage dependency plan for the fixture statement."""
  # Hive 2 with Tez set hive.explain.user to true by default, but this test is expecting output when this setting
  # is set to false.
  doc = self.create_query_document(owner=self.user, statement=self.statement)
  notebook = Notebook(document=doc)
  snippet = self.get_snippet(notebook, snippet_idx=0)
  snippet['properties']['settings'].append({"key": "hive.explain.user", "value": "false"})

  body = {'notebook': notebook.get_json(), 'snippet': json.dumps(snippet)}
  data = json.loads(self.client.post(reverse('notebook:explain'), body).content)

  assert_equal(0, data['status'], data)
  assert_true('STAGE DEPENDENCIES' in data['explanation'], data)
  assert_equal(self.statement, data['statement'], data)
def test_fetch_result_size_mr(self):
  """MR engine result sizes: jobless query has none; single and multi-job queries do."""
  if not is_live_cluster():  # Mini-cluster does not have JHS
    raise SkipTest

  settings = [{'key': 'hive.execution.engine', 'value': 'mr'}]
  size_url = reverse('notebook:fetch_result_size')

  # No job => neither rows nor size.
  statement = "SELECT 'hello world';"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0)
  resp = self.client.post(size_url, {
      'notebook': notebook.get_json(),
      'snippet': json.dumps(snippet)
  })
  data = json.loads(resp.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true('size' in data['result'])
  assert_equal(None, data['result']['rows'])
  assert_equal(None, data['result']['size'])

  # Map + reduce job => positive row count.
  statement = "SELECT DISTINCT code FROM sample_07;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)
  resp = self.client.post(size_url, {
      'notebook': notebook.get_json(),
      'snippet': json.dumps(snippet)
  })
  data = json.loads(resp.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true(data['result']['rows'] > 0)

  # Multiple chained jobs => still a positive row count.
  statement = "SELECT app, COUNT(1) AS count FROM web_logs GROUP BY app ORDER BY count DESC;"
  doc = self.create_query_document(owner=self.user, statement=statement, settings=settings)
  notebook = Notebook(document=doc)
  snippet = self.execute_and_wait(doc, snippet_idx=0, timeout=60.0, wait=2.0)
  resp = self.client.post(size_url, {
      'notebook': notebook.get_json(),
      'snippet': json.dumps(snippet)
  })
  data = json.loads(resp.content)
  assert_equal(0, data['status'], data)
  assert_true('result' in data)
  assert_true('rows' in data['result'])
  assert_true(data['result']['rows'] > 0)