def test_basic_flow(self):
    """Browse the catalog pages for table ``test`` and read its rows.

    Covers the table listings, the table detail page and the implicit
    "read table" query, and checks that the implicit query is not recorded
    in the query history.
    """
    # "test" is listed when no database is named in the URL ...
    listing = self.client.get("/catalog/tables/")
    assert_true("test" in listing.context["tables"])

    # ... and when the "default" database is requested explicitly ...
    listing = self.client.get("/catalog/tables/default")
    assert_true("test" in listing.context["tables"])

    # ... but not for the database name "not_there".
    listing = self.client.get("/catalog/tables/not_there")
    assert_false("test" in listing.context["tables"])

    # The detail page mentions column "foo".
    detail = self.client.get("/catalog/table/default/test")
    assert_true("foo" in detail.content)

    # Snapshot the history size. The generic fragment 'test' is only there
    # so that verify_history's own check passes.
    history_before = verify_history(self.client, fragment='test')

    # Read the table data and wait for the underlying query.
    read_page = self.client.get("/catalog/table/default/test/read", follow=True)
    read_page = wait_for_query_to_finish(self.client, read_page, max=30.0)

    # Not every row has to come back at once, but expect more than 10.
    row_count = len(read_page.context['results'])
    assert_true(row_count > 10)

    # Column names are rendered as table cells.
    assert_true("<td>foo</td>" in read_page.content)
    assert_true("<td>bar</td>" in read_page.content)

    # Reading a table is an implicit query and must NOT be saved.
    history_after = verify_history(self.client, fragment='test')
    assert_equal(history_after, history_cnt if False else history_before,
                 'Implicit queries should not be saved in the history')

    query_context = read_page.context['query_context']
    assert_equal(str(query_context[0]), 'table')
    assert_equal(str(query_context[1]), 'test:default')
def _make_query(client, query, submission_type="Execute", udfs=None, settings=None,
                resources=None, wait=False, name=None, desc=None, local=True,
                is_parameterized=True, max=30.0, database='default',
                email_notify=False, **kwargs):
    """Wrapper around the real make_query that also verifies the history.

    All arguments are forwarded positionally, in order, to ``make_query``
    (``**kwargs`` is forwarded as keyword arguments). When
    ``submission_type`` is ``'Execute'``, ``verify_history`` is then called
    with a fragment built from the first 20 characters of ``query``.

    ``resources`` defaults to ``None`` and is replaced by a fresh empty list
    on every call, so no list object is shared between calls (the previous
    ``resources=[]`` default was a single list reused by every call).

    Returns whatever ``make_query`` returned.
    """
    if resources is None:
        resources = []

    res = make_query(client, query, submission_type, udfs, settings, resources,
                     wait, name, desc, local, is_parameterized, max, database,
                     email_notify, **kwargs)

    # Should be in the history if it's submitted.
    if submission_type == 'Execute':
        fragment = collapse_whitespace(smart_str(query[:20]))
        verify_history(client, fragment=fragment)

    return res
def test_basic_flow(self):
    """Browse the metastore pages for table ``test`` and read its rows.

    Covers the database listing, the table listings, the table detail page
    and the implicit "read table" query, and checks that the implicit query
    is not recorded in the query history.
    """
    # The "default" database is listed.
    databases_page = self.client.get("/metastore/databases")
    assert_true("default" in databases_page.context["databases"])

    # "test" is listed when no database is named in the URL ...
    listing = self.client.get("/metastore/tables/")
    assert_true("test" in listing.context["tables"])

    # ... and when the "default" database is requested explicitly ...
    listing = self.client.get("/metastore/tables/default")
    assert_true("test" in listing.context["tables"])

    # ... and also for "not_there", which should default to "default".
    listing = self.client.get("/metastore/tables/not_there")
    assert_true("test" in listing.context["tables"])

    # The detail page mentions column "foo" and the SerDe library.
    detail = self.client.get("/metastore/table/default/test")
    assert_true("foo" in detail.content)
    assert_true("SerDe Library" in detail.content, detail.content)

    # Snapshot the history size. The generic fragment 'test' is only there
    # so that verify_history's own check passes.
    history_before = verify_history(self.client, fragment='test')

    # Read the table data: open the read page, follow up on the watch URL
    # of the query it started, then wait for that query.
    read_page = self.client.get("/metastore/table/default/test/read", follow=True)
    watch_url = reverse("beeswax:api_watch_query_refresh_json",
                        kwargs={'id': read_page.context['query'].id})
    watch_page = self.client.get(watch_url, follow=True)
    finished = wait_for_query_to_finish(self.client, watch_page, max=30.0)

    # Not every row has to come back at once; at least one is required.
    results = fetch_query_result_data(self.client, finished)
    assert_true(len(results['results']) > 0)

    # Reading a table is an implicit query and must NOT be saved.
    history_after = verify_history(self.client, fragment='test')
    assert_equal(history_after, history_before,
                 'Implicit queries should not be saved in the history')
def test_basic_flow(self):
    """Browse the metastore pages for table ``test`` in ``self.db_name``.

    Covers the database listing, the table listings, the table detail page
    and the implicit "read table" query, and checks that the implicit query
    is not recorded in the query history.
    """
    # The test database is listed.
    databases_page = self.client.get("/metastore/databases")
    assert_true(self.db_name in databases_page.context["databases"])

    # The table listing without a database name renders.
    listing = self.client.get("/metastore/tables/")
    assert_equal(200, listing.status_code)

    # The listing for the test database exposes table entries and names.
    tables_url = "/metastore/tables/%s" % self.db_name
    listing = self.client.get(tables_url)
    assert_true('name' in listing.context["tables"][0])
    assert_true("test" in listing.context["table_names"])

    # "not_there" should fall back to the "default" database and render.
    listing = self.client.get("/metastore/tables/not_there")
    assert_equal(200, listing.status_code)

    # The detail page mentions column "foo" and the SerDe library.
    detail_url = "/metastore/table/%s/test" % self.db_name
    detail = self.client.get(detail_url)
    assert_true("foo" in detail.content)
    assert_true("SerDe Library" in detail.content, detail.content)

    # Snapshot the history size. The generic fragment 'test' is only there
    # so that verify_history's own check passes.
    history_before = verify_history(self.client, fragment='test')

    # Read the table data: open the read page, follow up on the watch URL
    # of the query it started, then wait for that query.
    read_url = "/metastore/table/%s/test/read" % self.db_name
    read_page = self.client.get(read_url, follow=True)
    watch_url = reverse("beeswax:api_watch_query_refresh_json",
                        kwargs={'id': read_page.context['query'].id})
    watch_page = self.client.get(watch_url, follow=True)
    finished = wait_for_query_to_finish(self.client, watch_page, max=30.0)

    # Not every row has to come back at once; at least one is required.
    results = fetch_query_result_data(self.client, finished)
    assert_true(len(results['results']) > 0)

    # Reading a table is an implicit query and must NOT be saved.
    history_after = verify_history(self.client, fragment='test')
    assert_equal(history_after, history_before,
                 'Implicit queries should not be saved in the history')
def _make_query(client, query, submission_type="Execute", follow=True, udfs=None,
                settings=None, resources=None, wait=False, name=None, desc=None,
                local=True, is_parameterized=True):
    """Wrapper around the real make_query that also verifies the history.

    All arguments are forwarded positionally, in order, to ``make_query``.
    When ``submission_type`` is ``'Execute'``, ``verify_history`` is then
    called with a fragment built from the first 20 characters of ``query``.

    ``resources`` defaults to ``None`` and is replaced by a fresh empty list
    on every call, so no list object is shared between calls (the previous
    ``resources=[]`` default was a single list reused by every call).

    Returns whatever ``make_query`` returned.
    """
    if resources is None:
        resources = []

    res = make_query(client, query, submission_type, follow, udfs, settings,
                     resources, wait, name, desc, local, is_parameterized)

    # Should be in the history if it's submitted.
    if submission_type == 'Execute':
        verify_history(client, fragment=collapse_whitespace(query[:20]))

    return res
def test_basic_flow(self):
    """Browse the metastore pages for table ``test`` and read its rows.

    Covers the database listing, the table listings, the table detail page
    and the implicit "read table" query, and checks that the implicit query
    is not recorded in the query history.
    """
    # The "default" database is listed.
    databases_page = self.client.get("/metastore/databases")
    assert_true("default" in databases_page.context["databases"])

    # "test" is listed when no database is named in the URL ...
    listing = self.client.get("/metastore/tables/")
    assert_true("test" in listing.context["tables"])

    # ... and when the "default" database is requested explicitly ...
    listing = self.client.get("/metastore/tables/default")
    assert_true("test" in listing.context["tables"])

    # ... but not for the database name "not_there".
    listing = self.client.get("/metastore/tables/not_there")
    assert_false("test" in listing.context["tables"])

    # The detail page mentions column "foo" and the SerDe info.
    detail = self.client.get("/metastore/table/default/test")
    assert_true("foo" in detail.content)
    assert_true("serdeInfo:SerDeInfo" in detail.content, detail.content)

    # Snapshot the history size. The generic fragment 'test' is only there
    # so that verify_history's own check passes.
    history_before = verify_history(self.client, fragment='test')

    # Read the table data and wait for the underlying query.
    read_page = self.client.get("/metastore/table/default/test/read", follow=True)
    read_page = wait_for_query_to_finish(self.client, read_page, max=30.0)

    # Not every row has to come back at once, but expect more than 10.
    row_count = len(read_page.context['results'])
    assert_true(row_count > 10)

    # Column names appear in the page.
    assert_true("foo" in read_page.content)
    assert_true("bar" in read_page.content)

    # Reading a table is an implicit query and must NOT be saved.
    history_after = verify_history(self.client, fragment='test')
    assert_equal(history_after, history_before,
                 'Implicit queries should not be saved in the history')

    query_context = read_page.context['query_context']
    assert_equal(str(query_context[0]), 'table')
    assert_equal(str(query_context[1]), 'test:default')
def test_basic_flow(self):
    """Browse the metastore (JSON endpoints) for ``test`` in ``self.db_name``.

    Covers the database listing, the table listings, the JSON table detail
    and the implicit "read table" query, and checks that the implicit query
    is not recorded in the query history.
    """
    # The test database is listed.
    databases_page = self.client.get("/metastore/databases")
    assert_true(self.db_name in databases_page.context[0]["databases"])

    # The table listing without a database name renders.
    listing = self.client.get("/metastore/tables/")
    assert_equal(200, listing.status_code)

    # The JSON listing for the test database exposes entries and names.
    tables_url = "/metastore/tables/%s?format=json" % self.db_name
    listing = self.client.get(tables_url)
    data = json.loads(listing.content)
    assert_true('name' in data["tables"][0])
    assert_true("test" in data["table_names"])

    # "not_there" should fall back to the "default" database and render.
    listing = self.client.get("/metastore/tables/not_there")
    assert_equal(200, listing.status_code)

    # The JSON detail lists column "foo" and a SerDe library property.
    detail_url = "/metastore/table/%s/test/?format=json" % self.db_name
    detail = self.client.post(detail_url, {'format': 'json'})
    data = json.loads(detail.content)
    column_names = [col['name'] for col in data['cols']]
    assert_true("foo" in column_names)
    property_names = [prop['col_name'] for prop in data['properties']]
    assert_true("SerDe Library:" in property_names, data)

    # Snapshot the history size. The generic fragment 'test' is only there
    # so that verify_history's own check passes.
    history_before = verify_history(self.client, fragment='test')

    # Read the table data: open the read page, follow up on the watch URL
    # of the query it started, then wait for that query.
    read_url = "/metastore/table/%s/test/read" % self.db_name
    read_page = self.client.get(read_url, follow=True)
    watch_url = reverse("beeswax:api_watch_query_refresh_json",
                        kwargs={'id': read_page.context[0]['query'].id})
    watch_page = self.client.get(watch_url, follow=True)
    finished = wait_for_query_to_finish(self.client, watch_page, max=30.0)

    # Not every row has to come back at once; at least one is required.
    results = fetch_query_result_data(self.client, finished)
    assert_true(len(results['results']) > 0)

    # Reading a table is an implicit query and must NOT be saved.
    history_after = verify_history(self.client, fragment='test')
    assert_equal(history_after, history_before,
                 'Implicit queries should not be saved in the history')
def test_basic_flow(self):
    """
    Test basic query submission: table pages, an aggregate query, a
    multi-page SELECT * and the CSV download.
    """
    # Minimal server operation: the db client echoes its argument.
    assert_equal("echo", beeswax.db_utils.db_client().echo("echo"))

    # The "test" table is listed.
    listing = self.client.get("/beeswax/tables")
    assert_true("test" in listing.context["tables"])

    # Its detail page mentions column "foo".
    detail = self.client.get("/beeswax/table/test")
    assert_true("foo" in detail.content)

    # Snapshot the history size. The generic fragment 'test' is only there
    # so that verify_history's own check passes.
    history_before = verify_history(self.client, fragment='test')

    # Read the table data and wait for the underlying query.
    read_page = self.client.get("/beeswax/table/test/read", follow=True)
    read_page = wait_for_query_to_finish(self.client, read_page, max=30.0)

    # Not every row has to come back at once, but expect more than 10.
    row_count = len(read_page.context['results'])
    assert_true(row_count > 10)

    # Reading a table is an implicit query and must NOT be saved.
    history_after = verify_history(self.client, fragment='test')
    assert_equal(history_after, history_before,
                 'Implicit queries should not be saved in the history')
    read_context = read_page.context['query_context']
    assert_equal(str(read_context[0]), 'table')
    assert_equal(str(read_context[1]), 'test')

    # Aggregate query. The trailing semicolon is deliberate; per the
    # original note, the code strips it out.
    aggregate_query = """ SELECT MIN(foo), MAX(foo), SUM(foo) FROM test; """
    submitted = _make_query(self.client, aggregate_query)
    first_redirect_url = submitted.redirect_chain[0][0]
    assert_true(first_redirect_url.startswith("http://testserver/beeswax/watch/"))

    # This query takes a while, so it should be reported as "running" now.
    self._verify_query_state(beeswax.models.QueryHistory.STATE.running)

    finished = wait_for_query_to_finish(self.client, submitted, max=180.0)
    assert_equal(["0", "255", "32640"], finished.context["results"][0])

    # Because it happens that we're running this with mapred.job.tracker,
    # we won't see any hadoop jobs.
    assert_equal(0, len(finished.context["hadoop_jobs"]), "Shouldn't have found jobs.")
    self._verify_query_state(beeswax.models.QueryHistory.STATE.available)

    # Multi-page request, submitted as a named design.
    select_star_query = """ SELECT * FROM test """
    page = _make_query(self.client, select_star_query, name='select star', local=False)
    page = wait_for_query_to_finish(self.client, page)
    assert_equal(str(page.context['query_context'][0]), 'design')
    assert_true("<td>99</td>" in page.content)
    assert_true(page.context["has_more"])

    # Next page of results.
    next_page_url = "/beeswax/results/%d/%d" % (page.context["query"].id,
                                                page.context["next_row"])
    page = self.client.get(next_page_url)
    assert_true("<td>199</td>" in page.content)

    # Back to the first page of results.
    first_page_url = "/beeswax/results/%d/0" % (page.context["query"].id)
    page = self.client.get(first_page_url)
    assert_true("<td>99</td>" in page.content)
    assert_equal(0, len(page.context["hadoop_jobs"]), "SELECT * shouldn't have started jobs.")

    # Download the data as CSV: header line plus data lines.
    download = self.client.get(page.context["download_urls"]["csv"])
    assert_equal(257, download.content.count("\n"))
def test_basic_flow(self):
    """
    Test basic query submission: table pages, an aggregate query, a
    multi-page SELECT * and the CSV download.
    """
    # Minimal server operation: the db client echoes its argument.
    assert_equal("echo", beeswax.db_utils.db_client().echo("echo"))

    # The "test" table is listed.
    listing = self.client.get("/beeswax/tables")
    assert_true("test" in listing.context["tables"])

    # Its detail page mentions column "foo".
    detail = self.client.get("/beeswax/table/test")
    assert_true("foo" in detail.content)

    # Snapshot the history size. The generic fragment 'test' is only there
    # so that verify_history's own check passes.
    history_before = verify_history(self.client, fragment='test')

    # Read the table data and wait for the underlying query.
    read_page = self.client.get("/beeswax/table/test/read", follow=True)
    read_page = wait_for_query_to_finish(self.client, read_page, max=30.0)

    # Not every row has to come back at once, but expect more than 10.
    row_count = len(read_page.context['results'])
    assert_true(row_count > 10)

    # Reading a table is an implicit query and must NOT be saved.
    history_after = verify_history(self.client, fragment='test')
    assert_equal(history_after, history_before,
                 'Implicit queries should not be saved in the history')
    read_context = read_page.context['query_context']
    assert_equal(str(read_context[0]), 'table')
    assert_equal(str(read_context[1]), 'test')

    # Aggregate query. The trailing semicolon is deliberate; per the
    # original note, the code strips it out.
    aggregate_query = """ SELECT MIN(foo), MAX(foo), SUM(foo) FROM test; """
    submitted = _make_query(self.client, aggregate_query)
    first_redirect_url = submitted.redirect_chain[0][0]
    assert_true(first_redirect_url.startswith("http://testserver/beeswax/watch/"))

    # This query takes a while, so it should be reported as "running" now.
    self._verify_query_state(beeswax.models.QueryHistory.STATE.running)

    finished = wait_for_query_to_finish(self.client, submitted, max=180.0)
    assert_equal(["0", "255", "32640"], finished.context["results"][0])

    # Because it happens that we're running this with mapred.job.tracker,
    # we won't see any hadoop jobs.
    assert_equal(0, len(finished.context["hadoop_jobs"]), "Shouldn't have found jobs.")
    self._verify_query_state(beeswax.models.QueryHistory.STATE.available)

    # Multi-page request, submitted as a named design. The page returned
    # right after submission already offers two download URLs.
    select_star_query = """ SELECT * FROM test """
    page = _make_query(self.client, select_star_query, name='select star', local=False)
    assert_equal(2, len(page.context["download_urls"]))
    page = wait_for_query_to_finish(self.client, page)
    assert_equal(str(page.context['query_context'][0]), 'design')
    assert_true("<td>99</td>" in page.content)
    assert_true(page.context["has_more"])

    # Next page of results.
    next_page_url = "/beeswax/results/%d/%d" % (page.context["query"].id,
                                                page.context["next_row"])
    page = self.client.get(next_page_url)
    assert_true("<td>199</td>" in page.content)

    # Back to the first page of results.
    first_page_url = "/beeswax/results/%d/0" % (page.context["query"].id)
    page = self.client.get(first_page_url)
    assert_true("<td>99</td>" in page.content)
    assert_equal(0, len(page.context["hadoop_jobs"]), "SELECT * shouldn't have started jobs.")

    # Download the data as CSV: header line plus data lines.
    download = self.client.get(page.context["download_urls"]["csv"])
    assert_equal(257, download.content.count("\n"))
def test_report_gen_view(self):
    """
    Test report gen view logic and query generation.
    It requires Hive because report gen automatically gathers all the table names.

    Steps: load the page, generate a SELECT * on the execute page, add a
    column form, remove a sub form, then generate and directly submit a
    saved SELECT * and verify it shows up in the history.
    """
    cli = self.client

    resp = cli.get('/beeswax/report_gen')
    # The original `assert_true(resp.status_code, 200)` passed 200 as the
    # failure *message*, so it succeeded for any non-zero status code.
    # Compare explicitly instead.
    assert_true(resp.status_code == 200, resp.status_code)

    # This generates a SELECT * and takes us to the execute page
    resp = cli.post("/beeswax/report_gen", {
        'columns-next_form_id': '1',
        'columns-0-_exists': 'True',
        'columns-0-col': '*',
        'columns-0-display': 'on',
        'columns-0-source': 'table',
        'columns-0-table': 'test',
        'union.conds-next_form_id': '0',
        'union.bool-bool': 'AND',
        'union.mgmt-next_form_id': '0',
        'button-advanced': 'Submit',
    })
    assert_equal_mod_whitespace(r"SELECT test.* FROM test",
                                resp.context["form"].query.initial["query"])

    # Add a new column
    resp = cli.post("/beeswax/report_gen", {
        'columns-add': 'True',
        'columns-next_form_id': '1',
        'columns-0-_exists': 'True',
        'union.bool-bool': 'AND',
        'union.conds-next_form_id': '1',
        'union.conds-0-_exists': 'True',
        'union.conds-0-op': '=',
        'union.mgmt-next_form_id': '0'
    })
    assert_true('columns-1-_exists' in resp.content)

    # Remove a sub form
    resp = cli.post("/beeswax/report_gen", {
        'columns-next_form_id': '1',
        'columns-0-_exists': 'True',
        'union.bool-bool': 'AND',
        'union.conds-next_form_id': '1',
        'union.conds-0-_exists': 'True',
        'union.mgmt-next_form_id': '1',
        'union.sub0.bool-bool': 'AND',
        'union.sub0.conds-next_form_id': '1',
        'union.sub0.conds-0-_exists': 'True',
        'union.sub0.mgmt-next_form_id': '0',
        'union.sub0.mgmt-remove': 'True'
    })
    assert_true('union.sub0' not in resp.content)

    # This generates a SELECT * and directly submits the query
    resp = cli.post("/beeswax/report_gen", {
        'columns-next_form_id': '1',
        'columns-0-_exists': 'True',
        'columns-0-col': '*',
        'columns-0-display': 'on',
        'columns-0-source': 'table',
        'columns-0-table': 'test',
        'union.conds-next_form_id': '0',
        'union.bool-bool': 'AND',
        'union.mgmt-next_form_id': '0',
        'button-submit': 'Submit',
        'saveform-name': 'select star via report',
        'saveform-save': 'True',
    }, follow=True)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Not every row has to come back at once, but expect more than 10.
    assert_true(len(resp.context['results']) > 10)

    # The submitted query is in the history under its saved design name.
    verify_history(cli, fragment='SELECT test.*', design='select star via report')
def test_report_gen_view(self):
    """
    Test report gen view logic and query generation.
    It requires Hive because report gen automatically gathers all the table names.

    Steps: load the page, generate a SELECT * on the execute page, add a
    column form, remove a sub form, then generate and directly submit a
    saved SELECT * and verify it shows up in the history.
    """
    cli = self.client

    resp = cli.get('/beeswax/report_gen')
    # The original `assert_true(resp.status_code, 200)` passed 200 as the
    # failure *message*, so it succeeded for any non-zero status code.
    # Compare explicitly instead.
    assert_true(resp.status_code == 200, resp.status_code)

    # This generates a SELECT * and takes us to the execute page
    resp = cli.post(
        "/beeswax/report_gen",
        {
            'columns-next_form_id': '1',
            'columns-0-_exists': 'True',
            'columns-0-col': '*',
            'columns-0-display': 'on',
            'columns-0-source': 'table',
            'columns-0-table': 'test',
            'union.conds-next_form_id': '0',
            'union.bool-bool': 'AND',
            'union.mgmt-next_form_id': '0',
            'button-advanced': 'Submit',
        })
    assert_equal_mod_whitespace(
        r"SELECT test.* FROM test",
        resp.context["form"].query.initial["query"])

    # Add a new column
    resp = cli.post(
        "/beeswax/report_gen",
        {
            'columns-add': 'True',
            'columns-next_form_id': '1',
            'columns-0-_exists': 'True',
            'union.bool-bool': 'AND',
            'union.conds-next_form_id': '1',
            'union.conds-0-_exists': 'True',
            'union.conds-0-op': '=',
            'union.mgmt-next_form_id': '0'
        })
    assert_true('columns-1-_exists' in resp.content)

    # Remove a sub form
    resp = cli.post(
        "/beeswax/report_gen",
        {
            'columns-next_form_id': '1',
            'columns-0-_exists': 'True',
            'union.bool-bool': 'AND',
            'union.conds-next_form_id': '1',
            'union.conds-0-_exists': 'True',
            'union.mgmt-next_form_id': '1',
            'union.sub0.bool-bool': 'AND',
            'union.sub0.conds-next_form_id': '1',
            'union.sub0.conds-0-_exists': 'True',
            'union.sub0.mgmt-next_form_id': '0',
            'union.sub0.mgmt-remove': 'True'
        })
    assert_true('union.sub0' not in resp.content)

    # This generates a SELECT * and directly submits the query
    resp = cli.post(
        "/beeswax/report_gen",
        {
            'columns-next_form_id': '1',
            'columns-0-_exists': 'True',
            'columns-0-col': '*',
            'columns-0-display': 'on',
            'columns-0-source': 'table',
            'columns-0-table': 'test',
            'union.conds-next_form_id': '0',
            'union.bool-bool': 'AND',
            'union.mgmt-next_form_id': '0',
            'button-submit': 'Submit',
            'saveform-name': 'select star via report',
            'saveform-save': 'True',
        },
        follow=True)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)

    # Not every row has to come back at once, but expect more than 10.
    assert_true(len(resp.context['results']) > 10)

    # The submitted query is in the history under its saved design name.
    verify_history(cli, fragment='SELECT test.*', design='select star via report')