def test_drop_partition(self):
    """
    Add a partition to `test_partitions`, drop it through the metastore
    drop view, and check the recorded HQL and the partition listing.
    """
    spec = "baz='baz_drop',boom='boom_drop'"
    listing_url = "/metastore/table/%s/test_partitions/partitions" % self.db_name

    # Set-up: make sure the partition exists before trying to drop it.
    add_hql = 'ALTER TABLE `%s`.`test_partitions` ADD IF NOT EXISTS PARTITION (%s);' % (
        self.db_name, spec)
    add_resp = _make_query(self.client, add_hql, database=self.db_name)
    wait_for_query_to_finish(self.client, add_resp, max=30.0)

    # The freshly added partition must show up in the listing page.
    listing = self.client.get(listing_url)
    assert_true("baz_drop" in listing.content)

    # Submit the drop form, then compare the most recent QueryHistory entry
    # with the statement we expect the view to have generated.
    self.client.post(listing_url + "/drop", {'partition_selection': [spec]}, follow=True)
    latest = QueryHistory.objects.latest('id')
    expected_hql = "ALTER TABLE `%s`.`test_partitions` DROP IF EXISTS PARTITION (%s) PURGE" % (
        self.db_name, spec)
    assert_equal_mod_whitespace(expected_hql, latest.query)

    # The partition must no longer be listed.
    listing = self.client.get(listing_url)
    assert_false("baz_drop" in listing.content)
def test_drop_partition(self):
    """
    Round-trip a partition: create it with an ALTER TABLE query, drop it via
    the metastore view, and verify the DROP statement found in QueryHistory.
    """
    db = self.db_name
    partition_spec = "baz='baz_drop',boom='boom_drop'"

    def partition_page():
        # Body of the partition listing page for the test table.
        return self.client.get("/metastore/table/%s/test_partitions/partitions" % db).content

    # Create partition first
    create_hql = 'ALTER TABLE `%s`.`test_partitions` ADD IF NOT EXISTS PARTITION (%s);' % (
        db, partition_spec)
    wait_for_query_to_finish(
        self.client,
        _make_query(self.client, create_hql, database=db),
        max=30.0)

    # Assert partition exists
    assert_true("baz_drop" in partition_page())

    # Drop partition
    self.client.post(
        "/metastore/table/%s/test_partitions/partitions/drop" % db,
        {'partition_selection': [partition_spec]},
        follow=True)
    history_entry = QueryHistory.objects.latest('id')
    assert_equal_mod_whitespace(
        "ALTER TABLE `%s`.`test_partitions` DROP IF EXISTS PARTITION (%s) PURGE" % (db, partition_spec),
        history_entry.query)

    # Assert partition is gone
    assert_false("baz_drop" in partition_page())
def test_create_table_generation(self):
    """
    Check the HQL produced by the create-table form for an external table.

    Input validation is not covered by this test.
    """
    create_url = "/beeswax/create/create_table"

    # The GET must render the form.
    form_page = self.client.get(create_url)
    assert_true("Field terminator" in form_page.content)

    # Form submission for an external, delimited text table with one column.
    form_data = {
        'table-name': 'my_table',
        'table-comment': 'Yo>>>>dude',  # Make sure escaping is sort of ok.
        'table-row_format': 'Delimited',
        'table-field_terminator_0': r'\001',
        'table-collection_terminator_0': r'\002',
        'table-map_key_terminator_0': r'\003',
        'table-file_format': 'TextFile',
        'table-use_default_location': 'False',
        'table-external_location': '/tmp/foo',
        'columns-0-column_name': 'my_col',
        'columns-0-column_type': 'string',
        'columns-0-_exists': 'True',
        'columns-next_form_id': '1',
        'partitions-next_form_id': '0',
    }
    expected_hql = r"""
        CREATE EXTERNAL TABLE `my_table`
        (
          `my_col` string
        )
        COMMENT "Yo>>>>dude"
        ROW FORMAT DELIMITED
          FIELDS TERMINATED BY '\001'
          COLLECTION ITEMS TERMINATED BY '\002'
          MAP KEYS TERMINATED BY '\003'
        STORED AS TextFile LOCATION "/tmp/foo"
    """

    submitted = self.client.post(create_url, form_data)

    # The generated statement is read from the query form's initial data.
    assert_equal_mod_whitespace(expected_hql, submitted.context["form"].query.initial["query"])
    assert_true('/beeswax/table/my_table' in submitted.context['on_success_url'])
def test_partitioned_create_table(self):
    """
    Check the HQL produced by the create-table form when a partition
    column is supplied.
    """
    create_url = "/beeswax/create/create_table"

    # The GET must render the form.
    form_page = self.client.get(create_url)
    assert_true("Field terminator" in form_page.content)

    # Assemble the submission from its three logical groups.
    table_fields = {
        'table-name': 'my_table',
        'table-row_format': 'Delimited',
        'table-field_terminator_0': r'\001',
        'table-collection_terminator_0': r'\002',
        'table-map_key_terminator_0': r'\003',
        'table-file_format': 'TextFile',
        'table-use_default_location': 'True',
    }
    column_fields = {
        'columns-0-column_name': 'my_col',
        'columns-0-column_type': 'string',
        'columns-0-_exists': 'True',
        'columns-next_form_id': '1',
    }
    partition_fields = {
        'partitions-0-column_name': 'my_partition',
        'partitions-0-column_type': 'string',
        'partitions-0-_exists': 'True',
        'partitions-next_form_id': '1',
    }
    form_data = {}
    for group in (table_fields, column_fields, partition_fields):
        form_data.update(group)

    submitted = self.client.post(create_url, form_data)

    expected_hql = r"""
        CREATE TABLE `my_table`
        (
          `my_col` string
        )
        PARTITIONED BY
        (
          `my_partition` string
        )
        ROW FORMAT DELIMITED
          FIELDS TERMINATED BY '\001'
          COLLECTION ITEMS TERMINATED BY '\002'
          MAP KEYS TERMINATED BY '\003'
        STORED AS TextFile
    """
    assert_equal_mod_whitespace(expected_hql, submitted.context["form"].query.initial["query"])
def test_partitioned_create_table(self):
    """
    Submit the create-table form for the `default` database with one
    partition column and compare the query found in the response context.
    """
    create_url = "/beeswax/create/create_table/default"

    # Make sure we get a form
    form_page = self.client.get(create_url)
    assert_true("Field terminator" in form_page.content)

    # Make a submission
    form_data = {
        'table-name': 'my_table2',
        'table-row_format': 'Delimited',
        'table-field_terminator_0': r'\001',
        'table-collection_terminator_0': r'\002',
        'table-map_key_terminator_0': r'\003',
        'table-file_format': 'TextFile',
        'table-use_default_location': 'True',
        'columns-0-column_name': 'my_col',
        'columns-0-column_type': 'string',
        'columns-0-_exists': 'True',
        'columns-next_form_id': '1',
        'partitions-0-column_name': 'my_partition',
        'partitions-0-column_type': 'string',
        'partitions-0-_exists': 'True',
        'partitions-next_form_id': '1',
        'create': 'Create table',
    }
    expected_hql = """
        CREATE TABLE `default.my_table2`
        (
          `my_col` string
        )
        PARTITIONED BY
        (
          `my_partition` string
        )
        ROW FORMAT DELIMITED
          FIELDS TERMINATED BY '\\001'
          COLLECTION ITEMS TERMINATED BY '\\002'
          MAP KEYS TERMINATED BY '\\003'
        STORED AS TextFile
    """

    final_page = self.client.post(create_url, form_data, follow=True)
    assert_equal_mod_whitespace(expected_hql, final_page.context['query'].query)
def test_create_table_generation(self):
    """
    Submit the create-table form for an external table in `default` and
    check the resulting query and the forwarded success URL.

    Input validation is not covered by this test.
    """
    create_url = "/beeswax/create/create_table/default"

    # Make sure we get a form
    form_page = self.client.get(create_url)
    assert_true("Field terminator" in form_page.content)

    # Make a submission
    form_data = {
        'table-name': 'my_table',
        'table-comment': 'Yo>>>>dude',  # Make sure escaping is sort of ok.
        'table-row_format': 'Delimited',
        'table-field_terminator_0': r'\001',
        'table-collection_terminator_0': r'\002',
        'table-map_key_terminator_0': r'\003',
        'table-file_format': 'TextFile',
        'table-use_default_location': 'False',
        'table-external_location': '/tmp/foo',
        'columns-0-column_name': 'my_col',
        'columns-0-column_type': 'string',
        'columns-0-_exists': 'True',
        'columns-next_form_id': '1',
        'partitions-next_form_id': '0',
        'create': 'Create table',
    }
    expected_hql = """
        CREATE EXTERNAL TABLE `default.my_table`
        (
          `my_col` string
        )
        COMMENT "Yo>>>>dude"
        ROW FORMAT DELIMITED
          FIELDS TERMINATED BY '\\001'
          COLLECTION ITEMS TERMINATED BY '\\002'
          MAP KEYS TERMINATED BY '\\003'
        STORED AS TextFile LOCATION "/tmp/foo"
    """

    final_page = self.client.post(create_url, form_data, follow=True)
    assert_equal_mod_whitespace(expected_hql, final_page.context['query'].query)

    # The forwarded parameters double as the failure message for easier debugging.
    forwarded = final_page.context['fwd_params']
    assert_true('on_success_url=%2Fbeeswax%2Ftable%2Fdefault%2Fmy_table' in forwarded, forwarded)
def test_load_data(self):
    """
    Test load data queries.

    These require Hadoop, because they ask the metastore about whether a
    table is partitioned.

    Each submission is checked by comparing the most recent QueryHistory
    entry with the expected LOAD DATA statement. Assertions consistently
    pass (expected, actual).
    """
    load_url = "/metastore/table/%s/test/load" % self.db_name
    partitioned_load_url = "/metastore/table/%s/test_partitions/load" % self.db_name
    data_path = '%(prefix)s/tmp/foo' % {'prefix': self.cluster.fs_prefix}
    params = {'data_path': data_path, 'db': self.db_name}

    # Check that view works
    resp = self.client.get(load_url, follow=True)
    assert_true('Path' in resp.content)

    # Try the submission, with overwrite
    self.client.post(load_url, {'path': data_path, 'overwrite': True}, follow=True)
    query = QueryHistory.objects.latest('id')
    assert_equal_mod_whitespace(
        "LOAD DATA INPATH '%(data_path)s' OVERWRITE INTO TABLE `%(db)s`.`test`" % params,
        query.query)

    # Same submission without overwrite
    self.client.post(load_url, {'path': data_path, 'overwrite': False}, follow=True)
    query = QueryHistory.objects.latest('id')
    assert_equal_mod_whitespace(
        "LOAD DATA INPATH '%(data_path)s' INTO TABLE `%(db)s`.`test`" % params,
        query.query)

    # Try it with partitions
    self.client.post(
        partitioned_load_url,
        {'path': data_path, 'partition_0': "alpha", 'partition_1': "beta"},
        follow=True)
    query = QueryHistory.objects.latest('id')
    assert_equal_mod_whitespace(
        "LOAD DATA INPATH '%(data_path)s' INTO TABLE `%(db)s`.`test_partitions` "
        "PARTITION (baz='alpha', boom='beta')" % params,
        query.query)
def test_load_data(self):
    """
    Exercise the beeswax load-data view and check the generated HQL.

    Needs Hadoop: per the original note, the view asks the metastore
    whether the table is partitioned.
    """
    # The GET must put a form into the template context.
    view = self.client.get("/beeswax/table/test/load")
    assert_true(view.context["form"])

    # (url, posted form data, expected statement), submitted in this order.
    submissions = (
        ("/beeswax/table/test/load",
         {'path': "/tmp/foo", 'overwrite': True},
         "LOAD DATA INPATH '/tmp/foo' OVERWRITE INTO TABLE `test`"),
        ("/beeswax/table/test/load",
         {'path': "/tmp/foo", 'overwrite': False},
         "LOAD DATA INPATH '/tmp/foo' INTO TABLE `test`"),
        # Partitioned table: partition values are passed as partition_<n>.
        ("/beeswax/table/test_partitions/load",
         {'path': "/tmp/foo", 'partition_0': "alpha", 'partition_1': "beta"},
         "LOAD DATA INPATH '/tmp/foo' INTO TABLE `test_partitions` PARTITION (baz='alpha', boom='beta')"),
    )
    for url, form_data, expected_hql in submissions:
        reply = self.client.post(url, form_data)
        assert_equal_mod_whitespace(expected_hql, reply.context["form"].query.initial["query"])
def test_load_data(self):
    """
    Exercise the metastore load-data view for the `default` database and
    check the statement found in the latest QueryHistory entry.

    Needs Hadoop: per the original note, the view asks the metastore
    whether the table is partitioned.
    """
    # Check that view works
    view = self.client.get("/metastore/table/default/test/load", follow=True)
    assert_true('Path' in view.content)

    # (url, posted form data, expected statement), submitted in this order.
    submissions = (
        ("/metastore/table/default/test/load",
         {'path': "/tmp/foo", 'overwrite': True},
         "LOAD DATA INPATH '/tmp/foo' OVERWRITE INTO TABLE `default.test`"),
        ("/metastore/table/default/test/load",
         {'path': "/tmp/foo", 'overwrite': False},
         "LOAD DATA INPATH '/tmp/foo' INTO TABLE `default.test`"),
        # Partitioned table: partition values are passed as partition_<n>.
        ("/metastore/table/default/test_partitions/load",
         {'path': "/tmp/foo", 'partition_0': "alpha", 'partition_1': "beta"},
         "LOAD DATA INPATH '/tmp/foo' INTO TABLE `default.test_partitions` PARTITION (baz='alpha', boom='beta')"),
    )
    for url, form_data, expected_hql in submissions:
        self.client.post(url, form_data, follow=True)
        latest = QueryHistory.objects.latest('id')
        assert_equal_mod_whitespace(expected_hql, latest.query)
def test_load_data(self):
    """
    Exercise the catalog load-data view for the `default` database and
    check the statement found in the latest QueryHistory entry.

    Needs Hadoop: per the original note, the view asks the metastore
    whether the table is partitioned.
    """
    def submit_and_check(url, form_data, expected_hql):
        # Post the load form, then compare the newest history entry.
        self.client.post(url, form_data, follow=True)
        assert_equal_mod_whitespace(expected_hql, QueryHistory.objects.latest('id').query)

    plain_url = "/catalog/table/default/test/load"
    partitioned_url = "/catalog/table/default/test_partitions/load"

    # Check that view works
    view = self.client.get(plain_url, follow=True)
    assert_true('Path' in view.content)

    # Try the submission
    submit_and_check(
        plain_url,
        {'path': "/tmp/foo", 'overwrite': True},
        "LOAD DATA INPATH '/tmp/foo' OVERWRITE INTO TABLE `default.test`")
    submit_and_check(
        plain_url,
        {'path': "/tmp/foo", 'overwrite': False},
        "LOAD DATA INPATH '/tmp/foo' INTO TABLE `default.test`")

    # Try it with partitions
    submit_and_check(
        partitioned_url,
        {'path': "/tmp/foo", 'partition_0': "alpha", 'partition_1': "beta"},
        "LOAD DATA INPATH '/tmp/foo' INTO TABLE `default.test_partitions` PARTITION (baz='alpha', boom='beta')")
def test_load_data(self):
    """
    Exercise the beeswax load-data view and check the HQL placed in the
    query form's initial data.

    Needs Hadoop: per the original note, the view asks the metastore
    whether the table is partitioned.
    """
    def generated_hql(url, **form_data):
        # Post the load form and return the statement the view generated.
        reply = self.client.post(url, form_data)
        return reply.context["form"].query.initial["query"]

    plain_url = "/beeswax/table/test/load"

    # Check that view works
    view = self.client.get(plain_url)
    assert_true(view.context["form"])

    # Try the submission
    assert_equal_mod_whitespace(
        "LOAD DATA INPATH '/tmp/foo' OVERWRITE INTO TABLE `test`",
        generated_hql(plain_url, path="/tmp/foo", overwrite=True))
    assert_equal_mod_whitespace(
        "LOAD DATA INPATH '/tmp/foo' INTO TABLE `test`",
        generated_hql(plain_url, path="/tmp/foo", overwrite=False))

    # Try it with partitions
    assert_equal_mod_whitespace(
        "LOAD DATA INPATH '/tmp/foo' INTO TABLE `test_partitions` PARTITION (baz='alpha', boom='beta')",
        generated_hql("/beeswax/table/test_partitions/load",
                      path="/tmp/foo", partition_0="alpha", partition_1="beta"))
def test_report_gen_query():
    """
    Exercise the HQL-generation building blocks: table, column and constant
    manifests, boolean conditions, and logical unions with compaction.
    """
    check = assert_equal_mod_whitespace

    # --- Table manifest: without an alias both forms are the bare name.
    tbl = QTable('test_table')
    check(tbl.manifest(is_from=False), 'test_table')
    check(tbl.manifest(is_from=True), 'test_table')
    tbl.alias = 'foo'
    check(tbl.manifest(is_from=False), 'foo')
    check(tbl.manifest(is_from=True), 'test_table foo')

    # --- Column manifest: the alias only shows in select and sort forms.
    column = ColumnSelection(tbl, 'col')
    check(column.manifest(), 'foo.col')
    check(column.manifest(is_select=True), 'foo.col')
    check(column.manifest(is_sort=True), 'foo.col')
    column.alias = 'X'
    check(column.manifest(), 'foo.col')
    check(column.manifest(is_select=True), 'foo.col AS X')
    check(column.manifest(is_sort=True), 'X')

    # --- Constant manifest: numeric text stays bare, other text is quoted.
    int_const = ConstSelection('69')
    int_const.alias = 'INT'
    check(int_const.manifest(), '69')
    str_const = ConstSelection('quote-"')
    check(str_const.manifest(), '"quote-\\""')
    check(str_const.manifest(is_select=True), '"quote-\\""')
    str_const.alias = 'Y'
    check(str_const.manifest(), '"quote-\\""')
    check(str_const.manifest(is_select=True), '"quote-\\"" AS Y')

    # --- Boolean condition manifest
    condition = BooleanCondition(column, '<>', str_const)
    assert_true(not condition.is_joinable())
    check(condition.manifest(), 'foo.col <> "quote-\\""')

    # --- Logical union: compact() leaves a single-condition union unchanged.
    root = LogicalUnion('AND')
    root.add_cond(condition)
    check(root.manifest(), '( foo.col <> "quote-\\"" )')
    root.compact()
    check(root.manifest(), '( foo.col <> "quote-\\"" )')
    root.add_cond(BooleanCondition(int_const, '=', int_const))
    check(root.manifest(), '( foo.col <> "quote-\\"" AND 69 = 69 )')

    # A nested OR union counts towards the root's size.
    nested = LogicalUnion('OR')
    nested.add_cond(BooleanCondition(column, 'IS NULL'))
    root.add_subunion(nested)
    assert_equal(root.size(), 3)
    check(root.manifest(),
          '( foo.col <> "quote-\\"" AND 69 = 69 AND ( foo.col IS NULL ) )')

    # --- Union compaction: a wrapper holding only a sub-union collapses.
    wrapper = LogicalUnion('AND')
    wrapper.add_subunion(root)
    check(wrapper.manifest(),
          '( ( foo.col <> "quote-\\"" AND 69 = 69 AND ( foo.col IS NULL ) ) )')
    wrapper.compact()
    check(wrapper.manifest(),
          '( foo.col <> "quote-\\"" AND 69 = 69 AND ( foo.col IS NULL ) )')
def test_report_gen_view(self):
    """
    Test report gen view logic and query generation.

    It requires Hive because report gen automatically gathers all the table
    names.

    Covers, in order: rendering the page, generating a SELECT * that lands
    on the execute page, adding a column sub-form, removing a union
    sub-form, and generating a SELECT * that is submitted and saved as a
    design.
    """
    cli = self.client
    report_url = '/beeswax/report_gen'

    resp = cli.get(report_url)
    # assert_true(status, 200) would treat 200 as the failure message and
    # pass for any non-zero status; compare the status code explicitly.
    assert_equal(resp.status_code, 200)

    # Fields shared by both SELECT * submissions below.
    select_star = {
        'columns-next_form_id': '1',
        'columns-0-_exists': 'True',
        'columns-0-col': '*',
        'columns-0-display': 'on',
        'columns-0-source': 'table',
        'columns-0-table': 'test',
        'union.conds-next_form_id': '0',
        'union.bool-bool': 'AND',
        'union.mgmt-next_form_id': '0',
    }

    # This generates a SELECT * and takes us to the execute page
    advanced_form = dict(select_star)
    advanced_form['button-advanced'] = 'Submit'
    resp = cli.post(report_url, advanced_form)
    assert_equal_mod_whitespace(r"SELECT test.* FROM test", resp.context["form"].query.initial["query"])

    # Add a new column
    resp = cli.post(report_url, {
        'columns-add': 'True',
        'columns-next_form_id': '1',
        'columns-0-_exists': 'True',
        'union.bool-bool': 'AND',
        'union.conds-next_form_id': '1',
        'union.conds-0-_exists': 'True',
        'union.conds-0-op': '=',
        'union.mgmt-next_form_id': '0',
    })
    assert_true('columns-1-_exists' in resp.content)

    # Remove a sub form
    resp = cli.post(report_url, {
        'columns-next_form_id': '1',
        'columns-0-_exists': 'True',
        'union.bool-bool': 'AND',
        'union.conds-next_form_id': '1',
        'union.conds-0-_exists': 'True',
        'union.mgmt-next_form_id': '1',
        'union.sub0.bool-bool': 'AND',
        'union.sub0.conds-next_form_id': '1',
        'union.sub0.conds-0-_exists': 'True',
        'union.sub0.mgmt-next_form_id': '0',
        'union.sub0.mgmt-remove': 'True',
    })
    assert_true('union.sub0' not in resp.content)

    # This generates a SELECT * and directly submits the query
    submit_form = dict(select_star)
    submit_form.update({
        'button-submit': 'Submit',
        'saveform-name': 'select star via report',
        'saveform-save': 'True',
    })
    resp = cli.post(report_url, submit_form, follow=True)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)
    # Note that it may not return all rows at once. But we expect more than 10.
    assert_true(len(resp.context['results']) > 10)

    verify_history(cli, fragment='SELECT test.*', design='select star via report')
def test_report_gen_query():
    """
    Check the manifest() output of the report-gen query objects (QTable,
    ColumnSelection, ConstSelection, BooleanCondition, LogicalUnion).
    """
    # Expected strings that recur throughout the test.
    quoted_const = '"quote-\\""'
    single_cond = '( foo.col <> "quote-\\"" )'
    three_way = '( foo.col <> "quote-\\"" AND 69 = 69 AND ( foo.col IS NULL ) )'

    # Table manifest
    source_table = QTable('test_table')
    for from_flag in (False, True):
        assert_equal_mod_whitespace(source_table.manifest(is_from=from_flag), 'test_table')
    source_table.alias = 'foo'
    assert_equal_mod_whitespace(source_table.manifest(is_from=False), 'foo')
    assert_equal_mod_whitespace(source_table.manifest(is_from=True), 'test_table foo')

    # Column manifest
    selection = ColumnSelection(source_table, 'col')
    assert_equal_mod_whitespace(selection.manifest(), 'foo.col')
    assert_equal_mod_whitespace(selection.manifest(is_select=True), 'foo.col')
    assert_equal_mod_whitespace(selection.manifest(is_sort=True), 'foo.col')
    selection.alias = 'X'
    assert_equal_mod_whitespace(selection.manifest(), 'foo.col')
    assert_equal_mod_whitespace(selection.manifest(is_select=True), 'foo.col AS X')
    assert_equal_mod_whitespace(selection.manifest(is_sort=True), 'X')

    # Const manifest
    number = ConstSelection('69')
    number.alias = 'INT'
    assert_equal_mod_whitespace(number.manifest(), '69')
    text = ConstSelection('quote-"')
    assert_equal_mod_whitespace(text.manifest(), quoted_const)
    assert_equal_mod_whitespace(text.manifest(is_select=True), quoted_const)
    text.alias = 'Y'
    assert_equal_mod_whitespace(text.manifest(), quoted_const)
    assert_equal_mod_whitespace(text.manifest(is_select=True), '"quote-\\"" AS Y')

    # Boolean condition manifest
    not_equal = BooleanCondition(selection, '<>', text)
    assert_true(not not_equal.is_joinable())
    assert_equal_mod_whitespace(not_equal.manifest(), 'foo.col <> "quote-\\""')

    # Union with one condition, before and after compaction
    and_union = LogicalUnion('AND')
    and_union.add_cond(not_equal)
    assert_equal_mod_whitespace(and_union.manifest(), single_cond)
    and_union.compact()
    assert_equal_mod_whitespace(and_union.manifest(), single_cond)

    # Second condition, then a nested OR union
    and_union.add_cond(BooleanCondition(number, '=', number))
    assert_equal_mod_whitespace(and_union.manifest(), '( foo.col <> "quote-\\"" AND 69 = 69 )')
    or_union = LogicalUnion('OR')
    or_union.add_cond(BooleanCondition(selection, 'IS NULL'))
    and_union.add_subunion(or_union)
    assert_equal(and_union.size(), 3)
    assert_equal_mod_whitespace(and_union.manifest(), three_way)

    # Test union compaction
    outer = LogicalUnion('AND')
    outer.add_subunion(and_union)
    assert_equal_mod_whitespace(
        outer.manifest(),
        '( ( foo.col <> "quote-\\"" AND 69 = 69 AND ( foo.col IS NULL ) ) )')
    outer.compact()
    assert_equal_mod_whitespace(outer.manifest(), three_way)
def test_report_gen_view(self):
    """
    Test report gen view logic and query generation.

    It requires Hive because report gen automatically gathers all the table
    names.
    """
    cli = self.client

    resp = cli.get('/beeswax/report_gen')
    # The original assert_true(resp.status_code, 200) used 200 as the failure
    # message and so passed for any non-zero status; check equality instead.
    assert_equal(resp.status_code, 200)

    # This generates a SELECT * and takes us to the execute page
    resp = cli.post(
        "/beeswax/report_gen",
        {
            'columns-next_form_id': '1',
            'columns-0-_exists': 'True',
            'columns-0-col': '*',
            'columns-0-display': 'on',
            'columns-0-source': 'table',
            'columns-0-table': 'test',
            'union.conds-next_form_id': '0',
            'union.bool-bool': 'AND',
            'union.mgmt-next_form_id': '0',
            'button-advanced': 'Submit',
        })
    assert_equal_mod_whitespace(
        r"SELECT test.* FROM test",
        resp.context["form"].query.initial["query"])

    # Add a new column
    resp = cli.post(
        "/beeswax/report_gen",
        {
            'columns-add': 'True',
            'columns-next_form_id': '1',
            'columns-0-_exists': 'True',
            'union.bool-bool': 'AND',
            'union.conds-next_form_id': '1',
            'union.conds-0-_exists': 'True',
            'union.conds-0-op': '=',
            'union.mgmt-next_form_id': '0',
        })
    assert_true('columns-1-_exists' in resp.content)

    # Remove a sub form
    resp = cli.post(
        "/beeswax/report_gen",
        {
            'columns-next_form_id': '1',
            'columns-0-_exists': 'True',
            'union.bool-bool': 'AND',
            'union.conds-next_form_id': '1',
            'union.conds-0-_exists': 'True',
            'union.mgmt-next_form_id': '1',
            'union.sub0.bool-bool': 'AND',
            'union.sub0.conds-next_form_id': '1',
            'union.sub0.conds-0-_exists': 'True',
            'union.sub0.mgmt-next_form_id': '0',
            'union.sub0.mgmt-remove': 'True',
        })
    assert_true('union.sub0' not in resp.content)

    # This generates a SELECT * and directly submits the query
    resp = cli.post(
        "/beeswax/report_gen",
        {
            'columns-next_form_id': '1',
            'columns-0-_exists': 'True',
            'columns-0-col': '*',
            'columns-0-display': 'on',
            'columns-0-source': 'table',
            'columns-0-table': 'test',
            'union.conds-next_form_id': '0',
            'union.bool-bool': 'AND',
            'union.mgmt-next_form_id': '0',
            'button-submit': 'Submit',
            'saveform-name': 'select star via report',
            'saveform-save': 'True',
        },
        follow=True)
    resp = wait_for_query_to_finish(self.client, resp, max=30.0)
    # Note that it may not return all rows at once. But we expect more than 10.
    assert_true(len(resp.context['results']) > 10)

    verify_history(cli, fragment='SELECT test.*', design='select star via report')