def test_role_privilege_case(self, vector):
    """IMPALA-5582: Store sentry privileges in lower case.

    Grants select privileges to a role on tables/databases whose names are
    specified in lower, upper and mixed case, then verifies that these
    privileges do not vanish on a sentryProxy thread update.
    """
    db_name = "test_role_privilege_case_x_" + get_random_id(5)
    db_name_upper_case = "TEST_ROLE_PRIVILEGE_CASE_Y_" + get_random_id(5).upper()
    db_name_mixed_case = "TesT_Role_PRIVIlege_case_z" + get_random_id(5)
    role_name = "test_role_" + get_random_id(5)

    all_databases = (db_name, db_name_upper_case, db_name_mixed_case)
    # Substrings that must each show up in some row of SHOW GRANT ROLE,
    # always in lower case regardless of how the object was named.
    expected_fragments = (
        'test1',
        'test2',
        'test3',
        db_name_upper_case.lower(),
        db_name_mixed_case.lower(),
    )

    def run(statement):
        # self.client is looked up on every call, exactly as a direct
        # self.client.execute(...) call would do.
        return self.client.execute(statement)

    def check_grants():
        grants = run("show grant role {0}".format(role_name))
        for fragment in expected_fragments:
            assert any(fragment in row for row in grants.data)

    try:
        run("create role {0}".format(role_name))
        run("grant all on server to {0}".format(role_name))
        group_name = grp.getgrnam(getuser()).gr_name
        run("grant role {0} to group `{1}`".format(role_name, group_name))
        for database in all_databases:
            run("create database " + database)
        for table in ("test1", "TEST2", "Test3"):
            run("create table if not exists {0}.{1}(i int)".format(db_name, table))
        for table in ("test1", "TEST2", "TesT3"):
            run("grant select on table {0}.{1} to {2}".format(
                db_name, table, role_name))
        for database in all_databases:
            run("grant all on database {0} to {1}".format(database, role_name))
        check_grants()
        # Sleep for 2 seconds and make sure that the privileges on all 3
        # tables still persist on a sentryProxy thread update.
        # sentry_catalog_polling_frequency_s is set to 1 second.
        sleep(2)
        check_grants()
    finally:
        run("drop database if exists {0}".format(db_name_upper_case))
        run("drop database if exists {0}".format(db_name_mixed_case))
        run("drop database if exists {0} cascade".format(db_name))
        run("drop role {0}".format(role_name))
def test_role_privilege_case(self, vector):
    """IMPALA-5582: Store sentry privileges in lower case.

    Grants select privileges to a role on tables/databases whose names are
    specified in lower, upper and mixed case, then verifies that these
    privileges do not vanish on a sentryProxy thread update.
    """
    db_name = "test_role_privilege_case_x_" + get_random_id(5)
    db_name_upper_case = "TEST_ROLE_PRIVILEGE_CASE_Y_" + get_random_id(5).upper()
    db_name_mixed_case = "TesT_Role_PRIVIlege_case_z" + get_random_id(5)
    role_name = "test_role_" + get_random_id(5)

    databases = (db_name, db_name_upper_case, db_name_mixed_case)
    show_grant_stmt = "show grant role {0}".format(role_name)

    def execute(sql):
        # Resolve self.client at call time, as each direct call would.
        return self.client.execute(sql)

    def assert_privileges_present():
        # Each name must appear, lower-cased, in at least one output row.
        shown = execute(show_grant_stmt)
        lowered_names = [
            'test1',
            'test2',
            'test3',
            db_name_upper_case.lower(),
            db_name_mixed_case.lower(),
        ]
        for name in lowered_names:
            assert any(name in line for line in shown.data)

    try:
        execute("create role {0}".format(role_name))
        execute("grant all on server to {0}".format(role_name))
        execute("grant role {0} to group `{1}`".format(
            role_name, grp.getgrnam(getuser()).gr_name))
        for name in databases:
            execute("create database " + name)
        for table_name in ("test1", "TEST2", "Test3"):
            execute("create table if not exists {0}.{1}(i int)".format(
                db_name, table_name))
        for table_name in ("test1", "TEST2", "TesT3"):
            execute("grant select on table {0}.{1} to {2}".format(
                db_name, table_name, role_name))
        for name in databases:
            execute("grant all on database {0} to {1}".format(name, role_name))
        assert_privileges_present()
        # Sleep for 2 seconds and make sure that the privileges on all 3
        # tables still persist on a sentryProxy thread update.
        # sentry_catalog_polling_frequency_s is set to 1 second.
        sleep(2)
        assert_privileges_present()
    finally:
        execute("drop database if exists {0}".format(db_name_upper_case))
        execute("drop database if exists {0}".format(db_name_mixed_case))
        execute("drop database if exists {0} cascade".format(db_name))
        execute("drop role {0}".format(role_name))
def __enter__(self):
    """Set up the two staging tables and return this object.

    Picks a randomly suffixed name for each staging table, records the names
    on the instance, and passes each (source table, staging name) pair to
    self._create_new_table_as.
    """
    exec_result_staging = 'ExecutionResultsStaging_' + get_random_id(5)
    self._staging_exec_result_table = exec_result_staging
    profile_staging = 'RuntimeProfilesStaging_' + get_random_id(5)
    self._staging_profile_table = profile_staging

    staging_pairs = (
        ('ExecutionResults', exec_result_staging),
        ('RuntimeProfiles', profile_staging),
    )
    for source_table, staging_table in staging_pairs:
        self._create_new_table_as(source_table, staging_table)
    return self
def test_role_update(self, vector):
    """IMPALA-5355: The initial update from the statestore has the privileges and
    roles in reverse order if a role was modified, but not the associated
    privilege. Verify that Impala is able to handle this.
    """
    role_name = "test_role_" + get_random_id(5)
    show_grant_stmt = "show grant role {0}".format(role_name)

    def run(statement):
        # Deliberately re-reads self.client on each call instead of caching the
        # bound method, in case the client is rebound during the test
        # (presumably by restart_first_impalad -- TODO confirm).
        return self.client.execute(statement)

    def assert_functional_tables_visible():
        tables = run("show tables in functional")
        assert 'alltypes' in tables.data

    try:
        run("create role {0}".format(role_name))
        run("grant all on server to {0}".format(role_name))
        # Wait a few seconds to make sure the update propagates to the statestore.
        sleep(3)
        # Update the role, increasing its catalog version.
        group_name = grp.getgrnam(getuser()).gr_name
        run("grant role {0} to group `{1}`".format(role_name, group_name))
        assert_functional_tables_visible()
        grants_before = run(show_grant_stmt)
        # Wait a few seconds before restarting Impalad to make sure that the
        # Catalog gets updated.
        sleep(3)
        self.restart_first_impalad()
        first_impalad_metrics = MetricVerifier(self.cluster.impalads[0].service)
        first_impalad_metrics.wait_for_metric("catalog.ready", True)
        # Verify that we still have the right privileges after the first
        # impalad was restarted.
        assert_functional_tables_visible()
        grants_after = run(show_grant_stmt)
        assert grants_before.data == grants_after.data
    finally:
        run("drop role {0}".format(role_name))
def test_hive_udfs_missing_jar(self, vector):
    """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present on HDFS.

    Copies hive-exec.jar to a randomly named jar, creates
    default.pi_missing_jar() against it with sync_ddl enabled, deletes the
    jar, and then runs the function from a different impalad. The query is
    expected to raise ImpalaBeeswaxException containing
    "Failed to get file info".
    """
    from copy import copy

    # Copy hive-exec.jar to a temporary file
    jar_path = "tmp/" + get_random_id(5) + ".jar"
    self.hdfs_client.copy('test-warehouse/hive-exec.jar', jar_path)
    drop_fn_stmt = "drop function if exists default.pi_missing_jar()"
    # Adjacent literals are joined before '%' is applied, so the statement is
    # a single clean string with no continuation backslash inside it.
    create_fn_stmt = (
        "create function default.pi_missing_jar() returns double "
        "location '/%s' symbol='org.apache.hadoop.hive.ql.udf.UDFPI'" % jar_path)

    cluster = ImpalaCluster()
    impalad = cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    # Create and drop functions with sync_ddl to make sure they are reflected
    # in every impalad. Work on a copy so that the options object held by
    # `vector` is not modified for later users of the same vector.
    exec_option = copy(vector.get_value('exec_option'))
    exec_option['sync_ddl'] = 1

    self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
    self.execute_query_expect_success(client, create_fn_stmt, exec_option)
    # Delete the udf jar
    self.hdfs_client.delete_file_dir(jar_path)

    different_impalad = cluster.get_different_impalad(impalad)
    client = different_impalad.service.create_beeswax_client()
    # Run a query using the udf from an impalad other than the one
    # we used to create the function. This is to bypass loading from
    # the cache
    try:
        self.execute_query_using_client(
            client, "select default.pi_missing_jar()", vector)
    except ImpalaBeeswaxException as e:
        assert "Failed to get file info" in str(e)
    else:
        assert False, "Query expected to fail"
def _test_ownership(self):
    """Tests ownership privileges for databases and tables with ranger along
    with some known quirks in the implementation."""
    test_user = getuser()
    test_db = "test_ranger_ownership_" + get_random_id(5).lower()

    create_table_stmt = "create table {0}.foo(a int)".format(test_db)
    show_tables_stmt = "show tables in {0}".format(test_db)
    select_stmt = "select * from {0}.foo".format(test_db)

    def as_admin(statement):
        # Every admin statement in this test is issued with True as the last
        # argument.
        return self._run_query_as_user(statement, ADMIN, True)

    def as_user(statement, flag):
        # `flag` is forwarded unchanged as the last argument; going by how this
        # test uses it, False marks statements that should be rejected.
        return self._run_query_as_user(statement, test_user, flag)

    # Create a test database as "admin" user. Owner is set accordingly.
    as_admin("create database {0}".format(test_db))
    try:
        # Try to create a table under test_db as current user. It should fail.
        as_user(create_table_stmt, False)

        # Change the owner of the database to the current user.
        as_admin("alter database {0} set owner user {1}".format(test_db, test_user))
        as_admin("refresh authorization")

        # Create should succeed now.
        as_user(create_table_stmt, True)

        # Run show tables on the db. The resulting list should be empty. This
        # happens because the created table's ownership information is not
        # aggressively cached by the current Catalog implementations. Hence the
        # analysis pass does not have access to the ownership information to
        # verify if the current session user is actually the owner. We need to
        # fix this by caching the HMS metadata more aggressively when the table
        # loads. TODO(IMPALA-8937).
        listing = as_user(show_tables_stmt, True)
        assert len(listing.data) == 0

        # Run a simple query that warms up the table metadata and repeat
        # SHOW TABLES.
        as_user(select_stmt, True)
        listing = as_user(show_tables_stmt, True)
        assert len(listing.data) == 1
        assert "foo" in listing.data

        # Change the owner of the db back to the admin user
        as_admin("alter database {0} set owner user {1}".format(test_db, ADMIN))
        denied = as_user(show_tables_stmt, False)
        expected_error = (
            "User '{0}' does not have privileges to access: {1}.*.*".format(
                test_user, test_db))
        assert expected_error in str(denied)

        # test_user is still the owner of the table, so select should work fine.
        as_user(select_stmt, True)

        # Change the table owner back to admin.
        as_admin("alter table {0}.foo set owner user {1}".format(test_db, ADMIN))

        # test_user should not be authorized to run the queries anymore.
        denied = as_user(select_stmt, False)
        expected_error = (
            "AuthorizationException: User '{0}' does not have privileges to execute"
            " 'SELECT' on: {1}.foo").format(test_user, test_db)
        assert expected_error in str(denied)
    finally:
        as_admin("drop database {0} cascade".format(test_db))
def test_hive_udfs_missing_jar(self, vector, unique_database):
    """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present on HDFS.

    Copies hive-exec.jar to a randomly named jar under the unique database's
    warehouse directory, creates `pi_missing_jar` in that database with
    sync_ddl enabled, removes the jar, and then runs the function from a
    different impalad. The query is expected to raise ImpalaBeeswaxException
    containing "Failed to get file info".
    """
    # Copy hive-exec.jar to a temporary file
    jar_path = get_fs_path(
        "/test-warehouse/{0}.db/".format(unique_database)
        + get_random_id(5) + ".jar")
    hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
    self.filesystem_client.copy(hive_jar, jar_path)
    drop_fn_stmt = (
        "drop function if exists "
        "`{0}`.`pi_missing_jar`()".format(unique_database))
    create_fn_stmt = (
        "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
        "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(
            unique_database, jar_path))

    cluster = ImpalaCluster.get_e2e_test_cluster()
    impalad = cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    # Create and drop functions with sync_ddl to make sure they are reflected
    # in every impalad. The options are copied so the ones held by `vector`
    # stay untouched.
    exec_option = copy(vector.get_value('exec_option'))
    exec_option['sync_ddl'] = 1

    self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
    self.execute_query_expect_success(client, create_fn_stmt, exec_option)
    # Delete the udf jar
    check_call(["hadoop", "fs", "-rm", jar_path])

    different_impalad = cluster.get_different_impalad(impalad)
    client = different_impalad.service.create_beeswax_client()
    # Run a query using the udf from an impalad other than the one
    # we used to create the function. This is to bypass loading from
    # the cache
    try:
        self.execute_query_using_client(
            client, "select `{0}`.`pi_missing_jar`()".format(unique_database),
            vector)
    except ImpalaBeeswaxException as e:
        assert "Failed to get file info" in str(e)
    else:
        assert False, "Query expected to fail"
def test_hive_udfs_missing_jar(self, vector, unique_database):
    """IMPALA-2365: Impalad shouldn't crash if the udf jar isn't present on HDFS.

    Copies hive-exec.jar (via `hadoop fs -cp`) to a randomly named jar under
    the unique database's warehouse directory, creates `pi_missing_jar` in
    that database with sync_ddl enabled, removes the jar, and then runs the
    function from a different impalad. The query is expected to raise
    ImpalaBeeswaxException containing "Failed to get file info".
    """
    # Copy hive-exec.jar to a temporary file
    jar_path = get_fs_path(
        "/test-warehouse/{0}.db/".format(unique_database)
        + get_random_id(5) + ".jar")
    hive_jar = get_fs_path("/test-warehouse/hive-exec.jar")
    check_call(["hadoop", "fs", "-cp", hive_jar, jar_path])
    drop_fn_stmt = (
        "drop function if exists "
        "`{0}`.`pi_missing_jar`()".format(unique_database))
    create_fn_stmt = (
        "create function `{0}`.`pi_missing_jar`() returns double location '{1}' "
        "symbol='org.apache.hadoop.hive.ql.udf.UDFPI'".format(
            unique_database, jar_path))

    cluster = ImpalaCluster()
    impalad = cluster.get_any_impalad()
    client = impalad.service.create_beeswax_client()
    # Create and drop functions with sync_ddl to make sure they are reflected
    # in every impalad. The options are copied so the ones held by `vector`
    # stay untouched.
    exec_option = copy(vector.get_value('exec_option'))
    exec_option['sync_ddl'] = 1

    self.execute_query_expect_success(client, drop_fn_stmt, exec_option)
    self.execute_query_expect_success(client, create_fn_stmt, exec_option)
    # Delete the udf jar
    check_call(["hadoop", "fs", "-rm", jar_path])

    different_impalad = cluster.get_different_impalad(impalad)
    client = different_impalad.service.create_beeswax_client()
    # Run a query using the udf from an impalad other than the one
    # we used to create the function. This is to bypass loading from
    # the cache
    try:
        self.execute_query_using_client(
            client, "select `{0}`.`pi_missing_jar`()".format(unique_database),
            vector)
    except ImpalaBeeswaxException as e:
        assert "Failed to get file info" in str(e)
    else:
        assert False, "Query expected to fail"
def __enter__(self):
    """Prepare the staging tables for this context and return self.

    A random suffix is generated separately for each staging table name; the
    names are stored on the instance and each one is handed, together with
    its source table name, to self._create_new_table_as.
    """
    self._staging_exec_result_table = 'ExecutionResultsStaging_' + get_random_id(5)
    self._staging_profile_table = 'RuntimeProfilesStaging_' + get_random_id(5)

    source_to_staging = [
        ('ExecutionResults', self._staging_exec_result_table),
        ('RuntimeProfiles', self._staging_profile_table),
    ]
    for source_name, staging_name in source_to_staging:
        self._create_new_table_as(source_name, staging_name)
    return self