def _get_delegation_tokens(self, username, delegation_token_dir):
  """
  Obtain Hadoop delegation tokens for `username` from every
  security-enabled MR/HDFS cluster.

  If operating against Kerberized Hadoop, tokens must be acquired here
  rather than in the subprocess, because the subprocess has no Kerberos
  credentials. Each token is spooled into a NamedTemporaryFile created
  under `delegation_token_dir`.

  Returns the list of open temp-file objects. The caller must keep them
  alive while the token files are needed: closing one deletes its file.
  """
  token_files = []
  clusters = list(all_mrclusters().values()) + list(get_all_hdfs().values())
  LOG.debug("Clusters to potentially acquire tokens for: %s" % (repr(clusters),))
  for cluster in clusters:
    if not cluster.security_enabled:
      continue
    # Impersonate the target user only for the duration of the token
    # request, then restore the cluster's previous user.
    original_user = cluster.user
    try:
      cluster.setuser(username)
      token = cluster.get_delegation_token()
      tmp = tempfile.NamedTemporaryFile(dir=delegation_token_dir)
      tmp.write(token.delegationTokenBytes)
      tmp.flush()
      token_files.append(tmp)
    finally:
      cluster.setuser(original_user)
  return token_files
def _get_delegation_tokens(self, username, delegation_token_dir):
  """
  If operating against Kerberized Hadoop, we'll need to have obtained
  delegation tokens for the user we want to run the subprocess as.
  We have to do it here rather than in the subprocess because the
  subprocess does not have Kerberos credentials in that case.

  Returns a list of paths to token files created under
  `delegation_token_dir`; the caller is responsible for removing them
  (mkstemp files are not auto-deleted).
  """
  delegation_token_files = []
  all_clusters = []
  all_clusters += all_mrclusters().values()
  all_clusters += get_all_hdfs().values()
  LOG.debug("Clusters to potentially acquire tokens for: %s" % (repr(all_clusters),))
  for cluster in all_clusters:
    if cluster.security_enabled:
      # Impersonate the target user only while requesting the token.
      current_user = cluster.user
      try:
        cluster.setuser(username)
        token = cluster.get_delegation_token(KERBEROS.HUE_PRINCIPAL.get())
        token_file_no, path = tempfile.mkstemp(dir=delegation_token_dir)
        # BUG FIX: close the descriptor even if os.write raises, so the
        # fd is not leaked on error (previously an os.write failure left
        # the descriptor open).
        try:
          os.write(token_file_no, token)
        finally:
          os.close(token_file_no)
        delegation_token_files.append(path)
      finally:
        cluster.setuser(current_user)
  return delegation_token_files
def test_non_default_cluster():
  """A cluster configured under a non-default name is discovered and usable."""
  NON_DEFAULT_NAME = 'non_default'
  cluster.clear_caches()
  restorers = [
    conf.HDFS_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
    conf.MR_CLUSTERS.set_for_testing({NON_DEFAULT_NAME: {}}),
  ]
  try:
    # This is indeed the only hdfs/mr cluster
    assert_equal(1, len(cluster.get_all_hdfs()))
    assert_equal(1, len(cluster.all_mrclusters()))
    assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))
    assert_true(cluster.get_mrcluster(NON_DEFAULT_NAME))

    client = make_logged_in_client()
    # That we can get to a view without errors means that the middlewares work
    client.get('/about')
  finally:
    for restore in restorers:
      restore()
def test_non_default_cluster():
  """Verify that HDFS/MR clusters registered under a custom name work."""
  NON_DEFAULT_NAME = 'non_default'
  cluster.clear_caches()
  fake_config = {NON_DEFAULT_NAME: {}}
  finish = (
    conf.HDFS_CLUSTERS.set_for_testing(fake_config),
    conf.MR_CLUSTERS.set_for_testing(fake_config),
  )
  try:
    # This is indeed the only hdfs/mr cluster
    assert_equal(1, len(cluster.get_all_hdfs()))
    assert_equal(1, len(cluster.all_mrclusters()))
    assert_true(cluster.get_hdfs(NON_DEFAULT_NAME))
    assert_true(cluster.get_mrcluster(NON_DEFAULT_NAME))

    cli = make_logged_in_client()
    # That we can get to a view without errors means that the middlewares work
    cli.get('/about')
  finally:
    for undo in finish:
      undo()
def run_bin_hadoop_step(self, step):
  """
  Run one plan step by shelling out to bin/hadoop, first acquiring and
  merging Kerberos delegation tokens when any cluster is secured.

  user.name is used by FileSystem.getHomeDirectory(). The environment
  variables for _USER and _GROUPS are used by the aspectj aspect to
  overwrite Hadoop's notion of users and groups.

  Raises Exception if the credentials-merge subprocess or bin/hadoop
  itself exits with a non-zero status.
  """
  # Java system properties passed to the JVM via HADOOP_OPTS.
  java_properties = {}
  java_properties["hue.suffix"] = "-via-hue"
  java_properties["user.name"] = self.plan.user
  java_prop_str = " ".join("-D%s=%s" % (k,v) for k, v in java_properties.iteritems())
  env = {
    'HADOOP_HOME': hadoop.conf.HADOOP_HOME.get(),
    # The aspectj weaver agent intercepts Hadoop's user/group resolution.
    'HADOOP_OPTS': "-javaagent:%s %s" % (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
    'HADOOP_CLASSPATH': ':'.join([jobsub.conf.ASPECTPATH.get(), hadoop.conf.HADOOP_EXTRA_CLASSPATH_STRING.get()]),
    'HUE_JOBTRACE_LOG': self.internal_file_name("jobs"),
    'HUE_JOBSUB_USER': self.plan.user,
    'HUE_JOBSUB_GROUPS': ",".join(self.plan.groups),
    'LANG': os.getenv('LANG', i18n.get_site_encoding()),
  }

  all_clusters = []
  all_clusters += all_mrclusters().values()
  all_clusters += get_all_hdfs().values()
  delegation_token_files = []
  # The merged credentials file must outlive the bin/hadoop subprocess;
  # it (and every per-cluster token file) is closed -- and thereby
  # deleted -- in the finally block below.
  merged_token_file = tempfile.NamedTemporaryFile()
  try:
    LOG.debug("all_clusters: %s" % (repr(all_clusters),))
    for cluster in all_clusters:
      if cluster.security_enabled:
        # Acquire a delegation token as the plan's user and spool it to
        # a temp file so it can be merged for the subprocess.
        cluster.setuser(self.plan.user)
        token = cluster.get_delegation_token()
        token_file = tempfile.NamedTemporaryFile()
        token_file.write(token.delegationTokenBytes)
        token_file.flush()
        delegation_token_files.append(token_file)

    java_home = os.getenv('JAVA_HOME')
    if java_home:
      env["JAVA_HOME"] = java_home
    # Fail fast if any required configuration value is missing.
    for k, v in env.iteritems():
      assert v is not None, "Environment key %s missing value." % k

    base_args = [ hadoop.conf.HADOOP_BIN.get() ]
    if hadoop.conf.HADOOP_CONF_DIR.get():
      base_args.append("--config")
      base_args.append(hadoop.conf.HADOOP_CONF_DIR.get())

    if delegation_token_files:
      # Merge all per-cluster token files into a single credentials file
      # that bin/hadoop picks up via HADOOP_TOKEN_FILE_LOCATION.
      args = list(base_args) # Make a copy of the base args.
      args += ['jar', hadoop.conf.CREDENTIALS_MERGER_JAR.get(), merged_token_file.name]
      args += [token_file.name for token_file in delegation_token_files]
      LOG.debug("merging credentials files with comand: '%s'" % (' '.join(args),))
      merge_pipe = subprocess.Popen(args, shell=False, close_fds=True)
      retcode = merge_pipe.wait()
      if 0 != retcode:
        raise Exception("bin/hadoop returned non-zero %d while trying to merge credentials" % (retcode,))
      env['HADOOP_TOKEN_FILE_LOCATION'] = merged_token_file.name

    args = list(base_args) # Make a copy of the base args.
    args += step.arguments
    LOG.info("Starting %s. (Env: %s)", repr(args), repr(env))
    LOG.info("Running: %s" % " ".join(args))
    self.pipe = subprocess.Popen(
      args,
      stdin=None,
      cwd=self.work_dir,
      stdout=self.stdout,
      stderr=self.stderr,
      shell=False,
      close_fds=True,
      env=env)
    retcode = self.pipe.wait()
    if 0 != retcode:
      raise Exception("bin/hadoop returned non-zero %d" % retcode)
    LOG.info("bin/hadoop returned %d" % retcode)
  finally:
    # Closing a NamedTemporaryFile deletes it; clean up on every path.
    for token_file in delegation_token_files + [merged_token_file]:
      token_file.close()
def run_bin_hadoop_step(self, step):
  """
  Run one plan step as a bin/hadoop subprocess.

  user.name is used by FileSystem.getHomeDirectory(). The environment
  variables for _USER and _GROUPS are used by the aspectj aspect to
  overwrite Hadoop's notion of users and groups.

  Raises Exception if bin/hadoop exits with a non-zero status.
  """
  # Java system properties passed to the JVM via HADOOP_OPTS.
  java_properties = {}
  java_properties["hue.suffix"] = "-via-hue"
  java_properties["user.name"] = self.plan.user
  java_prop_str = " ".join("-D%s=%s" % (k, v) for k, v in java_properties.iteritems())
  env = {
      "HADOOP_HOME": hadoop.conf.HADOOP_HOME.get(),
      "HADOOP_OPTS": "-javaagent:%s %s" % (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
      "HADOOP_CLASSPATH": ":".join(
          [jobsub.conf.ASPECTPATH.get(), hadoop.conf.HADOOP_STATIC_GROUP_MAPPING_CLASSPATH.get()]
      ),
      "HUE_JOBTRACE_LOG": self.internal_file_name("jobs"),
      "HUE_JOBSUB_USER": self.plan.user,
      "HUE_JOBSUB_GROUPS": ",".join(self.plan.groups),
  }

  delegation_token_files = []
  all_clusters = []
  all_clusters += all_mrclusters().values()
  all_clusters += get_all_hdfs().values()
  LOG.info("all_clusters: %s" % (repr(all_clusters),))
  # BUG FIX: token temp files were previously closed only on the success
  # path, so an exception (including the non-zero-retcode raise below)
  # leaked them. The try/finally guarantees cleanup on every path.
  try:
    for cluster in all_clusters:
      if cluster.security_enabled:
        # Impersonate the plan's user only while requesting the token,
        # then restore the cluster's previous user (consistent with
        # _get_delegation_tokens).
        current_user = cluster.user
        try:
          cluster.setuser(self.plan.user)
          token = cluster.get_delegation_token()
          token_file = tempfile.NamedTemporaryFile()
          token_file.write(token.delegationTokenBytes)
          token_file.flush()
          delegation_token_files.append(token_file)
        finally:
          cluster.setuser(current_user)

    if delegation_token_files:
      env["HADOOP_TOKEN_FILE_LOCATION"] = ",".join([token_file.name for token_file in delegation_token_files])

    java_home = os.getenv("JAVA_HOME")
    if java_home:
      env["JAVA_HOME"] = java_home
    # Fail fast if any required configuration value is missing.
    for k, v in env.iteritems():
      assert v is not None, "Environment key %s missing value." % k

    args = [hadoop.conf.HADOOP_BIN.get()]
    if hadoop.conf.HADOOP_CONF_DIR.get():
      args.append("--config")
      args.append(hadoop.conf.HADOOP_CONF_DIR.get())
    args += step.arguments
    LOG.info("Starting %s. (Env: %s)", repr(args), repr(env))
    LOG.info("Running: %s" % " ".join(args))
    self.pipe = subprocess.Popen(
        args,
        stdin=None,
        cwd=self.work_dir,
        stdout=self.stdout,
        stderr=self.stderr,
        shell=False,
        close_fds=True,
        env=env,
    )
    retcode = self.pipe.wait()
    if 0 != retcode:
      raise Exception("bin/hadoop returned non-zero %d" % retcode)
    LOG.info("bin/hadoop returned %d" % retcode)
  finally:
    # Closing a NamedTemporaryFile deletes it.
    for token_file in delegation_token_files:
      token_file.close()
def run_bin_hadoop_step(self, step):
  """
  Run one plan step as a bin/hadoop subprocess.

  user.name is used by FileSystem.getHomeDirectory(). The environment
  variables for _USER and _GROUPS are used by the aspectj aspect to
  overwrite Hadoop's notion of users and groups.

  Raises Exception if bin/hadoop exits with a non-zero status.
  """
  # Java system properties passed to the JVM via HADOOP_OPTS.
  java_properties = {}
  java_properties["hue.suffix"] = "-via-hue"
  java_properties["user.name"] = self.plan.user
  java_prop_str = " ".join("-D%s=%s" % (k, v) for k, v in java_properties.iteritems())
  env = {
    'HADOOP_HOME': hadoop.conf.HADOOP_HOME.get(),
    'HADOOP_OPTS': "-javaagent:%s %s" % (jobsub.conf.ASPECTJWEAVER.get(), java_prop_str),
    'HADOOP_CLASSPATH': ':'.join([
        jobsub.conf.ASPECTPATH.get(),
        hadoop.conf.HADOOP_EXTRA_CLASSPATH_STRING.get()
    ]),
    'HUE_JOBTRACE_LOG': self.internal_file_name("jobs"),
    'HUE_JOBSUB_USER': self.plan.user,
    'HUE_JOBSUB_GROUPS': ",".join(self.plan.groups),
  }

  delegation_token_files = []
  all_clusters = []
  all_clusters += all_mrclusters().values()
  all_clusters += get_all_hdfs().values()
  LOG.info("all_clusters: %s" % (repr(all_clusters), ))
  # BUG FIX: token temp files were previously closed only on the success
  # path, so any exception (including the non-zero-retcode raise below)
  # leaked them. The try/finally guarantees cleanup on every path.
  try:
    for cluster in all_clusters:
      if cluster.security_enabled:
        # Impersonate the plan's user only while requesting the token,
        # then restore the cluster's previous user (consistent with
        # _get_delegation_tokens).
        current_user = cluster.user
        try:
          cluster.setuser(self.plan.user)
          token = cluster.get_delegation_token()
          token_file = tempfile.NamedTemporaryFile()
          token_file.write(token.delegationTokenBytes)
          token_file.flush()
          delegation_token_files.append(token_file)
        finally:
          cluster.setuser(current_user)

    if delegation_token_files:
      env['HADOOP_TOKEN_FILE_LOCATION'] = ','.join(
          [token_file.name for token_file in delegation_token_files])

    java_home = os.getenv('JAVA_HOME')
    if java_home:
      env["JAVA_HOME"] = java_home
    # Fail fast if any required configuration value is missing.
    for k, v in env.iteritems():
      assert v is not None, "Environment key %s missing value." % k

    args = [hadoop.conf.HADOOP_BIN.get()]
    if hadoop.conf.HADOOP_CONF_DIR.get():
      args.append("--config")
      args.append(hadoop.conf.HADOOP_CONF_DIR.get())
    args += step.arguments
    LOG.info("Starting %s. (Env: %s)", repr(args), repr(env))
    LOG.info("Running: %s" % " ".join(args))
    self.pipe = subprocess.Popen(args,
                                 stdin=None,
                                 cwd=self.work_dir,
                                 stdout=self.stdout,
                                 stderr=self.stderr,
                                 shell=False,
                                 close_fds=True,
                                 env=env)
    retcode = self.pipe.wait()
    if 0 != retcode:
      raise Exception("bin/hadoop returned non-zero %d" % retcode)
    LOG.info("bin/hadoop returned %d" % retcode)
  finally:
    # Closing a NamedTemporaryFile deletes it.
    for token_file in delegation_token_files:
      token_file.close()