def run_shell(args):
  get_zk_service_config(args)

  main_class, options = deploy_utils.parse_shell_command(
      args, SHELL_COMMAND_INFO)
  if not main_class:
    return

  args.zookeeper_config.parse_generated_config_files(args)
  # write the client JAAS config to a temp file so the shell JVM can log in
  client_jaas = generate_client_jaas_config(args)
  jaas_fd, jaas_file = tempfile.mkstemp(suffix='zookeeper')
  os.write(jaas_fd, client_jaas)
  os.close(jaas_fd)

  zookeeper_opts = list()
  if deploy_utils.is_security_enabled(args):
    zookeeper_opts.append("-Djava.security.auth.login.config=%s" % jaas_file)
    zookeeper_opts.append("-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
        deploy_utils.get_config_dir())

  package_root = deploy_utils.get_artifact_package_root(args,
      args.zookeeper_config.cluster, "zookeeper")
  class_path = "%s/:%s/lib/*:%s/*" % (package_root, package_root, package_root)
  zk_address = "%s:%d" % (
      deploy_utils.get_zk_address(args.zookeeper_config.cluster.name),
      args.zookeeper_config.jobs["zookeeper"].base_port)

  cmd = (["java", "-cp", class_path] + zookeeper_opts +
      [main_class, "-server", zk_address] + options)
  p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
  p.wait()

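# For illustration: with security enabled, the assembled command line looks
# roughly like this (hypothetical package root, temp file, host, and port):
#
#   java -cp <pkg>/:<pkg>/lib/*:<pkg>/* \
#     -Djava.security.auth.login.config=/tmp/tmpa1b2zookeeper \
#     -Djava.security.krb5.conf=<config_dir>/krb5-hadoop.conf \
#     <main_class> -server zk1.example.com:2181 <options...>
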
def generate_run_scripts_params(args, host, job_name):
  job = args.yarn_config.jobs[job_name]

  supervisor_client = deploy_utils.get_supervisor_client(host,
      "yarn", args.yarn_config.cluster.name, job_name)

  artifact_and_version = "hadoop-" + args.yarn_config.cluster.version

  jar_dirs = ""
  # must include both [dir]/ and [dir]/* since [dir]/* only imports the jars
  # under that dir, and we also need access to the webapps under it.
  for component in ["common", "mapreduce", "yarn", "hdfs"]:
    if jar_dirs:
      jar_dirs += ":"
    component_dir = "$package_dir/share/hadoop/%s" % component
    jar_dirs += "%s/:%s/lib/*:%s/*" % (
        component_dir, component_dir, component_dir)

  service_env = ""
  for component_path in ["HADOOP_COMMON_HOME", "HADOOP_HDFS_HOME", "YARN_HOME"]:
    service_env += "export %s=$package_dir\n" % component_path

  script_dict = {
      "artifact": artifact_and_version,
      "job_name": job_name,
      "jar_dirs": jar_dirs,
      "run_dir": supervisor_client.get_run_dir(),
      "service_env": service_env,
      "params":
          '-Xmx%dm ' % job.xmx +
          '-Xms%dm ' % job.xms +
          '-Xmn%dm ' % job.xmn +
          '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
          '-XX:MaxPermSize=%dm ' % job.max_perm_size +
          '-XX:+DisableExplicitGC ' +
          '-XX:+HeapDumpOnOutOfMemoryError ' +
          '-XX:HeapDumpPath=$log_dir ' +
          '-XX:+PrintGCApplicationStoppedTime ' +
          '-XX:+UseConcMarkSweepGC ' +
          '-XX:CMSInitiatingOccupancyFraction=80 ' +
          '-XX:+UseMembar ' +
          '-verbose:gc ' +
          '-XX:+PrintGCDetails ' +
          '-XX:+PrintGCDateStamps ' +
          '-Xloggc:$run_dir/stdout/%s_gc_${start_time}.log ' % job_name +
          '-Dproc_%s ' % job_name +
          '-Djava.net.preferIPv4Stack=true ' +
          '-Dyarn.log.dir=$log_dir ' +
          '-Dyarn.pid=$pid ' +
          '-Dyarn.cluster=%s ' % args.yarn_config.cluster.name +
          '-Dhadoop.policy.file=hadoop-policy.xml ' +
          '-Dhadoop.home.dir=$package_dir ' +
          '-Dhadoop.id.str=%s ' % args.remote_user +
          '-Djava.security.krb5.conf=$run_dir/krb5.conf ' +
          get_job_specific_params(args, job_name),
  }

  if deploy_utils.is_security_enabled(args):
    class_path_root = "$package_dir/share/hadoop"
    boot_class_path = "%s/common/lib/hadoop-security-%s.jar" % (
        class_path_root, args.hdfs_config.cluster.version)
    script_dict["params"] += "-Xbootclasspath/p:%s " % boot_class_path

  # finally, add the job's main class name
  script_dict["params"] += JOB_MAIN_CLASS[job_name]
  return script_dict

def generate_run_scripts_params(args, host, job_name):
  job = args.hdfs_config.jobs[job_name]

  supervisor_client = deploy_utils.get_supervisor_client(host,
      "hdfs", args.hdfs_config.cluster.name, job_name)

  artifact_and_version = "hadoop-" + args.hdfs_config.cluster.version

  jar_dirs = ""
  # must include both [dir]/ and [dir]/* since [dir]/* only imports the jars
  # under that dir, and we also need access to the webapps under it.
  for component in ["common", "hdfs"]:
    if jar_dirs:
      jar_dirs += ":"
    component_dir = "$package_dir/share/hadoop/%s" % component
    jar_dirs += "%s/:%s/lib/*:%s/*" % (
        component_dir, component_dir, component_dir)

  script_dict = {
      "artifact": artifact_and_version,
      "job_name": job_name,
      "jar_dirs": jar_dirs,
      "run_dir": supervisor_client.get_run_dir(),
      "params":
          '-Xmx%dm ' % job.xmx +
          '-Xms%dm ' % job.xms +
          '-Xmn%dm ' % job.xmn +
          '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
          '-XX:MaxPermSize=%dm ' % job.max_perm_size +
          '-XX:+DisableExplicitGC ' +
          '-XX:+HeapDumpOnOutOfMemoryError ' +
          '-XX:HeapDumpPath=$log_dir ' +
          '-XX:+PrintGCApplicationStoppedTime ' +
          '-XX:+UseConcMarkSweepGC ' +
          '-XX:CMSInitiatingOccupancyFraction=80 ' +
          '-XX:+UseMembar ' +
          '-verbose:gc ' +
          '-XX:+PrintGCDetails ' +
          '-XX:+PrintGCDateStamps ' +
          '-Xloggc:$run_dir/stdout/%s_gc_${start_time}.log ' % job_name +
          '-Dproc_%s ' % job_name +
          '-Djava.net.preferIPv4Stack=true ' +
          '-Dhdfs.log.dir=$log_dir ' +
          '-Dhdfs.pid=$pid ' +
          '-Dhdfs.cluster=%s ' % args.hdfs_config.cluster.name +
          '-Dhadoop.policy.file=hadoop-policy.xml ' +
          '-Dhadoop.home.dir=$package_dir ' +
          '-Djava.security.krb5.conf=$run_dir/krb5.conf ' +
          '-Dhadoop.id.str=%s ' % args.remote_user,
  }

  # config security-related params
  if deploy_utils.is_security_enabled(args):
    class_path_root = "$package_dir/share/hadoop"
    boot_class_path = "%s/common/lib/hadoop-security-%s.jar" % (
        class_path_root, args.hdfs_config.cluster.version)
    script_dict["params"] += ('-Xbootclasspath/p:%s ' % boot_class_path +
        '-Dkerberos.instance=hadoop ')

  # finally, add the job's main class name
  script_dict["params"] += (get_job_specific_params(args, job_name) +
      JOB_MAIN_CLASS[job_name])
  return script_dict

def generate_start_script(args, host, job_name):
  supervisor_client = deploy_utils.get_supervisor_client(host,
      "zookeeper", args.zookeeper_config.cluster.name, job_name)
  run_dir = supervisor_client.get_run_dir()

  artifact_and_version = "zookeeper-" + args.zookeeper_config.cluster.version

  component_dir = "$package_dir"
  # must include both [dir]/ and [dir]/* since [dir]/* only imports the jars
  # under that dir, and we also need access to the webapps under it.
  jar_dirs = "%s/:%s/lib/*:%s/*" % (component_dir, component_dir, component_dir)
  job = args.zookeeper_config.jobs["zookeeper"]

  script_dict = {
      "artifact": artifact_and_version,
      "job_name": job_name,
      "jar_dirs": jar_dirs,
      "run_dir": run_dir,
      "params":
          '-Xmx%dm ' % job.xmx +
          '-Xms%dm ' % job.xms +
          '-Xmn%dm ' % job.xmn +
          '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
          '-XX:MaxPermSize=%dm ' % job.max_perm_size +
          '-XX:+DisableExplicitGC ' +
          '-XX:+HeapDumpOnOutOfMemoryError ' +
          '-XX:HeapDumpPath=$log_dir ' +
          '-XX:+PrintGCApplicationStoppedTime ' +
          '-XX:+UseConcMarkSweepGC ' +
          '-XX:CMSInitiatingOccupancyFraction=80 ' +
          '-XX:+UseMembar ' +
          '-verbose:gc ' +
          '-XX:+PrintGCDetails ' +
          '-XX:+PrintGCDateStamps ' +
          '-Xloggc:$run_dir/stdout/zk_gc_${start_time}.log ' +
          '-Djava.net.preferIPv4Stack=true ' +
          '-Dzookeeper.log.dir=$log_dir ' +
          '-Dzookeeper.cluster=%s ' % args.zookeeper_config.cluster.name +
          '-Dzookeeper.tracelog.dir=$log_dir ',
  }

  # config security
  if deploy_utils.is_security_enabled(args):
    script_dict["params"] += '-Dzookeeper.superUser=zk_admin '
    script_dict["params"] += '-Djava.security.auth.login.config=$run_dir/jaas.conf '
    script_dict["params"] += '-Djava.security.krb5.conf=$run_dir/krb5.conf '

  # unlike the other services, the main class and the config file path are
  # carried in "params" itself, so they must be appended for every cluster,
  # secure or not
  script_dict["params"] += 'org.apache.zookeeper.server.quorum.QuorumPeerMain '
  script_dict["params"] += '$run_dir/zookeeper.cfg'

  return deploy_utils.create_run_script(
      '%s/start.sh.tmpl' % deploy_utils.get_template_dir(),
      script_dict)

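# A minimal, self-contained sketch of how a start.sh template could consume
# script_dict. This is an assumption for illustration only: the real
# rendering is done by deploy_utils.create_run_script() and start.sh.tmpl,
# whose exact placeholder syntax is not shown here, and both
# _EXAMPLE_START_SH_TMPL and _render_example_start_sh are hypothetical names.
import string

_EXAMPLE_START_SH_TMPL = string.Template(
    "#!/bin/sh\n"
    "cd $run_dir\n"
    "exec java -cp $jar_dirs $params\n")

def _render_example_start_sh(script_dict):
  # safe_substitute fills run_dir/jar_dirs/params and leaves any other
  # $variables (e.g. $log_dir inside params) for the shell to resolve.
  return _EXAMPLE_START_SH_TMPL.safe_substitute(script_dict)
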
def generate_client_jaas_config(args):
  if not deploy_utils.is_security_enabled(args):
    return ""

  config_dict = args.zookeeper_config.configuration.generated_files[
      "jaas-client.conf"]

  # Quote every value except booleans and values that are already quoted;
  # the first item is assumed to be the unquoted header line (login module
  # and control flag).
  for key, value in config_dict.items()[1:]:
    if value != "true" and value != "false" and value.find("\"") == -1:
      config_dict[key] = "\"" + value + "\""

  header_line = config_dict["headerLine"]
  return "Client {\n %s\n%s;\n};" % (header_line,
      "\n".join([" %s=%s" % (key, value)
          for (key, value) in config_dict.iteritems()
          if key != "headerLine"]))

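# Example of the returned snippet (hypothetical values; the real keys come
# from the cluster's jaas-client.conf). Bare true/false values and values
# already containing quotes are left unquoted by the loop above:
#
#   Client {
#    com.sun.security.auth.module.Krb5LoginModule required
#    useTicketCache=true
#    principal="zk_client@EXAMPLE.COM";
#   };
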
def run_shell(args):
  get_hbase_service_config(args)

  main_class, options = deploy_utils.parse_shell_command(
      args, SHELL_COMMAND_INFO)
  if not main_class:
    return

  # parse the service config, assuming the instance_id is -1
  args.hbase_config.parse_generated_config_files(args)
  core_site_dict = args.hbase_config.configuration.generated_files[
      "core-site.xml"]
  hdfs_site_dict = args.hbase_config.configuration.generated_files[
      "hdfs-site.xml"]
  hbase_site_dict = args.hbase_config.configuration.generated_files[
      "hbase-site.xml"]

  hbase_opts = list()
  for key, value in core_site_dict.iteritems():
    hbase_opts.append(
        "-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
  for key, value in hdfs_site_dict.iteritems():
    hbase_opts.append(
        "-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
  for key, value in hbase_site_dict.iteritems():
    hbase_opts.append(
        "-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))

  if deploy_utils.is_security_enabled(args):
    hbase_opts.append("-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
        deploy_utils.get_config_dir())
    # write the client JAAS config to a temp file for the kerberized shell
    (jaas_fd, jaas_file) = tempfile.mkstemp()
    args.zookeeper_config.parse_generated_config_files(args)
    os.write(jaas_fd, deploy_zookeeper.generate_client_jaas_config(args))
    os.close(jaas_fd)
    hbase_opts.append("-Djava.security.auth.login.config=%s" % jaas_file)

  package_root = deploy_utils.get_artifact_package_root(args,
      args.hbase_config.cluster, "hbase")
  class_path = "%s/:%s/lib/*:%s/*" % (package_root, package_root, package_root)

  cmd = ["java", "-cp", class_path] + hbase_opts + [main_class]
  if args.command[0] == "shell":
    cmd += ["-X+O", "%s/bin/hirb.rb" % package_root]
  cmd += options

  p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
  return p.wait()

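# The three loops above repeat one flattening step; a standalone sketch of it
# (Python 2, matching the iteritems() usage above; _site_dict_to_opts is a
# hypothetical helper, not part of the tool):
def _site_dict_to_opts(site_dict, prefix):
  """Flatten a *-site.xml dict into ["-D<prefix><key>=<value>", ...]."""
  return ["-D%s%s=%s" % (prefix, key, value)
          for key, value in site_dict.iteritems()]
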
def generate_run_scripts_params(args, host, job_name): supervisor_client = deploy_utils.get_supervisor_client( host, "impala", args.impala_config.cluster.name, job_name) job = args.impala_config.jobs[job_name] impalad = args.impala_config.jobs["impalad"] statestored = args.impala_config.jobs["statestored"] artifact_and_version = "impala-" + args.impala_config.cluster.version script_dict = { "artifact": artifact_and_version, "job_name": job_name, "run_dir": supervisor_client.get_run_dir(), "ticket_cache": "$run_dir/impala.tc", "params": "-webserver_port=%d " % (job.base_port + 1) + "-be_port=%d " % (impalad.base_port + 2) + "-planservice_port=%d " % (impalad.base_port + 3) + "-state_store_port=%d " % statestored.base_port + "-state_store_subscriber_port=%d " % (statestored.base_port + 1) + "-mem_limit=20% " + # TODO make this configurable "-state_store_host=%s " % statestored.hosts[0] + "-kerberos_reinit_interval=1200 " + # 20hours "-webserver_doc_root=$run_dir/package " + "-webserver_interface=%s " % host + #"-use_statestore=false " + "-log_dir=$run_dir/log " + "-v=2 " + "-logbuflevel=-1 " + "-sasl_path=$run_dir/package/lib/sasl2 ", } if job_name == "impalad": script_dict["params"] += "-beeswax_port=%d " % impalad.base_port script_dict["params"] += "-hs2_port=%d " % (impalad.base_port + 4) if deploy_utils.is_security_enabled(args): script_dict["params"] += "-principal=%s/hadoop@%s " % ( args.impala_config.cluster.kerberos_username or "impala", args.impala_config.cluster.kerberos_realm) script_dict["params"] += "-keytab_file=%s/%s.keytab " % ( deploy_utils.HADOOP_CONF_PATH, args.impala_config.cluster.kerberos_username or "impala") script_dict["params"] += "-tgt_file=$run_dir/impala.tc " return script_dict
def run_shell(args):
  get_yarn_service_config(args)

  main_class, options = deploy_utils.parse_shell_command(
      args, SHELL_COMMAND_INFO)
  if not main_class:
    return

  # parse the service config, assuming the instance_id is -1
  args.yarn_config.parse_generated_config_files(args)
  core_site_dict = args.yarn_config.configuration.generated_files[
      "core-site.xml"]
  hdfs_site_dict = args.yarn_config.configuration.generated_files[
      "hdfs-site.xml"]
  mapred_site_dict = args.yarn_config.configuration.generated_files[
      "mapred-site.xml"]
  yarn_site_dict = args.yarn_config.configuration.generated_files[
      "yarn-site.xml"]

  hadoop_opts = list()
  for key, value in core_site_dict.iteritems():
    hadoop_opts.append(
        "-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
  for key, value in hdfs_site_dict.iteritems():
    hadoop_opts.append(
        "-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
  for key, value in mapred_site_dict.iteritems():
    hadoop_opts.append(
        "-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
  for key, value in yarn_site_dict.iteritems():
    hadoop_opts.append(
        "-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))

  if deploy_utils.is_security_enabled(args):
    hadoop_opts.append("-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
        deploy_utils.get_config_dir())

  package_root = deploy_utils.get_artifact_package_root(args,
      args.yarn_config.cluster, "hadoop")
  lib_root = "%s/share/hadoop" % package_root
  class_path = "%s/etc/hadoop" % package_root
  for component in ["common", "hdfs", "mapreduce", "yarn"]:
    component_dir = "%s/%s" % (lib_root, component)
    class_path += ":%s/:%s/*:%s/lib/*" % (
        component_dir, component_dir, component_dir)

  cmd = (["java", "-cp", class_path] + hadoop_opts + [main_class] + options)
  p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
  p.wait()

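# For illustration, the class path assembled above has the shape (hypothetical
# package root, one component shown):
#
#   <root>/etc/hadoop:<root>/share/hadoop/common/:
#   <root>/share/hadoop/common/*:<root>/share/hadoop/common/lib/*:...
#
# The bare "<dir>/" entry matters: "<dir>/*" only picks up jars, while
# resources such as the webapps are loaded from the directory itself.
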
def run_shell(args):
  get_hdfs_service_config(args)

  main_class, options = deploy_utils.parse_shell_command(
      args, SHELL_COMMAND_INFO)
  if not main_class:
    return

  # parse the service config, assuming the instance_id is -1
  args.hdfs_config.parse_generated_config_files(args)
  core_site_dict = args.hdfs_config.configuration.generated_files[
      "core-site.xml"]
  hdfs_site_dict = args.hdfs_config.configuration.generated_files[
      "hdfs-site.xml"]

  hadoop_opts = list()
  for key, value in core_site_dict.iteritems():
    hadoop_opts.append(
        "-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
  for key, value in hdfs_site_dict.iteritems():
    hadoop_opts.append(
        "-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))

  package_root = deploy_utils.get_artifact_package_root(args,
      args.hdfs_config.cluster, "hadoop")
  lib_root = "%s/share/hadoop" % package_root
  class_path = "%s/etc/hadoop" % package_root
  for component in ["common", "hdfs"]:
    component_dir = "%s/%s" % (lib_root, component)
    class_path += ":%s/:%s/*:%s/lib/*" % (
        component_dir, component_dir, component_dir)

  if deploy_utils.is_security_enabled(args):
    boot_class_path = "%s/common/lib/hadoop-security-%s.jar" % (
        lib_root, args.hdfs_config.cluster.version)
    hadoop_opts.append("-Xbootclasspath/p:%s" % boot_class_path)
    hadoop_opts.append("-Dkerberos.instance=hadoop")
    hadoop_opts.append("-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
        deploy_utils.get_config_dir())

  cmd = (["java", "-cp", class_path] + hadoop_opts + [main_class] + options)
  p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
  p.wait()

def generate_run_scripts_params(args, host, job_name):
  job = args.hbase_config.jobs[job_name]

  supervisor_client = deploy_utils.get_supervisor_client(host,
      "hbase", args.hbase_config.cluster.name, job_name)

  artifact_and_version = "hbase-" + args.hbase_config.cluster.version

  component_dir = "$package_dir"
  # must include both [dir]/ and [dir]/* since [dir]/* only imports the jars
  # under that dir, and we also need access to the webapps under it.
  jar_dirs = "%s/:%s/lib/*:%s/*" % (component_dir, component_dir, component_dir)

  script_dict = {
      "artifact": artifact_and_version,
      "job_name": job_name,
      "jar_dirs": jar_dirs,
      "run_dir": supervisor_client.get_run_dir(),
      "params":
          '-Xmx%dm ' % job.xmx +
          '-Xms%dm ' % job.xms +
          '-Xmn%dm ' % job.xmn +
          '-Xss256k ' +
          '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
          '-XX:MaxPermSize=%dm ' % job.max_perm_size +
          '-XX:PermSize=%dm ' % job.max_perm_size +
          '-XX:+HeapDumpOnOutOfMemoryError ' +
          '-XX:HeapDumpPath=$log_dir ' +
          '-XX:+PrintGCApplicationStoppedTime ' +
          '-XX:+UseConcMarkSweepGC ' +
          '-verbose:gc ' +
          '-XX:+PrintGCDetails ' +
          '-XX:+PrintGCDateStamps ' +
          '-Xloggc:$run_dir/stdout/%s_gc_${start_time}.log ' % job_name +
          '-XX:+UseMembar ' +
          '-XX:SurvivorRatio=1 ' +
          '-XX:+UseCMSCompactAtFullCollection ' +
          '-XX:CMSInitiatingOccupancyFraction=75 ' +
          '-XX:+UseCMSInitiatingOccupancyOnly ' +
          '-XX:+CMSParallelRemarkEnabled ' +
          '-XX:+UseNUMA ' +
          '-XX:+CMSClassUnloadingEnabled ' +
          '-XX:+PrintSafepointStatistics ' +
          '-XX:PrintSafepointStatisticsCount=1 ' +
          '-XX:+PrintHeapAtGC ' +
          '-XX:+PrintTenuringDistribution ' +
          '-XX:CMSMaxAbortablePrecleanTime=10000 ' +
          '-XX:TargetSurvivorRatio=80 ' +
          '-XX:+UseGCLogFileRotation ' +
          '-XX:NumberOfGCLogFiles=100 ' +
          '-XX:GCLogFileSize=128m ' +
          '-XX:CMSWaitDuration=2000 ' +
          '-XX:+CMSScavengeBeforeRemark ' +
          '-XX:+PrintPromotionFailure ' +
          '-XX:ConcGCThreads=8 ' +
          '-XX:ParallelGCThreads=8 ' +
          '-XX:PretenureSizeThreshold=4m ' +
          '-XX:+CMSConcurrentMTEnabled ' +
          '-XX:+ExplicitGCInvokesConcurrent ' +
          '-XX:+SafepointTimeout ' +
          '-XX:MonitorBound=16384 ' +
          '-XX:OldPLABSize=16 ' +
          '-XX:-ResizeOldPLAB ' +
          '-XX:-UseBiasedLocking ' +
          '-Dproc_%s ' % job_name +
          '-Djava.net.preferIPv4Stack=true ' +
          '-Dhbase.log.dir=$log_dir ' +
          '-Dhbase.pid=$pid ' +
          '-Dhbase.cluster=%s ' % args.hbase_config.cluster.name +
          '-Dhbase.policy.file=hbase-policy.xml ' +
          '-Dhbase.home.dir=$package_dir ' +
          '-Djava.security.krb5.conf=$run_dir/krb5.conf ' +
          '-Dhbase.id.str=%s ' % args.remote_user +
          get_job_specific_params(args, job_name),
  }

  if deploy_utils.is_security_enabled(args):
    jaas_path = "%s/jaas.conf" % supervisor_client.get_run_dir()
    script_dict["params"] += "-Djava.security.auth.login.config=%s " % jaas_path
    boot_class_path = ("$package_dir/lib/hadoop-security-%s.jar" %
        args.hdfs_config.cluster.version)
    script_dict["params"] += "-Xbootclasspath/p:%s " % boot_class_path

  # finally, add the job's main class name
  script_dict["params"] += JOB_MAIN_CLASS[job_name]
  return script_dict

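# JOB_MAIN_CLASS is assumed to be a module-level map from job name to Java
# entry point; a hypothetical sketch for the HBase jobs:
#
# JOB_MAIN_CLASS = {
#   "master": "org.apache.hadoop.hbase.master.HMaster",
#   "regionserver": "org.apache.hadoop.hbase.regionserver.HRegionServer",
# }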