def run_shell(args):
  get_zk_service_config(args)

  main_class, options = deploy_utils.parse_shell_command(
      args, SHELL_COMMAND_INFO)
  if not main_class:
    return

  args.zookeeper_config.parse_generated_config_files(args)

  client_jaas = generate_client_jaas_config(args)
  jaas_fd, jaas_file = tempfile.mkstemp(suffix='zookeeper')
  os.write(jaas_fd, client_jaas)
  os.close(jaas_fd)
  zookeeper_opts = list()
  if deploy_utils.is_security_enabled(args):
    zookeeper_opts.append("-Djava.security.auth.login.config=%s" % jaas_file)
    zookeeper_opts.append(
      "-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
      deploy_utils.get_config_dir())

  package_root = deploy_utils.get_artifact_package_root(args,
      args.zookeeper_config.cluster, "zookeeper")
  class_path = "%s/:%s/lib/*:%s/*" % (package_root, package_root, package_root)

  zk_address = "%s:%d" % (
      deploy_utils.get_zk_address(args.zookeeper_config.cluster.name),
      args.zookeeper_config.jobs["zookeeper"].base_port)

  cmd = (["java", "-cp", class_path] + zookeeper_opts + [main_class,
      "-server", zk_address] + options)
  p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
  p.wait()
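For reference, here is a sketch of the command this function assembles, assuming security is enabled, a hypothetical ZooKeeper package root, and org.apache.zookeeper.ZooKeeperMain as the resolved shell main class (every concrete path, host, and port below is illustrative, not output from the tool):

# Illustrative only: hypothetical paths, hostnames, and ports.
options = []  # any extra arguments parsed from the shell command line
cmd = [
    "java",
    "-cp", "/opt/zookeeper-3.4.6/:/opt/zookeeper-3.4.6/lib/*:/opt/zookeeper-3.4.6/*",
    "-Djava.security.auth.login.config=/tmp/tmpAbC123zookeeper",
    "-Djava.security.krb5.conf=/path/to/config/krb5-hadoop.conf",
    "org.apache.zookeeper.ZooKeeperMain",  # assumed main_class from SHELL_COMMAND_INFO
    "-server", "zk1.example.com:2181",
] + options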
Example #3
def generate_run_scripts_params(args, host, job_name):
    job = args.yarn_config.jobs[job_name]

    supervisor_client = deploy_utils.get_supervisor_client(
        host, "yarn", args.yarn_config.cluster.name, job_name)

    artifact_and_version = "hadoop-" + args.yarn_config.cluster.version

    jar_dirs = ""
    for component in ["common", "mapreduce", "yarn", "hdfs"]:
        if jar_dirs: jar_dirs += ":"
        component_dir = ("$package_dir/share/hadoop/%s" % component)
        jar_dirs += "%s/:%s/lib/*:%s/*" % (component_dir, component_dir,
                                           component_dir)

    service_env = ""
    for component_path in [
            "HADOOP_COMMON_HOME", "HADOOP_HDFS_HOME", "YARN_HOME"
    ]:
        service_env += "export %s=$package_dir\n" % (component_path)

    script_dict = {
        "artifact":
        artifact_and_version,
        "job_name":
        job_name,
        "jar_dirs":
        jar_dirs,
        "run_dir":
        supervisor_client.get_run_dir(),
        "service_env":
        service_env,
        "params":
        '-Xmx%dm ' % job.xmx + '-Xms%dm ' % job.xms + '-Xmn%dm ' % job.xmn +
        '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
        '-XX:MaxPermSize=%dm ' % job.max_perm_size +
        '-XX:+DisableExplicitGC ' + '-XX:+HeapDumpOnOutOfMemoryError ' +
        '-XX:HeapDumpPath=$log_dir ' + '-XX:+PrintGCApplicationStoppedTime ' +
        '-XX:+UseConcMarkSweepGC ' + '-XX:CMSInitiatingOccupancyFraction=80 ' +
        '-XX:+UseMembar ' + '-verbose:gc ' + '-XX:+PrintGCDetails ' +
        '-XX:+PrintGCDateStamps ' +
        '-Xloggc:$run_dir/stdout/%s_gc_${start_time}.log ' % job_name +
        '-Dproc_%s ' % job_name + '-Djava.net.preferIPv4Stack=true ' +
        '-Dyarn.log.dir=$log_dir ' + '-Dyarn.pid=$pid ' +
        '-Dyarn.cluster=%s ' % args.yarn_config.cluster.name +
        '-Dhadoop.policy.file=hadoop-policy.xml ' +
        '-Dhadoop.home.dir=$package_dir ' +
        '-Dhadoop.id.str=%s ' % args.remote_user +
        '-Djava.security.krb5.conf=$run_dir/krb5.conf ' +
        get_job_specific_params(args, job_name)
    }

    if deploy_utils.is_security_enabled(args):
        class_path_root = "$package_dir/share/hadoop"
        boot_class_path = ("%s/common/lib/hadoop-security-%s.jar" %
                           (class_path_root, args.hdfs_config.cluster.version))
        script_dict["params"] += "-Xbootclasspath/p:%s " % boot_class_path

    script_dict["params"] += JOB_MAIN_CLASS[job_name]
    return script_dict
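As a usage sketch, the returned script_dict is presumably rendered into a start script from a template; the host and job name below are illustrative, and the rendering step borrows the deploy_utils.create_run_script call and start.sh.tmpl template shown in the ZooKeeper generate_start_script example further down:

# Hypothetical usage: build the params for one YARN job and render its start script.
script_dict = generate_run_scripts_params(args, "nm-host-01.example.com", "nodemanager")
start_script = deploy_utils.create_run_script(
    "%s/start.sh.tmpl" % deploy_utils.get_template_dir(), script_dict)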
Example #4
def generate_run_scripts_params(args, host, job_name):
    job = args.hdfs_config.jobs[job_name]

    supervisor_client = deploy_utils.get_supervisor_client(
        host, "hdfs", args.hdfs_config.cluster.name, job_name)

    artifact_and_version = "hadoop-" + args.hdfs_config.cluster.version

    jar_dirs = ""
    # Must include both [dir]/ and [dir]/*: [dir]/* only imports the jars under this
    # dir, but we also need access to the webapps under this dir.
    for component in ["common", "hdfs"]:
        if jar_dirs: jar_dirs += ":"
        component_dir = ("$package_dir/share/hadoop/%s" % component)
        jar_dirs += "%s/:%s/lib/*:%s/*" % (component_dir, component_dir,
                                           component_dir)

    script_dict = {
        "artifact":
        artifact_and_version,
        "job_name":
        job_name,
        "jar_dirs":
        jar_dirs,
        "run_dir":
        supervisor_client.get_run_dir(),
        "params":
        '-Xmx%dm ' % job.xmx + '-Xms%dm ' % job.xms + '-Xmn%dm ' % job.xmn +
        '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
        '-XX:MaxPermSize=%dm ' % job.max_perm_size +
        '-XX:+DisableExplicitGC ' + '-XX:+HeapDumpOnOutOfMemoryError ' +
        '-XX:HeapDumpPath=$log_dir ' + '-XX:+PrintGCApplicationStoppedTime ' +
        '-XX:+UseConcMarkSweepGC ' + '-XX:CMSInitiatingOccupancyFraction=80 ' +
        '-XX:+UseMembar ' + '-verbose:gc ' + '-XX:+PrintGCDetails ' +
        '-XX:+PrintGCDateStamps ' +
        '-Xloggc:$run_dir/stdout/%s_gc_${start_time}.log ' % job_name +
        '-Dproc_%s ' % job_name + '-Djava.net.preferIPv4Stack=true ' +
        '-Dhdfs.log.dir=$log_dir ' + '-Dhdfs.pid=$pid ' +
        '-Dhdfs.cluster=%s ' % args.hdfs_config.cluster.name +
        '-Dhadoop.policy.file=hadoop-policy.xml ' +
        '-Dhadoop.home.dir=$package_dir ' +
        '-Djava.security.krb5.conf=$run_dir/krb5.conf ' +
        '-Dhadoop.id.str=%s ' % args.remote_user,
    }

    # Configure security-related params
    if deploy_utils.is_security_enabled(args):
        class_path_root = "$package_dir/share/hadoop/"
        boot_class_path = "%s/common/lib/hadoop-security-%s.jar" % (
            class_path_root, args.hdfs_config.cluster.version)
        script_dict["params"] += ('-Xbootclasspath/p:%s ' % boot_class_path +
                                  '-Dkerberos.instance=hadoop ')

    # finally, add the job's main class name
    script_dict["params"] += (get_job_specific_params(args, job_name) +
                              JOB_MAIN_CLASS[job_name])
    return script_dict
Example #5
def generate_start_script(args, host, job_name):
  supervisor_client = deploy_utils.get_supervisor_client(host,
      "zookeeper", args.zookeeper_config.cluster.name, job_name)
  run_dir = supervisor_client.get_run_dir()

  artifact_and_version = "zookeeper-" + args.zookeeper_config.cluster.version
  component_dir = "$package_dir"
  # Must include both [dir]/ and [dir]/*: [dir]/* only imports the jars under this
  # dir, but we also need access to the webapps under this dir.
  jar_dirs = "%s/:%s/lib/*:%s/*" % (component_dir, component_dir, component_dir)
  job = args.zookeeper_config.jobs["zookeeper"]

  script_dict = {
      "artifact": artifact_and_version,
      "job_name": job_name,
      "jar_dirs": jar_dirs,
      "run_dir": run_dir,
      "params":
          '-Xmx%dm ' % job.xmx +
          '-Xms%dm ' % job.xms +
          '-Xmn%dm ' % job.xmn +
          '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
          '-XX:MaxPermSize=%dm ' % job.max_perm_size +
          '-XX:+DisableExplicitGC ' +
          '-XX:+HeapDumpOnOutOfMemoryError ' +
          '-XX:HeapDumpPath=$log_dir ' +
          '-XX:+PrintGCApplicationStoppedTime ' +
          '-XX:+UseConcMarkSweepGC ' +
          '-XX:CMSInitiatingOccupancyFraction=80 ' +
          '-XX:+UseMembar ' +
          '-verbose:gc ' +
          '-XX:+PrintGCDetails ' +
          '-XX:+PrintGCDateStamps ' +
          '-Xloggc:$run_dir/stdout/zk_gc_${start_time}.log ' +
          '-Djava.net.preferIPv4Stack=true ' +
          '-Dzookeeper.log.dir=$log_dir ' +
          '-Dzookeeper.cluster=%s ' % args.zookeeper_config.cluster.name +
          '-Dzookeeper.tracelog.dir=$log_dir ',
  }

  # Config security
  if deploy_utils.is_security_enabled(args):
    script_dict["params"] += '-Dzookeeper.superUser=zk_admin '
    script_dict["params"] += '-Djava.security.auth.login.config=$run_dir/jaas.conf '
    script_dict["params"] += '-Djava.security.krb5.conf=$run_dir/krb5.conf '

  script_dict["params"] += 'org.apache.zookeeper.server.quorum.QuorumPeerMain '
  script_dict["params"] += '$run_dir/zookeeper.cfg'

  return deploy_utils.create_run_script(
      '%s/start.sh.tmpl' % deploy_utils.get_template_dir(),
      script_dict)
Example #7
def generate_client_jaas_config(args):
  if not deploy_utils.is_security_enabled(args):
    return ""

  config_dict = args.zookeeper_config.configuration.generated_files["jaas-client.conf"]

  for key, value in config_dict.items()[1:]:
    if value != "true" and value != "false" and value.find("\"") == -1:
      config_dict[key] = "\"" + value + "\""

  header_line = config_dict["headerLine"]
  return "Client {\n  %s\n%s;\n};" % (header_line,
      "\n".join(["  %s=%s" % (key, value)
        for (key, value) in config_dict.iteritems() if key != config_dict.keys()[0]]))
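For illustration, the quoting and assembly above yield a JAAS "Client" section along the following lines. This is a minimal standalone sketch with a made-up jaas-client.conf dict; the header handling is simplified to an explicit key check rather than relying on dict ordering:

# Hypothetical input resembling a generated jaas-client.conf dict.
config_dict = {
    "headerLine": "com.sun.security.auth.module.Krb5LoginModule required",
    "useKeyTab": "true",
    "keyTab": "/etc/zookeeper/conf/zookeeper.keytab",
    "principal": "zk_client/host@EXAMPLE.COM",
}

header_line = config_dict["headerLine"]
entries = []
for key, value in config_dict.items():
    if key == "headerLine":
        continue
    # Values other than the literals true/false get wrapped in double quotes.
    if value != "true" and value != "false" and '"' not in value:
        value = '"%s"' % value
    entries.append("  %s=%s" % (key, value))

print("Client {\n  %s\n%s;\n};" % (header_line, "\n".join(entries)))
# Expected output (entry order may vary):
# Client {
#   com.sun.security.auth.module.Krb5LoginModule required
#   useKeyTab=true
#   keyTab="/etc/zookeeper/conf/zookeeper.keytab"
#   principal="zk_client/host@EXAMPLE.COM";
# };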
Example #9
def run_shell(args):
    get_hbase_service_config(args)

    main_class, options = deploy_utils.parse_shell_command(
        args, SHELL_COMMAND_INFO)
    if not main_class:
        return

    # Parse the service config, assuming the instance_id is -1.
    args.hbase_config.parse_generated_config_files(args)
    core_site_dict = args.hbase_config.configuration.generated_files[
        "core-site.xml"]
    hdfs_site_dict = args.hbase_config.configuration.generated_files[
        "hdfs-site.xml"]
    hbase_site_dict = args.hbase_config.configuration.generated_files[
        "hbase-site.xml"]

    hbase_opts = list()
    for key, value in core_site_dict.iteritems():
        hbase_opts.append("-D%s%s=%s" %
                          (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
    for key, value in hdfs_site_dict.iteritems():
        hbase_opts.append("-D%s%s=%s" %
                          (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
    for key, value in hbase_site_dict.iteritems():
        hbase_opts.append("-D%s%s=%s" %
                          (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))

    if deploy_utils.is_security_enabled(args):
        hbase_opts.append("-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
                          deploy_utils.get_config_dir())

        (jaas_fd, jaas_file) = tempfile.mkstemp()
        args.zookeeper_config.parse_generated_config_files(args)
        os.write(jaas_fd, deploy_zookeeper.generate_client_jaas_config(args))
        os.close(jaas_fd)
        hbase_opts.append("-Djava.security.auth.login.config=%s" % jaas_file)

    package_root = deploy_utils.get_artifact_package_root(
        args, args.hbase_config.cluster, "hbase")
    class_path = "%s/:%s/lib/*:%s/*" % (package_root, package_root,
                                        package_root)

    cmd = ["java", "-cp", class_path] + hbase_opts + [main_class]
    if args.command[0] == "shell":
        cmd += ["-X+O", "%s/bin/hirb.rb" % package_root]
    cmd += options
    p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
    return p.wait()
Example #10
def generate_run_scripts_params(args, host, job_name):
    supervisor_client = deploy_utils.get_supervisor_client(
        host, "impala", args.impala_config.cluster.name, job_name)
    job = args.impala_config.jobs[job_name]
    impalad = args.impala_config.jobs["impalad"]
    statestored = args.impala_config.jobs["statestored"]

    artifact_and_version = "impala-" + args.impala_config.cluster.version
    script_dict = {
        "artifact":
        artifact_and_version,
        "job_name":
        job_name,
        "run_dir":
        supervisor_client.get_run_dir(),
        "ticket_cache":
        "$run_dir/impala.tc",
        "params":
        "-webserver_port=%d " % (job.base_port + 1) + "-be_port=%d " %
        (impalad.base_port + 2) + "-planservice_port=%d " %
        (impalad.base_port + 3) +
        "-state_store_port=%d " % statestored.base_port +
        "-state_store_subscriber_port=%d " % (statestored.base_port + 1) +
        "-mem_limit=20% " +  # TODO make this configurable
        "-state_store_host=%s " % statestored.hosts[0] +
        "-kerberos_reinit_interval=1200 " +  # 20hours
        "-webserver_doc_root=$run_dir/package " +
        "-webserver_interface=%s " % host +
        #"-use_statestore=false " +
        "-log_dir=$run_dir/log " + "-v=2 " + "-logbuflevel=-1 " +
        "-sasl_path=$run_dir/package/lib/sasl2 ",
    }

    if job_name == "impalad":
        script_dict["params"] += "-beeswax_port=%d " % impalad.base_port
        script_dict["params"] += "-hs2_port=%d " % (impalad.base_port + 4)

    if deploy_utils.is_security_enabled(args):
        script_dict["params"] += "-principal=%s/hadoop@%s " % (
            args.impala_config.cluster.kerberos_username
            or "impala", args.impala_config.cluster.kerberos_realm)
        script_dict["params"] += "-keytab_file=%s/%s.keytab " % (
            deploy_utils.HADOOP_CONF_PATH,
            args.impala_config.cluster.kerberos_username or "impala")
        script_dict["params"] += "-tgt_file=$run_dir/impala.tc "

    return script_dict
Example #11
def run_shell(args):
  get_yarn_service_config(args)

  main_class, options = deploy_utils.parse_shell_command(
      args, SHELL_COMMAND_INFO)
  if not main_class:
    return

  # Parse the service config, assuming the instance_id is -1.
  args.yarn_config.parse_generated_config_files(args)
  core_site_dict = args.yarn_config.configuration.generated_files["core-site.xml"]
  hdfs_site_dict = args.yarn_config.configuration.generated_files["hdfs-site.xml"]
  mapred_site_dict = args.yarn_config.configuration.generated_files["mapred-site.xml"]
  yarn_site_dict = args.yarn_config.configuration.generated_files["yarn-site.xml"]

  hadoop_opts = list()
  for key, value in core_site_dict.iteritems():
    hadoop_opts.append("-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX,
          key, value))
  for key, value in hdfs_site_dict.iteritems():
    hadoop_opts.append("-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX,
          key, value))
  for key, value in mapred_site_dict.iteritems():
    hadoop_opts.append("-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX,
          key, value))
  for key, value in yarn_site_dict.iteritems():
    hadoop_opts.append("-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX,
          key, value))

  if deploy_utils.is_security_enabled(args):
    hadoop_opts.append(
        "-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
        deploy_utils.get_config_dir())

  package_root = deploy_utils.get_artifact_package_root(args,
      args.yarn_config.cluster, "hadoop")
  lib_root = "%s/share/hadoop" % package_root
  class_path = "%s/etc/hadoop" % package_root
  for component in ["common", "hdfs", "mapreduce", "yarn"]:
    component_dir = "%s/%s" % (lib_root, component)
    class_path += ":%s/:%s/*:%s/lib/*" % (component_dir,
        component_dir, component_dir)

  cmd = (["java", "-cp", class_path] + hadoop_opts +
      [main_class] + options)
  p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
  p.wait()
Example #12
def run_shell(args):
    get_yarn_service_config(args)

    main_class, options = deploy_utils.parse_shell_command(
        args, SHELL_COMMAND_INFO)
    if not main_class:
        return

    core_site_dict = args.yarn_config.configuration.generated_files[
        "core-site.xml"]
    hdfs_site_dict = args.yarn_config.configuration.generated_files[
        "hdfs-site.xml"]
    mapred_site_dict = args.yarn_config.configuration.generated_files[
        "mapred-site.xml"]
    yarn_site_dict = args.yarn_config.configuration.generated_files[
        "yarn-site.xml"]

    hadoop_opts = list()
    for key, value in core_site_dict.iteritems():
        hadoop_opts.append("-D%s%s=%s" %
                           (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
    for key, value in hdfs_site_dict.iteritems():
        hadoop_opts.append("-D%s%s=%s" %
                           (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
    for key, value in mapred_site_dict.iteritems():
        hadoop_opts.append("-D%s%s=%s" %
                           (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
    for key, value in yarn_site_dict.iteritems():
        hadoop_opts.append("-D%s%s=%s" %
                           (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))

    if deploy_utils.is_security_enabled(args):
        hadoop_opts.append("-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
                           deploy_utils.get_config_dir())

    package_root = deploy_utils.get_hadoop_package_root(
        args.yarn_config.cluster.version)
    lib_root = "%s/share/hadoop" % package_root
    class_path = "%s/etc/hadoop" % package_root
    for component in ["common", "hdfs", "mapreduce", "yarn"]:
        component_dir = "%s/%s" % (lib_root, component)
        class_path += ":%s/:%s/*:%s/lib/*" % (component_dir, component_dir,
                                              component_dir)

    cmd = (["java", "-cp", class_path] + hadoop_opts + [main_class] + options)
    p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
    p.wait()
Example #13
def generate_run_scripts_params(args, host, job_name):
  supervisor_client = deploy_utils.get_supervisor_client(host,
      "impala", args.impala_config.cluster.name, job_name)
  job = args.impala_config.jobs[job_name]
  impalad = args.impala_config.jobs["impalad"]
  statestored = args.impala_config.jobs["statestored"]

  artifact_and_version = "impala-" + args.impala_config.cluster.version

  script_dict = {
    "artifact": artifact_and_version,
    "job_name": job_name,
    "run_dir": supervisor_client.get_run_dir(),
    "ticket_cache": "$run_dir/impala.tc",
    "params":
      "-webserver_port=%d " % (job.base_port + 1) +
      "-be_port=%d " % (impalad.base_port + 2) +
      "-planservice_port=%d " % (impalad.base_port + 3) +
      "-state_store_port=%d " % statestored.base_port +
      "-state_store_subscriber_port=%d " % (statestored.base_port + 1) +
      "-mem_limit=20% " + # TODO make this configurable
      "-state_store_host=%s " % statestored.hosts[0] +
      "-kerberos_reinit_interval=1200 " + # 20hours
      "-webserver_doc_root=$run_dir/package " +
      "-webserver_interface=%s " % host +
      #"-use_statestore=false " +
      "-log_dir=$run_dir/log " +
      "-v=2 " +
      "-logbuflevel=-1 " +
      "-sasl_path=$run_dir/package/lib/sasl2 ",
  }

  if job_name == "impalad":
    script_dict["params"] += "-beeswax_port=%d " % impalad.base_port
    script_dict["params"] += "-hs2_port=%d " % (impalad.base_port + 4)

  if deploy_utils.is_security_enabled(args):
    script_dict["params"] += "-principal=%s/hadoop@%s " % (
        args.impala_config.cluster.kerberos_username or "impala",
        args.impala_config.cluster.kerberos_realm)
    script_dict["params"] += "-keytab_file=%s/%s.keytab " % (
        deploy_utils.HADOOP_CONF_PATH,
        args.impala_config.cluster.kerberos_username or "impala")
    script_dict["params"] += "-tgt_file=$run_dir/impala.tc "

  return script_dict
Example #14
def run_shell(args):
  get_hbase_service_config(args)

  main_class, options = deploy_utils.parse_shell_command(
      args, SHELL_COMMAND_INFO)
  if not main_class:
    return

  # Parse the service config, assuming the instance_id is -1.
  args.hbase_config.parse_generated_config_files(args)
  core_site_dict = args.hbase_config.configuration.generated_files["core-site.xml"]
  hdfs_site_dict = args.hbase_config.configuration.generated_files["hdfs-site.xml"]
  hbase_site_dict = args.hbase_config.configuration.generated_files["hbase-site.xml"]

  hbase_opts = list()
  for key, value in core_site_dict.iteritems():
    hbase_opts.append("-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX,
          key, value))
  for key, value in hdfs_site_dict.iteritems():
    hbase_opts.append("-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX,
          key, value))
  for key, value in hbase_site_dict.iteritems():
    hbase_opts.append("-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX,
          key, value))

  if deploy_utils.is_security_enabled(args):
    hbase_opts.append("-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
        deploy_utils.get_config_dir())

    (jaas_fd, jaas_file) = tempfile.mkstemp()
    args.zookeeper_config.parse_generated_config_files(args)
    os.write(jaas_fd, deploy_zookeeper.generate_client_jaas_config(args))
    os.close(jaas_fd)
    hbase_opts.append("-Djava.security.auth.login.config=%s" % jaas_file)

  package_root = deploy_utils.get_artifact_package_root(args,
      args.hbase_config.cluster, "hbase")
  class_path = "%s/:%s/lib/*:%s/*" % (package_root, package_root, package_root)

  cmd = ["java", "-cp", class_path] + hbase_opts + [main_class]
  if args.command[0] == "shell":
    cmd += ["-X+O", "%s/bin/hirb.rb" % package_root]
  cmd += options
  p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
  return p.wait()
Example #15
def run_shell(args):
    get_hdfs_service_config(args)

    main_class, options = deploy_utils.parse_shell_command(
        args, SHELL_COMMAND_INFO)
    if not main_class:
        return
    # Parse the service config, assuming the instance_id is -1.
    args.hdfs_config.parse_generated_config_files(args)
    core_site_dict = args.hdfs_config.configuration.generated_files[
        "core-site.xml"]
    hdfs_site_dict = args.hdfs_config.configuration.generated_files[
        "hdfs-site.xml"]

    hadoop_opts = list()
    for key, value in core_site_dict.iteritems():
        hadoop_opts.append("-D%s%s=%s" %
                           (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))
    for key, value in hdfs_site_dict.iteritems():
        hadoop_opts.append("-D%s%s=%s" %
                           (deploy_utils.HADOOP_PROPERTY_PREFIX, key, value))

    package_root = deploy_utils.get_artifact_package_root(
        args, args.hdfs_config.cluster, "hadoop")
    lib_root = "%s/share/hadoop" % package_root
    class_path = "%s/etc/hadoop" % package_root
    for component in ["common", "hdfs"]:
        component_dir = "%s/%s" % (lib_root, component)
        class_path += ":%s/:%s/*:%s/lib/*" % (component_dir, component_dir,
                                              component_dir)

    if deploy_utils.is_security_enabled(args):
        boot_class_path = "%s/common/lib/hadoop-security-%s.jar" % (
            lib_root, args.hdfs_config.cluster.version)
        hadoop_opts.append("-Xbootclasspath/p:%s" % boot_class_path)
        hadoop_opts.append("-Dkerberos.instance=hadoop")
        hadoop_opts.append("-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
                           deploy_utils.get_config_dir())

    cmd = (["java", "-cp", class_path] + hadoop_opts + [main_class] + options)
    p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
    p.wait()
Example #16
def run_shell(args):
  get_hdfs_service_config(args)

  main_class, options = deploy_utils.parse_shell_command(
      args, SHELL_COMMAND_INFO)
  if not main_class:
    return

  core_site_dict = args.hdfs_config.configuration.generated_files["core-site.xml"]
  hdfs_site_dict = args.hdfs_config.configuration.generated_files["hdfs-site.xml"]

  hadoop_opts = list()
  for key, value in core_site_dict.iteritems():
    hadoop_opts.append("-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX,
          key, value))
  for key, value in hdfs_site_dict.iteritems():
    hadoop_opts.append("-D%s%s=%s" % (deploy_utils.HADOOP_PROPERTY_PREFIX,
          key, value))

  package_root = deploy_utils.get_hadoop_package_root(
      args.hdfs_config.cluster.version)
  lib_root = "%s/share/hadoop" % package_root
  class_path = "%s/etc/hadoop" % package_root
  for component in ["common", "hdfs"]:
    component_dir = "%s/%s" % (lib_root, component)
    class_path += ":%s/:%s/*:%s/lib/*" % (component_dir,
        component_dir, component_dir)

  if deploy_utils.is_security_enabled(args):
    boot_class_path = "%s/common/lib/hadoop-security-%s.jar" % (lib_root,
        args.hdfs_config.cluster.version)
    hadoop_opts.append("-Xbootclasspath/p:%s" % boot_class_path)
    hadoop_opts.append("-Dkerberos.instance=hadoop")
    hadoop_opts.append(
        "-Djava.security.krb5.conf=%s/krb5-hadoop.conf" %
        deploy_utils.get_config_dir())

  cmd = (["java", "-cp", class_path] + hadoop_opts +
      [main_class] + options)
  p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stderr)
  p.wait()
Example #17
def generate_run_scripts_params(args, host, job_name):
    job = args.hbase_config.jobs[job_name]

    supervisor_client = deploy_utils.get_supervisor_client(
        host, "hbase", args.hbase_config.cluster.name, job_name)

    artifact_and_version = "hbase-" + args.hbase_config.cluster.version

    component_dir = "$package_dir/"
    # Must include both [dir]/ and [dir]/*: [dir]/* only imports the jars under this
    # dir, but we also need access to the webapps under this dir.
    jar_dirs = "%s/:%s/lib/*:%s/*" % (component_dir, component_dir,
                                      component_dir)

    script_dict = {
        "artifact":
        artifact_and_version,
        "job_name":
        job_name,
        "jar_dirs":
        jar_dirs,
        "run_dir":
        supervisor_client.get_run_dir(),
        "params":
        '-Xmx%dm ' % job.xmx + '-Xms%dm ' % job.xms + '-Xmn%dm ' % job.xmn +
        '-Xss256k ' + '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
        '-XX:MaxPermSize=%dm ' % job.max_perm_size +
        '-XX:PermSize=%dm ' % job.max_perm_size +
        '-XX:+HeapDumpOnOutOfMemoryError ' + '-XX:HeapDumpPath=$log_dir ' +
        '-XX:+PrintGCApplicationStoppedTime ' + '-XX:+UseConcMarkSweepGC ' +
        '-verbose:gc ' + '-XX:+PrintGCDetails ' + '-XX:+PrintGCDateStamps ' +
        '-Xloggc:$run_dir/stdout/%s_gc_${start_time}.log ' % job_name +
        '-XX:+UseMembar ' + '-XX:SurvivorRatio=1 ' +
        '-XX:+UseCMSCompactAtFullCollection ' +
        '-XX:CMSInitiatingOccupancyFraction=75 ' +
        '-XX:+UseCMSInitiatingOccupancyOnly ' +
        '-XX:+CMSParallelRemarkEnabled ' + '-XX:+UseNUMA ' +
        '-XX:+CMSClassUnloadingEnabled ' + '-XX:+PrintSafepointStatistics ' +
        '-XX:PrintSafepointStatisticsCount=1 ' + '-XX:+PrintHeapAtGC ' +
        '-XX:+PrintTenuringDistribution ' +
        '-XX:CMSMaxAbortablePrecleanTime=10000 ' +
        '-XX:TargetSurvivorRatio=80 ' + '-XX:+UseGCLogFileRotation ' +
        '-XX:NumberOfGCLogFiles=100 ' + '-XX:GCLogFileSize=128m ' +
        '-XX:CMSWaitDuration=2000 ' + '-XX:+CMSScavengeBeforeRemark ' +
        '-XX:+PrintPromotionFailure ' + '-XX:ConcGCThreads=8 ' +
        '-XX:ParallelGCThreads=8 ' + '-XX:PretenureSizeThreshold=4m ' +
        '-XX:+CMSConcurrentMTEnabled ' + '-XX:+ExplicitGCInvokesConcurrent ' +
        '-XX:+SafepointTimeout ' + '-XX:MonitorBound=16384 ' +
        '-XX:OldPLABSize=16 ' + '-XX:-ResizeOldPLAB ' +
        '-XX:-UseBiasedLocking ' + '-Dproc_%s ' % job_name +
        '-Djava.net.preferIPv4Stack=true ' + '-Dhbase.log.dir=$log_dir ' +
        '-Dhbase.pid=$pid ' +
        '-Dhbase.cluster=%s ' % args.hbase_config.cluster.name +
        '-Dhbase.policy.file=hbase-policy.xml ' +
        '-Dhbase.home.dir=$package_dir ' +
        '-Djava.security.krb5.conf=$run_dir/krb5.conf ' +
        '-Dhbase.id.str=%s ' % args.remote_user +
        get_job_specific_params(args, job_name),
    }

    if deploy_utils.is_security_enabled(args):
        jaas_path = "%s/jaas.conf" % supervisor_client.get_run_dir()
        script_dict[
            "params"] += "-Djava.security.auth.login.config=%s " % jaas_path
        boot_class_path = ("$package_dir/lib/hadoop-security-%s.jar" %
                           args.hdfs_config.cluster.version)
        script_dict["params"] += "-Xbootclasspath/p:%s " % boot_class_path

    script_dict["params"] += JOB_MAIN_CLASS[job_name]
    return script_dict
Example #18
def generate_run_scripts_params(args, host, job_name):
  job = args.hdfs_config.jobs[job_name]

  supervisor_client = deploy_utils.get_supervisor_client(host,
      "hdfs", args.hdfs_config.cluster.name, job_name)

  artifact_and_version = "hadoop-" + args.hdfs_config.cluster.version

  jar_dirs = ""
  # Must include both [dir]/ and [dir]/*: [dir]/* only imports the jars under this
  # dir, but we also need access to the webapps under this dir.
  for component in ["common", "hdfs"]:
    if jar_dirs: jar_dirs += ":"
    component_dir = ("$package_dir/share/hadoop/%s" % component)
    jar_dirs += "%s/:%s/lib/*:%s/*" % (
        component_dir, component_dir, component_dir)

  script_dict = {
      "artifact": artifact_and_version,
      "job_name": job_name,
      "jar_dirs": jar_dirs,
      "run_dir": supervisor_client.get_run_dir(),
      "params":
          '-Xmx%dm ' % job.xmx +
          '-Xms%dm ' % job.xms +
          '-Xmn%dm ' % job.xmn +
          '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
          '-XX:MaxPermSize=%dm ' % job.max_perm_size +
          '-XX:+DisableExplicitGC ' +
          '-XX:+HeapDumpOnOutOfMemoryError ' +
          '-XX:HeapDumpPath=$log_dir ' +
          '-XX:+PrintGCApplicationStoppedTime ' +
          '-XX:+UseConcMarkSweepGC ' +
          '-XX:CMSInitiatingOccupancyFraction=80 ' +
          '-XX:+UseMembar ' +
          '-verbose:gc ' +
          '-XX:+PrintGCDetails ' +
          '-XX:+PrintGCDateStamps ' +
          '-Xloggc:$run_dir/stdout/%s_gc_${start_time}.log ' % job_name +
          '-Dproc_%s ' % job_name +
          '-Djava.net.preferIPv4Stack=true ' +
          '-Dhdfs.log.dir=$log_dir ' +
          '-Dhdfs.pid=$pid ' +
          '-Dhdfs.cluster=%s ' % args.hdfs_config.cluster.name +
          '-Dhadoop.policy.file=hadoop-policy.xml ' +
          '-Dhadoop.home.dir=$package_dir ' +
          '-Djava.security.krb5.conf=$run_dir/krb5.conf ' +
          '-Dhadoop.id.str=%s ' % args.remote_user,
  }

  # Configure security-related params
  if deploy_utils.is_security_enabled(args):
    class_path_root = "$package_dir/share/hadoop/"
    boot_class_path = "%s/common/lib/hadoop-security-%s.jar" % (
      class_path_root, args.hdfs_config.cluster.version)
    script_dict["params"] += ('-Xbootclasspath/p:%s ' % boot_class_path +
      '-Dkerberos.instance=hadoop ')

  # finally, add the job's main class name
  script_dict["params"] += (get_job_specific_params(args, job_name) +
    JOB_MAIN_CLASS[job_name])
  return script_dict
Example #19
def generate_run_scripts_params(args, host, job_name):
  job = args.yarn_config.jobs[job_name]

  supervisor_client = deploy_utils.get_supervisor_client(host,
      "yarn", args.yarn_config.cluster.name, job_name)

  artifact_and_version = "hadoop-" + args.yarn_config.cluster.version

  jar_dirs = ""
  for component in ["common", "mapreduce", "yarn", "hdfs"]:
    if jar_dirs: jar_dirs += ":"
    component_dir = ("$package_dir/share/hadoop/%s" % component)
    jar_dirs += "%s/:%s/lib/*:%s/*" % (
        component_dir, component_dir, component_dir)

  service_env = ""
  for component_path in ["HADOOP_COMMON_HOME", "HADOOP_HDFS_HOME", "YARN_HOME"]:
    service_env += "export %s=$package_dir\n" % (component_path)

  script_dict = {
      "artifact": artifact_and_version,
      "job_name": job_name,
      "jar_dirs": jar_dirs,
      "run_dir": supervisor_client.get_run_dir(),
      "service_env": service_env,
      "params":
          '-Xmx%dm ' % job.xmx +
          '-Xms%dm ' % job.xms +
          '-Xmn%dm ' % job.xmn +
          '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
          '-XX:MaxPermSize=%dm ' % job.max_perm_size +
          '-XX:+DisableExplicitGC ' +
          '-XX:+HeapDumpOnOutOfMemoryError ' +
          '-XX:HeapDumpPath=$log_dir ' +
          '-XX:+PrintGCApplicationStoppedTime ' +
          '-XX:+UseConcMarkSweepGC ' +
          '-XX:CMSInitiatingOccupancyFraction=80 ' +
          '-XX:+UseMembar ' +
          '-verbose:gc ' +
          '-XX:+PrintGCDetails ' +
          '-XX:+PrintGCDateStamps ' +
          '-Xloggc:$run_dir/stdout/%s_gc_${start_time}.log ' % job_name +
          '-Dproc_%s ' % job_name +
          '-Djava.net.preferIPv4Stack=true ' +
          '-Dyarn.log.dir=$log_dir ' +
          '-Dyarn.pid=$pid ' +
          '-Dyarn.cluster=%s ' % args.yarn_config.cluster.name +
          '-Dhadoop.policy.file=hadoop-policy.xml ' +
          '-Dhadoop.home.dir=$package_dir ' +
          '-Dhadoop.id.str=%s ' % args.remote_user +
          '-Djava.security.krb5.conf=$run_dir/krb5.conf ' +
          get_job_specific_params(args, job_name)
  }

  if deploy_utils.is_security_enabled(args):
    class_path_root = "$package_dir/share/hadoop"
    boot_class_path = ("%s/common/lib/hadoop-security-%s.jar" % (
          class_path_root, args.hdfs_config.cluster.version))
    script_dict["params"] += "-Xbootclasspath/p:%s " % boot_class_path

  script_dict["params"] += JOB_MAIN_CLASS[job_name]
  return script_dict
Example #20
def generate_run_scripts_params(args, host, job_name):
  job = args.hbase_config.jobs[job_name]

  supervisor_client = deploy_utils.get_supervisor_client(host,
      "hbase", args.hbase_config.cluster.name, job_name)

  artifact_and_version = "hbase-" + args.hbase_config.cluster.version

  component_dir = "$package_dir/"
  # Must include both [dir]/ and [dir]/*: [dir]/* only imports the jars under this
  # dir, but we also need access to the webapps under this dir.
  jar_dirs = "%s/:%s/lib/*:%s/*" % (component_dir, component_dir, component_dir)

  script_dict = {
      "artifact": artifact_and_version,
      "job_name": job_name,
      "jar_dirs": jar_dirs,
      "run_dir": supervisor_client.get_run_dir(),
      "params":
          '-Xmx%dm ' % job.xmx +
          '-Xms%dm ' % job.xms +
          '-Xmn%dm ' % job.xmn +
          '-Xss256k ' +
          '-XX:MaxDirectMemorySize=%dm ' % job.max_direct_memory +
          '-XX:MaxPermSize=%dm ' % job.max_perm_size +
          '-XX:PermSize=%dm ' % job.max_perm_size +
          '-XX:+HeapDumpOnOutOfMemoryError ' +
          '-XX:HeapDumpPath=$log_dir ' +
          '-XX:+PrintGCApplicationStoppedTime ' +
          '-XX:+UseConcMarkSweepGC ' +
          '-verbose:gc ' +
          '-XX:+PrintGCDetails ' +
          '-XX:+PrintGCDateStamps ' +
          '-Xloggc:$run_dir/stdout/%s_gc_${start_time}.log ' % job_name +
          '-XX:+UseMembar ' +
          '-XX:SurvivorRatio=1 ' +
          '-XX:+UseCMSCompactAtFullCollection ' +
          '-XX:CMSInitiatingOccupancyFraction=75 ' +
          '-XX:+UseCMSInitiatingOccupancyOnly ' +
          '-XX:+CMSParallelRemarkEnabled ' +
          '-XX:+UseNUMA ' +
          '-XX:+CMSClassUnloadingEnabled ' +
          '-XX:+PrintSafepointStatistics ' +
          '-XX:PrintSafepointStatisticsCount=1 ' +
          '-XX:+PrintHeapAtGC ' +
          '-XX:+PrintTenuringDistribution ' +
          '-XX:CMSMaxAbortablePrecleanTime=10000 ' +
          '-XX:TargetSurvivorRatio=80 ' +
          '-XX:+UseGCLogFileRotation ' +
          '-XX:NumberOfGCLogFiles=100 ' +
          '-XX:GCLogFileSize=128m ' +
          '-XX:CMSWaitDuration=2000 ' +
          '-XX:+CMSScavengeBeforeRemark ' +
          '-XX:+PrintPromotionFailure ' +
          '-XX:ConcGCThreads=8 ' +
          '-XX:ParallelGCThreads=8 ' +
          '-XX:PretenureSizeThreshold=4m ' +
          '-XX:+CMSConcurrentMTEnabled ' +
          '-XX:+ExplicitGCInvokesConcurrent ' +
          '-XX:+SafepointTimeout ' +
          '-XX:MonitorBound=16384 ' +
          '-XX:OldPLABSize=16 ' +
          '-XX:-ResizeOldPLAB ' +
          '-XX:-UseBiasedLocking ' +
          '-Dproc_%s ' % job_name +
          '-Djava.net.preferIPv4Stack=true ' +
          '-Dhbase.log.dir=$log_dir ' +
          '-Dhbase.pid=$pid ' +
          '-Dhbase.cluster=%s ' % args.hbase_config.cluster.name +
          '-Dhbase.policy.file=hbase-policy.xml ' +
          '-Dhbase.home.dir=$package_dir ' +
          '-Djava.security.krb5.conf=$run_dir/krb5.conf ' +
          '-Dhbase.id.str=%s ' % args.remote_user +
          get_job_specific_params(args, job_name),
  }

  if deploy_utils.is_security_enabled(args):
    jaas_path = "%s/jaas.conf" % supervisor_client.get_run_dir()
    script_dict["params"] += "-Djava.security.auth.login.config=%s " % jaas_path
    boot_class_path = ("$package_dir/lib/hadoop-security-%s.jar" %
        args.hdfs_config.cluster.version)
    script_dict["params"] += "-Xbootclasspath/p:%s " % boot_class_path

  script_dict["params"] += JOB_MAIN_CLASS[job_name]
  return script_dict