def is_active_namenode(hdfs_binary):
  """
  Checks whether the current NameNode is active, retrying for up to roughly 30 seconds. Returns False if the other NameNode is active.
  :return: True if current NameNode is active, False otherwise
  """
  import params

  if params.dfs_ha_enabled:
    is_active_this_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
    is_active_other_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {other_namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})

    for i in range(0, 5):
      code, out = shell.call(is_active_this_namenode_cmd) # If active NN, code will be 0
      if code == 0: # active
        return True

      code, out = shell.call(is_active_other_namenode_cmd) # If other NN is active, code will be 0
      if code == 0: # other NN is active
        return False

      if i < 4: # Do not sleep after last iteration
        time.sleep(6)

    Logger.info("Active NameNode is not found.")
    return False

  else:
    return True
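The retry loop above polls haadmin -getServiceState for each NameNode ID up to five times with a six-second pause between attempts, which is where the roughly 30-second wait comes from. A minimal, hypothetical caller sketch (the "hdfs" binary path below is only a placeholder for whatever the stack scripts resolve):

# Hypothetical caller sketch; "hdfs" stands in for the real hdfs_binary path.
if is_active_namenode("hdfs"):
  Logger.info("This host runs the active NameNode; active-only steps can proceed.")
else:
  Logger.info("This host is standby, or no active NameNode was found; skipping active-only steps.")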
Example #2
def knox():
    import params

    directories = [params.knox_data_dir, params.knox_logs_dir, params.knox_pid_dir, params.knox_conf_dir, os.path.join(params.knox_conf_dir, "topologies")]
    for directory in directories:
      Directory(directory,
                owner = params.knox_user,
                group = params.knox_group,
                recursive = True
      )

    XmlConfig("gateway-site.xml",
              conf_dir=params.knox_conf_dir,
              configurations=params.config['configurations']['gateway-site'],
              configuration_attributes=params.config['configuration_attributes']['gateway-site'],
              owner=params.knox_user,
              group=params.knox_group,
    )

    File(format("{params.knox_conf_dir}/gateway-log4j.properties"),
         mode=0644,
         group=params.knox_group,
         owner=params.knox_user,
         content=params.gateway_log4j
    )

    File(format("{params.knox_conf_dir}/topologies/default.xml"),
         group=params.knox_group,
         owner=params.knox_user,
         content=InlineTemplate(params.topology_template)
    )
    if params.security_enabled:
      TemplateConfig( format("{knox_conf_dir}/krb5JAASLogin.conf"),
                      owner = params.knox_user,
                      template_tag = None
      )

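    # The command is passed as a tuple so each argument (including the directory paths) is quoted safely; sudo=True runs the recursive chown as root.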
    dirs_to_chown = tuple(directories)
    cmd = ('chown','-R',format('{knox_user}:{knox_group}')) + dirs_to_chown
    Execute(cmd,
            sudo = True,
    )

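    # The !p conversion marks the master secret as a password value, so it is masked when the command is logged.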
    cmd = format('{knox_client_bin} create-master --master {knox_master_secret!p}')
    master_secret_exist = as_user(format('test -f {knox_master_secret_path}'), params.knox_user)

    Execute(cmd,
            user=params.knox_user,
            environment={'JAVA_HOME': params.java_home},
            not_if=master_secret_exist,
    )

    cmd = format('{knox_client_bin} create-cert --hostname {knox_host_name_in_cluster}')
    cert_store_exist = as_user(format('test -f {knox_cert_store_path}'), params.knox_user)

    Execute(cmd,
            user=params.knox_user,
            environment={'JAVA_HOME': params.java_home},
            not_if=cert_store_exist,
    )
Example #4
def webhcat_service(action='start', upgrade_type=None):
    import params

    environ = {'HADOOP_HOME': params.hadoop_home}

    cmd = format('{webhcat_bin_dir}/webhcat_server.sh')

    if action == 'start':
        if upgrade_type is not None and params.version and params.stack_root:
            environ['HADOOP_HOME'] = format("{stack_root}/{version}/hadoop")

        daemon_cmd = format('cd {hcat_pid_dir} ; {cmd} start')
        no_op_test = as_user(format(
            'ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p `cat {webhcat_pid_file}` >/dev/null 2>&1'
        ),
                             user=params.webhcat_user)
        try:
            Execute(daemon_cmd,
                    user=params.webhcat_user,
                    not_if=no_op_test,
                    environment=environ)
        except:
            show_logs(params.hcat_log_dir, params.webhcat_user)
            raise
    elif action == 'stop':
        try:
            graceful_stop(cmd, environ)
        except Fail:
            show_logs(params.hcat_log_dir, params.webhcat_user)
            Logger.info(traceback.format_exc())

        pid_expression = "`" + as_user(format("cat {webhcat_pid_file}"),
                                       user=params.webhcat_user) + "`"
        process_id_exists_command = format(
            "ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1"
        )
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")
        wait_time = 10
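        # Hard-kill only if the process exists and is still alive after waiting {wait_time} seconds for the graceful stop to finish.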
        Execute(
            daemon_hard_kill_cmd,
            not_if=format(
                "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
            ),
            ignore_failures=True)

        try:
            # check if stopped the process, else fail the task
            Execute(
                format("! ({process_id_exists_command})"),
                tries=20,
                try_sleep=3,
            )
        except:
            show_logs(params.hcat_log_dir, params.webhcat_user)
            raise

        File(
            params.webhcat_pid_file,
            action="delete",
        )
Example #5
def knox():
    import params

    Directory([params.knox_data_dir, params.knox_logs_dir, params.knox_pid_dir, params.knox_conf_dir, os.path.join(params.knox_conf_dir, "topologies")],
              owner = params.knox_user,
              group = params.knox_group,
              create_parents = True,
              cd_access = "a",
              mode = 0755,
              recursive_ownership = True,
              recursion_follow_links = True,
    )


    XmlConfig("gateway-site.xml",
              conf_dir=params.knox_conf_dir,
              configurations=params.config['configurations']['gateway-site'],
              configuration_attributes=params.config['configuration_attributes']['gateway-site'],
              owner=params.knox_user,
              group=params.knox_group,
    )

    File(format("{params.knox_conf_dir}/gateway-log4j.properties"),
         mode=0644,
         group=params.knox_group,
         owner=params.knox_user,
         content=params.gateway_log4j
    )

    File(format("{params.knox_conf_dir}/topologies/default.xml"),
         group=params.knox_group,
         owner=params.knox_user,
         content=InlineTemplate(params.topology_template)
    )
    if params.security_enabled:
      TemplateConfig( format("{knox_conf_dir}/krb5JAASLogin.conf"),
                      owner = params.knox_user,
                      template_tag = None
      )


    cmd = format('{knox_client_bin} create-master --master {knox_master_secret!p}')
    master_secret_exist = as_user(format('test -f {knox_master_secret_path}'), params.knox_user)

    Execute(cmd,
            user=params.knox_user,
            environment={'JAVA_HOME': params.java_home},
            not_if=master_secret_exist,
    )

    cmd = format('{knox_client_bin} create-cert --hostname {knox_host_name_in_cluster}')
    cert_store_exist = as_user(format('test -f {knox_cert_store_path}'), params.knox_user)

    Execute(cmd,
            user=params.knox_user,
            environment={'JAVA_HOME': params.java_home},
            not_if=cert_store_exist,
    )
Example #6
def accumulo_service(name, action='start'):  # 'start' or 'stop' or 'status'
    import params

    role = name
    pid_file = format("{pid_dir}/accumulo-{accumulo_user}-{role}.pid")

    pid_exists = format(
        "ls {pid_file} >/dev/null 2>&1 && ps `cat {pid_file}` >/dev/null 2>&1")

    if action == 'start':
        Directory(os.path.expanduser(format("~{accumulo_user}")),
                  owner=params.accumulo_user,
                  group=params.user_group,
                  recursive_ownership=True)

        if name != 'tserver':
            Execute(format(
                "{daemon_script} org.apache.accumulo.master.state.SetGoalState NORMAL"
            ),
                    not_if=as_user(pid_exists, params.accumulo_user),
                    user=params.accumulo_user)
        address = params.hostname
        if name == 'monitor' and params.accumulo_monitor_bind_all:
            address = '0.0.0.0'
        daemon_cmd = format(
            "{daemon_script} {role} --address {address} > {log_dir}/accumulo-{role}.out 2>{log_dir}/accumulo-{role}.err & echo $! > {pid_file}"
        )
        try:
            Execute(daemon_cmd,
                    not_if=as_user(pid_exists, params.accumulo_user),
                    user=params.accumulo_user)
        except:
            show_logs(params.log_dir, params.accumulo_user)
            raise

    elif action == 'stop':
        no_pid_exists = format("! ({pid_exists})")

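        # pid stays a shell back-tick expression, so `cat {pid_file}` is evaluated when each kill command actually runs; the trailing redirect silences its output.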
        pid = format("`cat {pid_file}` >/dev/null 2>&1")
        Execute(format("kill {pid}"),
                not_if=as_user(no_pid_exists, params.accumulo_user),
                user=params.accumulo_user)
        Execute(
            format("kill -9 {pid}"),
            not_if=as_user(
                format(
                    "sleep 2; {no_pid_exists} || sleep 20; {no_pid_exists}"),
                params.accumulo_user),
            ignore_failures=True,
            user=params.accumulo_user)
        Execute(format("rm -f {pid_file}"), user=params.accumulo_user)
Example #7
def webhcat_service(action='start', upgrade_type=None):
    import params

    environ = {'HADOOP_HOME': "/usr/lib/hadoop"}
    env = format(
        'export HIVE_HOME=/usr/lib/hive; export HCAT_HOME=/usr/lib/hive-hcatalog'
    )

    cmd = format('{webhcat_bin_dir}/webhcat_server.sh')

    if action == 'start':
        if upgrade_type is not None and params.version:
            environ['HADOOP_HOME'] = format("/usr/lib/hadoop")

        daemon_cmd = format('{env} ; cd {hcat_pid_dir} ; {cmd} start')
        no_op_test = as_user(format(
            'ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p `cat {webhcat_pid_file}` >/dev/null 2>&1'
        ),
                             user=params.webhcat_user)
        Execute(daemon_cmd,
                user=params.webhcat_user,
                not_if=no_op_test,
                environment=environ)
    elif action == 'stop':
        daemon_cmd = format('{env} ; {cmd} stop')
        Execute(daemon_cmd, user=params.webhcat_user, environment=environ)

        pid_expression = "`" + as_user(format("cat {webhcat_pid_file}"),
                                       user=params.webhcat_user) + "`"
        process_id_exists_command = format(
            "ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1"
        )
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")
        wait_time = 10
        Execute(
            daemon_hard_kill_cmd,
            not_if=format(
                "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
            ))

        # check if stopped the process, else fail the task
        Execute(
            format("! ({process_id_exists_command})"),
            tries=20,
            try_sleep=3,
        )

        File(
            params.webhcat_pid_file,
            action="delete",
        )
Example #8
def create_user(user, password):
    import params
    rpassfile = format("{params.exec_tmp_dir}/pass0")
    passfile = format("{params.exec_tmp_dir}/pass")
    cmdfile = format("{params.exec_tmp_dir}/cmds")
    try:
        File(cmdfile,
             mode=0600,
             group=params.user_group,
             owner=params.accumulo_user,
             content=InlineTemplate(
                 format("createuser {user}\n"
                        "grant -s System.CREATE_TABLE -u {user}\n\n")))
        if params.security_enabled and params.has_secure_user_auth:
            Execute(format(
                "{params.kinit_cmd} {params.daemon_script} shell -f "
                "{cmdfile}"),
                    not_if=as_user(
                        format("{params.kinit_cmd} "
                               "{params.daemon_script} shell "
                               "-e \"userpermissions -u {user}\""),
                        params.accumulo_user),
                    user=params.accumulo_user)
        else:
            File(rpassfile,
                 mode=0600,
                 group=params.user_group,
                 owner=params.accumulo_user,
                 content=InlineTemplate('{{root_password}}\n\n'))
            File(passfile,
                 mode=0600,
                 group=params.user_group,
                 owner=params.accumulo_user,
                 content=InlineTemplate(
                     format("{params.root_password}\n"
                            "{password}\n"
                            "{password}\n\n")))
            Execute(format(
                "cat {passfile} | {params.daemon_script} shell -u root "
                "-f {cmdfile}"),
                    not_if=as_user(
                        format("cat {rpassfile} | "
                               "{params.daemon_script} shell -u root "
                               "-e \"userpermissions -u {user}\""),
                        params.accumulo_user),
                    user=params.accumulo_user)
    finally:
        try_remove(rpassfile)
        try_remove(passfile)
        try_remove(cmdfile)
Example #9
def prepare_upgrade_save_namespace():
  """
  During a NonRolling (aka Express Upgrade), preparing the NameNode requires saving the namespace.
  """
  import params

  dfsadmin_base_command = get_dfsadmin_base_command('hdfs')
  save_namespace_cmd = dfsadmin_base_command + " -saveNamespace"
  try:
    Logger.info("Checkpoint the current namespace.")
    as_user(save_namespace_cmd, params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
  except Exception, e:
    message = format("Could not save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}")
    Logger.error(message)
    raise Fail(message)
Example #10
def kill_zkfc(zkfc_user):
    """
  There are two potential methods for failing over the namenode, especially during a Rolling Upgrade.
  Option 1. Kill zkfc on primary namenode provided that the secondary is up and has zkfc running on it.
  Option 2. Silent failover
  :param zkfc_user: User that started the ZKFC process.
  :return: True if ZKFC was killed, otherwise False.
  """
    import params
    if params.dfs_ha_enabled:
        if params.zkfc_pid_file:
            check_process = as_user(format(
                "ls {zkfc_pid_file} > /dev/null 2>&1 && ps -p `cat {zkfc_pid_file}` > /dev/null 2>&1"
            ),
                                    user=zkfc_user)
            code, out = shell.call(check_process)
            if code == 0:
                Logger.debug("ZKFC is running and will be killed.")
                kill_command = format("kill -15 `cat {zkfc_pid_file}`")
                Execute(kill_command, user=zkfc_user)
                File(
                    params.zkfc_pid_file,
                    action="delete",
                )
                return True
    return False
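A hypothetical caller sketch showing how the boolean result can gate the rest of a failover step (the log messages are illustrative, and params.hdfs_user is assumed to be the user that started ZKFC):

# Hypothetical sketch: attempt Option 1 (kill ZKFC) and report what happened.
if kill_zkfc(params.hdfs_user):
    Logger.info("ZKFC was killed; the other NameNode is expected to become active.")
else:
    Logger.info("ZKFC was not running here (or HA is disabled); no failover was triggered.")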
Example #11
def upload_configuration_to_zk(zookeeper_quorum,
                               solr_znode,
                               config_set,
                               config_set_dir,
                               tmp_config_set_dir,
                               java64_home,
                               user,
                               retry=5,
                               interval=10):
    """
  Upload a configuration set to ZooKeeper with solrCloudCli.sh.
  First, if the configuration set already exists in ZooKeeper, it is downloaded into a temporary location, so a
  configuration the user has changed there is not redefined.
  The configuration set is uploaded from config_set_dir only when it does not yet exist in ZooKeeper.
  """
    solr_cli_prefix = __create_solr_cloud_cli_prefix(zookeeper_quorum,
                                                     solr_znode, java64_home)
    Execute(format(
        '{solr_cli_prefix} --download-config --config-dir {tmp_config_set_dir} --config-set {config_set} --retry {retry} --interval {interval}'
    ),
            only_if=as_user(
                format(
                    "{solr_cli_prefix} --check-config --config-set {config_set} --retry {retry} --interval {interval}"
                ), user),
            user=user)

    Execute(format(
        '{solr_cli_prefix} --upload-config --config-dir {config_set_dir} --config-set {config_set} --retry {retry} --interval {interval}'
    ),
            not_if=format("test -d {tmp_config_set_dir}"),
            user=user)
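A hypothetical invocation of the helper above (every value below is illustrative, not taken from a real cluster):

# Hypothetical usage sketch for upload_configuration_to_zk.
upload_configuration_to_zk(
    zookeeper_quorum="zk1.example.com:2181,zk2.example.com:2181",  # placeholder quorum
    solr_znode="/solr",                                            # placeholder znode
    config_set="audit_logs",                                       # placeholder config set name
    config_set_dir="/etc/myservice/conf/solr",                     # shipped default configuration
    tmp_config_set_dir="/tmp/solr_config_download",                # scratch dir used by the download step
    java64_home="/usr/jdk64/jdk1.8.0_77",                          # placeholder JAVA_HOME
    user="solr",                                                   # placeholder service user
    retry=5,
    interval=10)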
Example #12
def kill_zkfc(zkfc_user):
  """
  There are two potential methods for failing over the namenode, especially during a Rolling Upgrade.
  Option 1. Kill zkfc on primary namenode provided that the secondary is up and has zkfc running on it.
  Option 2. Silent failover (not supported as of HDP 2.2.0.0)
  :param zkfc_user: User that started the ZKFC process.
  :return: True if ZKFC was killed, otherwise False.
  """
  import params
  if params.dfs_ha_enabled:
    zkfc_pid_file = get_service_pid_file("zkfc", zkfc_user)
    if zkfc_pid_file:
      check_process = as_user(format("ls {zkfc_pid_file} > /dev/null 2>&1 && ps -p `cat {zkfc_pid_file}` > /dev/null 2>&1"), user=zkfc_user)
      code, out = shell.call(check_process)
      if code == 0:
        Logger.debug("ZKFC is running and will be killed.")
        kill_command = format("kill -15 `cat {zkfc_pid_file}`")
        Execute(kill_command,
             user=zkfc_user
        )
        File(zkfc_pid_file,
             action = "delete",
        )
        return True
  return False
Example #13
def create_metastore_schema():
  import params

  create_schema_cmd = format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                             "{hive_schematool_bin}/schematool -initSchema "
                             "-dbType {hive_metastore_db_type} "
                             "-userName {hive_metastore_user_name} "
                             "-passWord {hive_metastore_user_passwd!p} -verbose")

  check_schema_created_cmd = as_user(format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                    "{hive_schematool_bin}/schematool -info "
                                    "-dbType {hive_metastore_db_type} "
                                    "-userName {hive_metastore_user_name} "
                                    "-passWord {hive_metastore_user_passwd!p} -verbose"), params.hive_user)

  # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords.
  # Fixing it with the hack below:
  quoted_hive_metastore_user_passwd = quote_bash_args(quote_bash_args(params.hive_metastore_user_passwd))
  if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \
      or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"':
    quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[1:-1]
  Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(check_schema_created_cmd.replace(
      format("-passWord {quoted_hive_metastore_user_passwd}"), "-passWord " + utils.PASSWORDS_HIDE_STRING))

  Execute(create_schema_cmd,
          not_if = check_schema_created_cmd,
          user = params.hive_user
  )
Example #14
 def test_start_secured(self, isfile_mock):
   isfile_mock.return_value = True
   self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR + "/scripts/oozie_server.py",
                        classname = "OozieServer",
                        command = "start",
                        config_file="secured.json",
                        hdp_stack_version = self.STACK_VERSION,
                        target = RMFTestCase.TARGET_COMMON_SERVICES
   )
   self.assert_configure_secured()
   self.assertResourceCalled('Execute', 'cd /var/tmp/oozie && /usr/lib/oozie/bin/ooziedb.sh create -sqlfile oozie.sql -run',
                             not_if = 'ls /var/run/oozie/oozie.pid >/dev/null 2>&1 && ps -p `cat /var/run/oozie/oozie.pid` >/dev/null 2>&1',
                             ignore_failures = True,
                             user = '******',
                             )
   self.assertResourceCalled('Execute', '/usr/bin/kinit -kt /etc/security/keytabs/oozie.service.keytab oozie/[email protected]; hadoop --config /etc/hadoop/conf dfs -put /usr/lib/oozie/share /user/oozie ; hadoop --config /etc/hadoop/conf dfs -chmod -R 755 /user/oozie/share',
                             not_if = shell.as_user("/usr/bin/kinit -kt /etc/security/keytabs/oozie.service.keytab oozie/[email protected]; hadoop --config /etc/hadoop/conf dfs -ls /user/oozie/share | awk 'BEGIN {count=0;} /share/ {count++} END {if (count > 0) {exit 0} else {exit 1}}'", "oozie"),
                             user = '******',
                             path = ['/usr/bin:/usr/bin'],
                             )
   self.assertResourceCalled('Execute', 'cd /var/tmp/oozie && /usr/lib/oozie/bin/oozie-start.sh',
                             not_if = 'ls /var/run/oozie/oozie.pid >/dev/null 2>&1 && ps -p `cat /var/run/oozie/oozie.pid` >/dev/null 2>&1',
                             user = '******',
                             )
   self.assertNoMoreResources()
Example #15
 def startRebalancingProcess(threshold, rebalance_env):
     rebalanceCommand = format(
         'hdfs --config {hadoop_conf_dir} balancer -threshold {threshold}'
     )
     return as_user(rebalanceCommand,
                    params.hdfs_user,
                    env=rebalance_env)
Example #16
def get_user_call_output(command, user, is_checked_call=True, **call_kwargs):
  """
  This function captures only the output of the command run inside su, ignoring the output of su itself.
  This is useful since some users have MOTD messages set up by default on su -l.
  
  @return: code, stdout, stderr
  """
  command_string = shell.string_cmd_from_args_list(command) if isinstance(command, (list, tuple)) else command
  out_files = []
  
  try:
    out_files.append(tempfile.NamedTemporaryFile())
    out_files.append(tempfile.NamedTemporaryFile())
    
    # other user should be able to write to it
    for f in out_files:
      os.chmod(f.name, 0666)
    
    command_string += " 1>" + out_files[0].name
    command_string += " 2>" + out_files[1].name
    
    func = shell.checked_call if is_checked_call else shell.call
    func_result = func(shell.as_user(command_string, user), **call_kwargs)
    
    files_output = []
    for f in out_files:
      files_output.append(f.read())
    
    return func_result[0], files_output[0], files_output[1]
  finally:
    for f in out_files:
      f.close()
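Since the redirection is appended to the inner command before su wraps it, only that command's stdout and stderr land in the temporary files; su's own output (for example an MOTD) is never captured. A minimal, hypothetical usage (the command and user are placeholders):

# Hypothetical usage sketch: run a command as another user and keep only that command's output.
code, out, err = get_user_call_output("cat /var/run/hadoop/hdfs/hadoop-hdfs-namenode.pid", user="hdfs")
if code == 0:
  Logger.info("NameNode pid: " + out)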
Example #17
def prepare_upgrade_save_namespace(hdfs_binary):
  """
  During a NonRolling (aka Express Upgrade), preparing the NameNode requires saving the namespace.
  :param hdfs_binary: name/path of the HDFS binary to use
  """
  import params

  dfsadmin_base_command = get_dfsadmin_base_command(hdfs_binary)
  save_namespace_cmd = dfsadmin_base_command + " -saveNamespace"
  try:
    Logger.info("Checkpoint the current namespace.")
    as_user(save_namespace_cmd, params.hdfs_user, env={'PATH': params.hadoop_bin_dir})
  except Exception, e:
    message = format("Could not save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}")
    Logger.error(message)
    raise Fail(message)
Example #18
def create_metastore_schema():
    import params

    create_schema_cmd = format(
        "export HIVE_CONF_DIR={hive_server_conf_dir} ; "
        "{hive_schematool_bin}/schematool -initSchema "
        "-dbType {hive_metastore_db_type} "
        "-userName {hive_metastore_user_name} "
        "-passWord {hive_metastore_user_passwd!p} -verbose")

    check_schema_created_cmd = as_user(
        format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
               "{hive_schematool_bin}/schematool -info "
               "-dbType {hive_metastore_db_type} "
               "-userName {hive_metastore_user_name} "
               "-passWord {hive_metastore_user_passwd!p} -verbose"),
        params.hive_user)
    quoted_hive_metastore_user_passwd = quote_bash_args(
        quote_bash_args(params.hive_metastore_user_passwd))
    if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \
            or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"':
        quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[
            1:-1]
    Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(
        check_schema_created_cmd.replace(
            format("-passWord {quoted_hive_metastore_user_passwd}"),
            "-passWord " + utils.PASSWORDS_HIDE_STRING))

    Execute(create_schema_cmd,
            not_if=check_schema_created_cmd,
            user=params.hive_user)
Example #19
 def test_start_secured(self, isfile_mock):
     isfile_mock.return_value = True
     self.executeScript(self.COMMON_SERVICES_PACKAGE_DIR +
                        "/scripts/oozie_server.py",
                        classname="OozieServer",
                        command="start",
                        config_file="secured.json",
                        hdp_stack_version=self.STACK_VERSION,
                        target=RMFTestCase.TARGET_COMMON_SERVICES)
     self.assert_configure_secured()
     self.assertResourceCalled(
         'Execute',
         'cd /var/tmp/oozie && /usr/lib/oozie/bin/ooziedb.sh create -sqlfile oozie.sql -run',
         not_if=
         'ls /var/run/oozie/oozie.pid >/dev/null 2>&1 && ps -p `cat /var/run/oozie/oozie.pid` >/dev/null 2>&1',
         ignore_failures=True,
         user='******',
     )
     self.assertResourceCalled(
         'Execute',
         '/usr/bin/kinit -kt /etc/security/keytabs/oozie.service.keytab oozie/[email protected]; hadoop --config /etc/hadoop/conf dfs -put /usr/lib/oozie/share /user/oozie ; hadoop --config /etc/hadoop/conf dfs -chmod -R 755 /user/oozie/share',
         not_if=shell.as_user(
             "/usr/bin/kinit -kt /etc/security/keytabs/oozie.service.keytab oozie/[email protected]; hadoop --config /etc/hadoop/conf dfs -ls /user/oozie/share | awk 'BEGIN {count=0;} /share/ {count++} END {if (count > 0) {exit 0} else {exit 1}}'",
             "oozie"),
         user='******',
         path=['/usr/bin:/usr/bin'],
     )
     self.assertResourceCalled(
         'Execute',
         'cd /var/tmp/oozie && /usr/lib/oozie/bin/oozie-start.sh',
         not_if=
         'ls /var/run/oozie/oozie.pid >/dev/null 2>&1 && ps -p `cat /var/run/oozie/oozie.pid` >/dev/null 2>&1',
         user='******',
     )
     self.assertNoMoreResources()
Example #20
def webhcat_service(action='start', upgrade_type=None):
  import params

  environ = {
    'HADOOP_HOME': params.hadoop_home
  }

  cmd = format('{webhcat_bin_dir}/webhcat_server.sh')

  if action == 'start':
    if upgrade_type is not None and params.version:
      environ['HADOOP_HOME'] = format("/usr/iop/{version}/hadoop")

    daemon_cmd = format('cd {hcat_pid_dir} ; {cmd} start')
    no_op_test = as_user(format('ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p `cat {webhcat_pid_file}` >/dev/null 2>&1'), user=params.webhcat_user)
    Execute(daemon_cmd,
            user=params.webhcat_user,
            not_if=no_op_test,
            environment = environ)
  elif action == 'stop':
    daemon_cmd = format('cd {hcat_pid_dir} ; {cmd} stop')
    Execute(daemon_cmd,
            user = params.webhcat_user,
            environment = environ)
    File(params.webhcat_pid_file,
         action="delete",
    )
Example #21
def check_fs_root(conf_dir, execution_path):
    import params

    if not params.manage_hive_fsroot:
        Logger.info(
            "Skipping fs root check as cluster-env/manage_hive_fsroot is disabled"
        )
        return

    if not params.fs_root.startswith("hdfs://"):
        Logger.info(
            "Skipping fs root check as fs_root does not start with hdfs://")
        return

    metatool_cmd = format("hive --config {conf_dir} --service metatool")
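    # List the FS roots recorded in the metastore, keep only the scheme://authority part, drop the one that already matches fs_root, and take the first mismatch (if any).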
    cmd = as_user(format("{metatool_cmd} -listFSRoot", env={'PATH': execution_path}), params.hive_user) \
          + format(" 2>/dev/null | grep hdfs:// | cut -f1,2,3 -d '/' | grep -v '{fs_root}' | head -1")
    code, out = shell.call(cmd)

    if code == 0 and out.strip() != "" and params.fs_root.strip() != out.strip():
        out = out.strip()
        cmd = format("{metatool_cmd} -updateLocation {fs_root} {out}")
        Execute(cmd,
                user=params.hive_user,
                environment={'PATH': execution_path})
Example #22
def create_hive_metastore_schema():
    import params

    SYS_DB_CREATED_FILE = "/etc/hive/sys.db.created"

    if os.path.isfile(SYS_DB_CREATED_FILE):
        Logger.info("Sys DB is already created")
        return

    create_hive_schema_cmd = format(
        "export HIVE_CONF_DIR={hive_server_conf_dir} ; "
        "{hive_schematool_bin}/schematool -initSchema "
        "-dbType hive "
        "-metaDbType {hive_metastore_db_type} "
        "-userName {hive_metastore_user_name} "
        "-passWord {hive_metastore_user_passwd!p} "
        "-verbose")

    check_hive_schema_created_cmd = as_user(
        format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
               "{hive_schematool_bin}/schematool -info "
               "-dbType hive "
               "-metaDbType {hive_metastore_db_type} "
               "-userName {hive_metastore_user_name} "
               "-passWord {hive_metastore_user_passwd!p} "
               "-verbose"), params.hive_user)

    # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords.
    # Fixing it with the hack below:
    quoted_hive_metastore_user_passwd = quote_bash_args(
        quote_bash_args(params.hive_metastore_user_passwd))
    if quoted_hive_metastore_user_passwd.startswith("'") and quoted_hive_metastore_user_passwd.endswith("'") \
        or quoted_hive_metastore_user_passwd.startswith('"') and quoted_hive_metastore_user_passwd.endswith('"'):
        quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[
            1:-1]
    Logger.sensitive_strings[repr(create_hive_schema_cmd)] = repr(
        create_hive_schema_cmd.replace(
            format("-passWord {quoted_hive_metastore_user_passwd}"),
            "-passWord " + utils.PASSWORDS_HIDE_STRING))
    Logger.sensitive_strings[repr(check_hive_schema_created_cmd)] = repr(
        check_hive_schema_created_cmd.replace(
            format("-passWord {quoted_hive_metastore_user_passwd}"),
            "-passWord " + utils.PASSWORDS_HIDE_STRING))

    try:
        if params.security_enabled:
            hive_kinit_cmd = format(
                "{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; "
            )
            Execute(hive_kinit_cmd, user=params.hive_user)

        Execute(create_hive_schema_cmd,
                not_if=check_hive_schema_created_cmd,
                user=params.hive_user)
        Execute("touch " + SYS_DB_CREATED_FILE, user="******")
        Logger.info("Sys DB is set up")
    except:
        Logger.error("Could not create Sys DB.")
        Logger.error(traceback.format_exc())
Example #23
def setup_metastore():
    import params

    if params.hive_metastore_site_supported:
        hivemetastore_site_config = get_config("hivemetastore-site")
        if hivemetastore_site_config:
            XmlConfig("hivemetastore-site.xml",
                      conf_dir=params.hive_server_conf_dir,
                      configurations=params.config['configurations']['hivemetastore-site'],
                      configuration_attributes=params.config['configuration_attributes']['hivemetastore-site'],
                      owner=params.hive_user,
                      group=params.user_group,
                      mode=0600)

    File(os.path.join(params.hive_server_conf_dir,
                      "hadoop-metrics2-hivemetastore.properties"),
         owner=params.hive_user,
         group=params.user_group,
         content=Template("hadoop-metrics2-hivemetastore.properties.j2"),
         mode=0600)

    File(params.start_metastore_path,
         mode=0755,
         content=StaticFile('startMetastore.sh'))
    if params.init_metastore_schema:
        create_schema_cmd = format(
            "export HIVE_CONF_DIR={hive_server_conf_dir} ; "
            "{hive_schematool_bin}/schematool -initSchema "
            "-dbType {hive_metastore_db_type} "
            "-userName {hive_metastore_user_name} "
            "-passWord {hive_metastore_user_passwd!p} -verbose")

        check_schema_created_cmd = as_user(
            format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                   "{hive_schematool_bin}/schematool -info "
                   "-dbType {hive_metastore_db_type} "
                   "-userName {hive_metastore_user_name} "
                   "-passWord {hive_metastore_user_passwd!p} -verbose"),
            params.hive_user)

        # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords.
        # Fixing it with the hack below:
        quoted_hive_metastore_user_passwd = quote_bash_args(
            quote_bash_args(params.hive_metastore_user_passwd))
        if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \
            or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"':
            quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[
                1:-1]
        Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(
            check_schema_created_cmd.replace(
                format("-passWord {quoted_hive_metastore_user_passwd}"),
                "-passWord " + utils.PASSWORDS_HIDE_STRING))

        Execute(create_schema_cmd,
                not_if=check_schema_created_cmd,
                user=params.hive_user)
Example #24
def get_user_call_output(command,
                         user,
                         quiet=False,
                         is_checked_call=True,
                         **call_kwargs):
    """
  This function captures only the output of the command run inside su, ignoring the output of su itself.
  This is useful since some users have MOTD messages set up by default on su -l.
  
  @return: code, stdout, stderr
  """
    command_string = shell.string_cmd_from_args_list(command) if isinstance(
        command, (list, tuple)) else command
    out_files = []

    try:
        out_files.append(tempfile.NamedTemporaryFile())
        out_files.append(tempfile.NamedTemporaryFile())

        # other user should be able to write to it
        for f in out_files:
            os.chmod(f.name, 0666)

        command_string += " 1>" + out_files[0].name
        command_string += " 2>" + out_files[1].name

        code, _ = shell.call(shell.as_user(command_string, user),
                             quiet=quiet,
                             **call_kwargs)

        files_output = []
        for f in out_files:
            files_output.append(f.read().decode("utf-8").strip('\n'))

        if code:
            all_output = files_output[1] + '\n' + files_output[0]
            err_msg = Logger.filter_text(
                ("Execution of '%s' returned %d. %s") %
                (command_string, code, all_output))

            if is_checked_call:
                raise Fail(err_msg)
            else:
                Logger.warning(err_msg)

        result = code, files_output[0], files_output[1]

        caller_filename = sys._getframe(1).f_code.co_filename
        is_internal_call = shell.NOT_LOGGED_FOLDER in caller_filename
        if quiet == False or (quiet == None and not is_internal_call):
            log_msg = "{0} returned {1}".format(get_user_call_output.__name__,
                                                result)
            Logger.info(log_msg)

        return result
    finally:
        for f in out_files:
            f.close()
Example #25
def service(componentName, action='start', serviceName='yarn'):

  import params

  if serviceName == 'mapreduce' and componentName == 'historyserver':
    delete_pid_file = True
    daemon = format("{mapred_bin}/mr-jobhistory-daemon.sh")
    pid_file = format("{mapred_pid_dir}/mapred-{mapred_user}-{componentName}.pid")
    usr = params.mapred_user
  else:
    # !!! yarn-daemon.sh deletes the PID for us; if we remove it the script
    # may not work correctly when stopping the service
    delete_pid_file = False
    daemon = format("{yarn_bin}/yarn-daemon.sh")
    pid_file = format("{yarn_pid_dir}/yarn-{yarn_user}-{componentName}.pid")
    usr = params.yarn_user

  cmd = format("export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && {daemon} --config {hadoop_conf_dir}")

  if action == 'start':
    daemon_cmd = format("{ulimit_cmd} {cmd} start {componentName}")
    check_process = as_user(format("ls {pid_file} && ps -p `cat {pid_file}`"), user=usr)

    # Remove the pid file if its corresponding process is not running.
    File(pid_file, action = "delete", not_if = check_process)

    if componentName == 'timelineserver' and serviceName == 'yarn':
      File(params.ats_leveldb_lock_file,
         action = "delete",
         only_if = format("ls {params.ats_leveldb_lock_file}"),
         not_if = check_process,
         ignore_failures = True
      )

    # Attempt to start the process. Internally, this is skipped if the process is already running.
    Execute(daemon_cmd, user = usr, not_if = check_process)

    # Ensure that the process with the expected PID exists.
    Execute(check_process,
            not_if = check_process,
            tries=5,
            try_sleep=1,
    )

  elif action == 'stop':
    daemon_cmd = format("{cmd} stop {componentName}")
    Execute(daemon_cmd, user=usr)

    # !!! yarn-daemon doesn't need us to delete PIDs
    if delete_pid_file is True:
      File(pid_file, action="delete")


  elif action == 'refreshQueues':
    rm_kinit_cmd = params.rm_kinit_cmd
    refresh_cmd = format("{rm_kinit_cmd} export HADOOP_LIBEXEC_DIR={hadoop_libexec_dir} && {yarn_container_bin}/yarn rmadmin -refreshQueues")
    Execute(refresh_cmd, user=usr)
Example #26
def prepare_upgrade_save_namespace(hdfs_binary):
    """
  During a NonRolling (aka Express Upgrade), preparing the NameNode requires saving the namespace.
  :param hdfs_binary: name/path of the HDFS binary to use
  """
    import params

    save_namespace_cmd = format("{hdfs_binary} dfsadmin -saveNamespace")
    try:
        Logger.info("Checkpoint the current namespace.")
        as_user(save_namespace_cmd,
                params.hdfs_user,
                env={'PATH': params.hadoop_bin_dir})
    except Exception, e:
        message = format(
            "Could not save the NameSpace. As the HDFS user, call this command: {save_namespace_cmd}"
        )
        Logger.error(message)
        raise Fail(message)
Example #27
def webhcat_service(action='start', rolling_restart=False):
  import params

  environ = {
    'HADOOP_HOME': params.hadoop_home
  }

  cmd = format('{webhcat_bin_dir}/webhcat_server.sh')

  if action == 'start':
    if rolling_restart and params.version:
      environ['HADOOP_HOME'] = format("/usr/hdp/{version}/hadoop")

    daemon_cmd = format('cd {hcat_pid_dir} ; {cmd} start')
    no_op_test = as_user(format('ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p `cat {webhcat_pid_file}` >/dev/null 2>&1'), user=params.webhcat_user)
    Execute(daemon_cmd,
            user=params.webhcat_user,
            not_if=no_op_test,
            environment = environ)
  elif action == 'stop':
    daemon_cmd = format('{cmd} stop')
    Execute(daemon_cmd,
            user = params.webhcat_user,
            environment = environ)

    pid_expression = "`" + as_user(format("cat {webhcat_pid_file}"), user=params.webhcat_user) + "`"
    process_id_exists_command = format("ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1")
    daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")
    wait_time = 10
    Execute(daemon_hard_kill_cmd,
            not_if = format("! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )")
    )

    # check if stopped the process, else fail the task
    Execute(format("! ({process_id_exists_command})"),
            tries=20,
            try_sleep=3,
    )

    File(params.webhcat_pid_file,
         action="delete",
    )
Example #28
def prepare_upgrade_enter_safe_mode(hdfs_binary):
    """
  During a NonRolling (aka Express Upgrade), preparing the NameNode requires first entering Safemode.
  :param hdfs_binary: name/path of the HDFS binary to use
  """
    import params

    safe_mode_enter_cmd = format("{hdfs_binary} dfsadmin -safemode enter")
    safe_mode_enter_and_check_for_on = format(
        "{safe_mode_enter_cmd} | grep 'Safe mode is ON'")
    try:
        # Safe to call if already in Safe Mode
        Logger.info("Enter SafeMode if not already in it.")
        as_user(safe_mode_enter_and_check_for_on,
                params.hdfs_user,
                env={'PATH': params.hadoop_bin_dir})
    except Exception, e:
        message = format(
            "Could not enter safemode. As the HDFS user, call this command: {safe_mode_enter_cmd}"
        )
        Logger.error(message)
        raise Fail(message)
Example #29
def webhcat_service(action="start", rolling_restart=False):
    import params

    environ = {"HADOOP_HOME": params.hadoop_home}

    cmd = format("{webhcat_bin_dir}/webhcat_server.sh")

    if action == "start":
        if rolling_restart and params.version:
            environ["HADOOP_HOME"] = format("/usr/hdp/{version}/hadoop")

        daemon_cmd = format("cd {hcat_pid_dir} ; {cmd} start")
        no_op_test = as_user(
            format("ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p `cat {webhcat_pid_file}` >/dev/null 2>&1"),
            user=params.webhcat_user,
        )
        Execute(daemon_cmd, user=params.webhcat_user, not_if=no_op_test, environment=environ)
    elif action == "stop":
        daemon_cmd = format("{cmd} stop")
        Execute(daemon_cmd, user=params.webhcat_user, environment=environ)

        pid_expression = "`" + as_user(format("cat {webhcat_pid_file}"), user=params.webhcat_user) + "`"
        process_id_exists_command = format(
            "ls {webhcat_pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1"
        )
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")
        wait_time = 10
        Execute(
            daemon_hard_kill_cmd,
            not_if=format(
                "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
            ),
        )

        # check if stopped the process, else fail the task
        Execute(format("! ({process_id_exists_command})"), tries=20, try_sleep=3)

        File(params.webhcat_pid_file, action="delete")
Example #30
def check_fs_root():
  import params  
  metatool_cmd = format("hive --config {hive_server_conf_dir} --service metatool")
  cmd = as_user(format("{metatool_cmd} -listFSRoot", env={'PATH': params.execute_path}), params.hive_user) \
        + format(" 2>/dev/null | grep hdfs:// | cut -f1,2,3 -d '/' | grep -v '{fs_root}' | head -1")
  code, out = shell.call(cmd)

  if code == 0 and out.strip() != "" and params.fs_root.strip() != out.strip():
    out = out.strip()
    cmd = format("{metatool_cmd} -updateLocation {fs_root} {out}")
    Execute(cmd,
            user=params.hive_user,
            environment={'PATH': params.execute_path}
    )
Example #31
def get_user_call_output(command,
                         user,
                         quiet=False,
                         is_checked_call=True,
                         **call_kwargs):
    """
  This function captures only the output of the command run inside su, ignoring the output of su itself.
  This is useful since some users have MOTD messages set up by default on su -l.
  
  @return: code, stdout, stderr
  """
    command_string = shell.string_cmd_from_args_list(command) if isinstance(
        command, (list, tuple)) else command
    out_files = []

    try:
        out_files.append(tempfile.NamedTemporaryFile())
        out_files.append(tempfile.NamedTemporaryFile())

        # other user should be able to write to it
        for f in out_files:
            os.chmod(f.name, 0666)

        command_string += " 1>" + out_files[0].name
        command_string += " 2>" + out_files[1].name

        code, _ = shell.call(shell.as_user(command_string, user),
                             quiet=quiet,
                             **call_kwargs)

        files_output = []
        for f in out_files:
            files_output.append(f.read().strip('\n'))

        if code:
            all_output = files_output[1] + '\n' + files_output[0]
            err_msg = Logger.filter_text(
                ("Execution of '%s' returned %d. %s") %
                (command_string, code, all_output))

            if is_checked_call:
                raise Fail(err_msg)
            else:
                Logger.warning(err_msg)

        return code, files_output[0], files_output[1]
    finally:
        for f in out_files:
            f.close()
Example #32
def check_fs_root():
    import params
    metatool_cmd = format(
        "hive --config {hive_server_conf_dir} --service metatool")
    cmd = as_user(format("{metatool_cmd} -listFSRoot", env={'PATH': params.execute_path}), params.hive_user) \
          + format(" 2>/dev/null | grep hdfs:// | cut -f1,2,3 -d '/' | grep -v '{fs_root}' | head -1")
    code, out = shell.call(cmd)

    if code == 0 and out.strip() != "" and params.fs_root.strip() != out.strip():
        out = out.strip()
        cmd = format("{metatool_cmd} -updateLocation {fs_root} {out}")
        Execute(cmd,
                user=params.hive_user,
                environment={'PATH': params.execute_path})
Example #33
def check_fs_root():
    import params

    if not params.fs_root.startswith("hdfs://"):
        Logger.info("Skipping fs root check as fs_root does not start with hdfs://")
        return

    metatool_cmd = format("hive --config {hive_server_conf_dir} --service metatool")
    cmd = as_user(format("{metatool_cmd} -listFSRoot", env={"PATH": params.execute_path}), params.hive_user) + format(
        " 2>/dev/null | grep hdfs:// | cut -f1,2,3 -d '/' | grep -v '{fs_root}' | head -1"
    )
    code, out = shell.call(cmd)

    if code == 0 and out.strip() != "" and params.fs_root.strip() != out.strip():
        out = out.strip()
        cmd = format("{metatool_cmd} -updateLocation {fs_root} {out}")
        Execute(cmd, user=params.hive_user, environment={"PATH": params.execute_path})
Example #34
def get_user_call_output(command, user, quiet=False, is_checked_call=True, **call_kwargs):
  """
  This function captures only the output of the command run inside su, ignoring the output of su itself.
  This is useful since some users have MOTD messages set up by default on su -l.
  
  @return: code, stdout, stderr
  """
  command_string = shell.string_cmd_from_args_list(command) if isinstance(command, (list, tuple)) else command
  out_files = []
  
  try:
    out_files.append(tempfile.NamedTemporaryFile())
    out_files.append(tempfile.NamedTemporaryFile())
    
    # other user should be able to write to it
    for f in out_files:
      os.chmod(f.name, 0666)
    
    command_string += " 1>" + out_files[0].name
    command_string += " 2>" + out_files[1].name
    
    code, _ = shell.call(shell.as_user(command_string, user), quiet=quiet, **call_kwargs)
    
    files_output = []
    for f in out_files:
      files_output.append(f.read().strip('\n'))
      
    if code:
      all_output = files_output[1] + '\n' + files_output[0]
      err_msg = Logger.filter_text(("Execution of '%s' returned %d. %s") % (command_string, code, all_output))
      
      if is_checked_call:
        raise Fail(err_msg)
      else:
        Logger.warning(err_msg)      
    
    return code, files_output[0], files_output[1]
  finally:
    for f in out_files:
      f.close()
Example #35
def service(name, action='start'):
    import params
    import status_params

    pid_file = status_params.pid_files[name]
    no_op_test = as_user(format(
        "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"
    ),
                         user=params.storm_user)

    if name == "logviewer" or name == "drpc":
        tries_count = 12
    else:
        tries_count = 6

    if name == 'ui':
        process_grep = "backtype.storm.ui.core$"
    elif name == "rest_api":
        process_grep = format("{rest_lib_dir}/storm-rest-.*\.jar$")
    else:
        process_grep = format("storm.daemon.{name}$")

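    # The pid file is written by this script (by grepping jps output) rather than by the Storm daemon itself, so the started process can be found again later.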
    find_proc = format("{jps_binary} -l  | grep {process_grep}")
    write_pid = format("{find_proc} | awk {{'print $1'}} > {pid_file}")
    crt_pid_cmd = format("{find_proc} && {write_pid}")
    storm_env = format(
        "source {conf_dir}/storm-env.sh ; export PATH=$JAVA_HOME/bin:$PATH")

    if action == "start":
        if name == "rest_api":
            process_cmd = format(
                "{storm_env} ; java -jar {rest_lib_dir}/`ls {rest_lib_dir} | grep -wE storm-rest-[0-9.-]+\.jar` server"
            )
            cmd = format(
                "{process_cmd} {rest_api_conf_file} > {log_dir}/restapi.log 2>&1"
            )
        else:
            cmd = format(
                "{storm_env} ; storm {name} > {log_dir}/{name}.out 2>&1")

        Execute(cmd,
                not_if=no_op_test,
                user=params.storm_user,
                wait_for_finish=False,
                path=params.storm_bin_dir)

        Execute(crt_pid_cmd,
                user=params.storm_user,
                logoutput=True,
                tries=tries_count,
                try_sleep=10,
                path=params.storm_bin_dir)

    elif action == "stop":
        process_dont_exist = format("! ({no_op_test})")
        if os.path.exists(pid_file):
            pid = get_user_call_output.get_user_call_output(
                format("! test -f {pid_file} ||  cat {pid_file}"),
                user=params.storm_user)[1]

            # if multiple processes are running (for example user can start logviewer from console)
            # there can be more than one id
            pid = pid.replace("\n", " ")

            Execute(format("{sudo} kill {pid}"), not_if=process_dont_exist)

            Execute(
                format("{sudo} kill -9 {pid}"),
                not_if=format(
                    "sleep 2; {process_dont_exist} || sleep 20; {process_dont_exist}"
                ),
                ignore_failures=True)

            File(pid_file, action="delete")
Example #36
def namenode(action=None, hdfs_binary=None, do_format=True, upgrade_type=None, env=None):
  if action is None:
    raise Fail('"action" parameter is required for function namenode().')

  if action in ["start", "stop"] and hdfs_binary is None:
    raise Fail('"hdfs_binary" parameter is required for function namenode().')

  if action == "configure":
    import params
    #we need this directory to be present before any action(HA manual steps for
    #additional namenode)
    create_name_dirs(params.dfs_name_dir)
  elif action == "start":
    Logger.info("Called service {0} with upgrade_type: {1}".format(action, str(upgrade_type)))
    setup_ranger_hdfs(upgrade_type=upgrade_type)
    import params
    if do_format and not params.hdfs_namenode_format_disabled:
      format_namenode()
      pass

    File(params.exclude_file_path,
         content=Template("exclude_hosts_list.j2"),
         owner=params.hdfs_user,
         group=params.user_group
    )

    if params.dfs_ha_enabled and \
      params.dfs_ha_namenode_standby is not None and \
      params.hostname == params.dfs_ha_namenode_standby:
        # if the current host is the standby NameNode in an HA deployment
        # run the bootstrap command, to start the NameNode in standby mode
        # this requires that the active NameNode is already up and running,
        # so this execute should be re-tried upon failure, up to a timeout
        success = bootstrap_standby_namenode(params)
        if not success:
          raise Fail("Could not bootstrap standby namenode")

    if upgrade_type == "rolling" and params.dfs_ha_enabled:
      # Most likely, ZKFC is up since RU will initiate the failover command. However, if that failed, it would have tried
      # to kill ZKFC manually, so we need to start it if not already running.
      safe_zkfc_op(action, env)

    options = ""
    if upgrade_type == "rolling":
      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"
        
    elif upgrade_type == "nonrolling":
      is_previous_image_dir = is_previous_fs_image()
      Logger.info(format("Previous file system image dir present is {is_previous_image_dir}"))

      if params.upgrade_direction == Direction.UPGRADE:
        options = "-rollingUpgrade started"
      elif params.upgrade_direction == Direction.DOWNGRADE:
        options = "-rollingUpgrade downgrade"

    Logger.info(format("Option for start command: {options}"))

    service(
      action="start",
      name="namenode",
      user=params.hdfs_user,
      options=options,
      create_pid_dir=True,
      create_log_dir=True
    )

    if params.security_enabled:
      Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
              user = params.hdfs_user)

    if params.dfs_ha_enabled:
      is_active_namenode_cmd = as_user(format("{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"), params.hdfs_user, env={'PATH':params.hadoop_bin_dir})
    else:
      is_active_namenode_cmd = True
    
    # During NonRolling Upgrade, both NameNodes are initially down,
    # so no point in checking if this is the active or standby.
    if upgrade_type == "nonrolling":
      is_active_namenode_cmd = False

    # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
    # no-HA                 | ON -> OFF                | Yes                      |
    # HA and active         | ON -> OFF                | Yes                      |
    # HA and standby        | no change                | no check                 |
    # RU with HA on active  | ON -> OFF                | Yes                      |
    # RU with HA on standby | ON -> OFF                | Yes                      |
    # EU with HA on active  | no change                | no check                 |
    # EU with HA on standby | no change                | no check                 |
    # EU non-HA             | no change                | no check                 |

    check_for_safemode_off = False
    msg = ""
    if params.dfs_ha_enabled:
      if upgrade_type is not None:
        check_for_safemode_off = True
        msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
      else:
        Logger.info("Wait for NameNode to become active.")
        if is_active_namenode(hdfs_binary): # active
          check_for_safemode_off = True
          msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
        else:
          msg = "Will remain in the current safemode state."
    else:
      msg = "Must wait to leave safemode since High Availability is not enabled."
      check_for_safemode_off = True

    Logger.info(msg)

    # During a NonRolling (aka Express Upgrade), stay in safemode since the DataNodes are down.
    stay_in_safe_mode = False
    if upgrade_type == "nonrolling":
      stay_in_safe_mode = True

    if check_for_safemode_off:
      Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
      if not stay_in_safe_mode:
        wait_for_safemode_off(hdfs_binary)

    # Always run this on non-HA, or active NameNode during HA.
    create_hdfs_directories(is_active_namenode_cmd)
    create_ranger_audit_hdfs_directories(is_active_namenode_cmd)

  elif action == "stop":
    import params
    service(
      action="stop", name="namenode", 
      user=params.hdfs_user
    )
  elif action == "status":
    import status_params
    check_process_status(status_params.namenode_pid_file)
  elif action == "decommission":
    decommission()
Example #37
0
def oozie_server_specific():
  import params
  
  no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.oozie_user)
  
  File(params.pid_file,
    action="delete",
    not_if=no_op_test
  )
  
  oozie_server_directories = [format("{oozie_home}/{oozie_tmp_dir}"), params.oozie_pid_dir, params.oozie_log_dir, params.oozie_tmp_dir, params.oozie_data_dir, params.oozie_lib_dir, params.oozie_webapps_dir, params.oozie_webapps_conf_dir, params.oozie_server_dir]
  Directory( oozie_server_directories,
    owner = params.oozie_user,
    group = params.user_group,
    mode = 0755,
    recursive = True,
    cd_access="a",
  )
  
  Directory(params.oozie_libext_dir,
            recursive=True,
  )
  
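  # The .hashcode file is a marker: if it exists together with an extracted share
  # directory, the time-expensive sharelib untar below is skipped.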
  hashcode_file = format("{oozie_home}/.hashcode")
  skip_recreate_sharelib = format("test -f {hashcode_file} && test -d {oozie_home}/share")

  untar_sharelib = ('tar','-xvf',format('{oozie_home}/oozie-sharelib.tar.gz'),'-C',params.oozie_home)

  Execute( untar_sharelib,    # time-expensive
    not_if  = format("{no_op_test} || {skip_recreate_sharelib}"), 
    sudo = True,
  )

  configure_cmds = []
  configure_cmds.append(('cp', params.ext_js_path, params.oozie_libext_dir))
  configure_cmds.append(('chown', format('{oozie_user}:{user_group}'), format('{oozie_libext_dir}/{ext_js_file}')))
  configure_cmds.append(('chown', '-RL', format('{oozie_user}:{user_group}'), params.oozie_webapps_conf_dir))
  
  Execute( configure_cmds,
    not_if  = no_op_test,
    sudo = True,
  )

  # download the database JAR
  download_database_library_if_needed()

  #falcon el extension
  if params.has_falcon_host:
    Execute(format('{sudo} cp {falcon_home}/oozie/ext/falcon-oozie-el-extension-*.jar {oozie_libext_dir}'),
      not_if  = no_op_test)

    Execute(format('{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar'),
      not_if  = no_op_test)

  if params.lzo_enabled and len(params.all_lzo_packages) > 0:
    Package(params.all_lzo_packages)
    Execute(format('{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'),
      not_if  = no_op_test,
    )

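  # The last prepare-war command is recorded in .prepare_war_cmd; prepare-war is
  # re-run only when the recorded command differs from the current one.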
  prepare_war_cmd_file = format("{oozie_home}/.prepare_war_cmd")
  prepare_war_cmd = format("cd {oozie_tmp_dir} && {oozie_setup_sh} prepare-war {oozie_secure}").strip()
  skip_prepare_war_cmd = format("test -f {prepare_war_cmd_file} && [[ `cat {prepare_war_cmd_file}` == '{prepare_war_cmd}' ]]")

  Execute(prepare_war_cmd,    # time-expensive
    user = params.oozie_user,
    not_if  = format("{no_op_test} || {skip_recreate_sharelib} && {skip_prepare_war_cmd}")
  )
  File(hashcode_file,
       mode = 0644,
  )
  File(prepare_war_cmd_file,
       content = prepare_war_cmd,
       mode = 0644,
  )

  if params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0:
    # Create hive-site and tez-site configs for oozie
    Directory(params.hive_conf_dir,
        recursive = True,
        owner = params.oozie_user,
        group = params.user_group
    )
    if 'hive-site' in params.config['configurations']:
      XmlConfig("hive-site.xml",
        conf_dir=params.hive_conf_dir,
        configurations=params.config['configurations']['hive-site'],
        configuration_attributes=params.config['configuration_attributes']['hive-site'],
        owner=params.oozie_user,
        group=params.user_group,
        mode=0644
    )
    if 'tez-site' in params.config['configurations']:
      XmlConfig( "tez-site.xml",
        conf_dir = params.hive_conf_dir,
        configurations = params.config['configurations']['tez-site'],
        configuration_attributes=params.config['configuration_attributes']['tez-site'],
        owner = params.oozie_user,
        group = params.user_group,
        mode = 0664
    )
  Execute(('chown', '-R', format("{oozie_user}:{user_group}"), params.oozie_server_dir), 
          sudo=True
  )
Example #38
0
def oozie_server_specific(upgrade_type):
  import params
  
  no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.oozie_user)
  
  File(params.pid_file,
    action="delete",
    not_if=no_op_test
  )
  
  oozie_server_directories = [format("{oozie_home}/{oozie_tmp_dir}"), params.oozie_pid_dir, params.oozie_log_dir, params.oozie_tmp_dir, params.oozie_data_dir, params.oozie_lib_dir, params.oozie_webapps_dir, params.oozie_webapps_conf_dir, params.oozie_server_dir]
  Directory( oozie_server_directories,
    owner = params.oozie_user,
    group = params.user_group,
    mode = 0755,
    create_parents = True,
    cd_access="a",
  )
  
  Directory(params.oozie_libext_dir,
            create_parents = True,
  )
  
  hashcode_file = format("{oozie_home}/.hashcode")
  skip_recreate_sharelib = format("test -f {hashcode_file} && test -d {oozie_home}/share")

  untar_sharelib = ('tar','-xvf',format('{oozie_home}/oozie-sharelib.tar.gz'),'-C',params.oozie_home)

  Execute( untar_sharelib,    # time-expensive
    not_if  = format("{no_op_test} || {skip_recreate_sharelib}"), 
    sudo = True,
  )

  configure_cmds = []
  # Default to /usr/share/$TARGETSTACK-oozie/ext-2.2.zip as the first path
  source_ext_zip_paths = get_oozie_ext_zip_source_paths(upgrade_type, params)
  
  # Copy the first oozie ext-2.2.zip file that is found.
  # This uses a list to handle the cases when migrating from some versions of BigInsights to HDP.
  if source_ext_zip_paths is not None:
    for source_ext_zip_path in source_ext_zip_paths:
      if os.path.isfile(source_ext_zip_path):
        configure_cmds.append(('cp', source_ext_zip_path, params.oozie_libext_dir))
        configure_cmds.append(('chown', format('{oozie_user}:{user_group}'), format('{oozie_libext_dir}/{ext_js_file}')))

        Execute(configure_cmds,
                not_if=no_op_test,
                sudo=True,
                )
        break
  
  
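  # Recursively set ownership on the webapps conf dir, following symlinks while doing so.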
  Directory(params.oozie_webapps_conf_dir,
            owner = params.oozie_user,
            group = params.user_group,
            recursive_ownership = True,
            recursion_follow_links = True,
  )

  # download the database JAR
  download_database_library_if_needed()

  #falcon el extension
  if params.has_falcon_host:
    Execute(format('{sudo} cp {falcon_home}/oozie/ext/falcon-oozie-el-extension-*.jar {oozie_libext_dir}'),
      not_if  = no_op_test)

    Execute(format('{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar'),
      not_if  = no_op_test)

  if params.lzo_enabled and len(params.all_lzo_packages) > 0:
    Package(params.all_lzo_packages,
            retry_on_repo_unavailability=params.agent_stack_retry_on_unavailability,
            retry_count=params.agent_stack_retry_count)
    Execute(format('{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'),
      not_if  = no_op_test,
    )

  prepare_war(params)

  File(hashcode_file,
       mode = 0644,
  )

  if params.stack_version_formatted and check_stack_feature(StackFeature.OOZIE_CREATE_HIVE_TEZ_CONFIGS, params.stack_version_formatted):
    # Create hive-site and tez-site configs for oozie
    Directory(params.hive_conf_dir,
        create_parents = True,
        owner = params.oozie_user,
        group = params.user_group
    )
    if 'hive-site' in params.config['configurations']:
      hive_site_config = update_credential_provider_path(params.config['configurations']['hive-site'],
                                                         'hive-site',
                                                         os.path.join(params.hive_conf_dir, 'hive-site.jceks'),
                                                         params.oozie_user,
                                                         params.user_group
                                                         )
      XmlConfig("hive-site.xml",
        conf_dir=params.hive_conf_dir,
        configurations=hive_site_config,
        configuration_attributes=params.config['configuration_attributes']['hive-site'],
        owner=params.oozie_user,
        group=params.user_group,
        mode=0644
    )
    if 'tez-site' in params.config['configurations']:
      XmlConfig( "tez-site.xml",
        conf_dir = params.hive_conf_dir,
        configurations = params.config['configurations']['tez-site'],
        configuration_attributes=params.config['configuration_attributes']['tez-site'],
        owner = params.oozie_user,
        group = params.user_group,
        mode = 0664
    )

    # If Atlas is also installed, need to generate Atlas Hive hook (hive-atlas-application.properties file) in directory
    # {stack_root}/{current_version}/atlas/hook/hive/
    # Because this is a .properties file instead of an xml file, it will not be read automatically by Oozie.
    # However, we should still save the file on this host so that it can be uploaded to the Oozie Sharelib in DFS.
    if has_atlas_in_cluster():
      atlas_hook_filepath = os.path.join(params.hive_conf_dir, params.atlas_hook_filename)
      Logger.info("Has atlas in cluster, will save Atlas Hive hook into location %s" % str(atlas_hook_filepath))
      setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.oozie_user, params.user_group)

  Directory(params.oozie_server_dir,
    owner = params.oozie_user,
    group = params.user_group,
    recursive_ownership = True,  
  )
  if params.security_enabled:
    File(os.path.join(params.conf_dir, 'zkmigrator_jaas.conf'),
         owner=params.oozie_user,
         group=params.user_group,
         content=Template("zkmigrator_jaas.conf.j2")
         )
Example #39
0
  def action_run(self):

    path = self.resource.path
    dest_dir = self.resource.dest_dir
    dest_file = self.resource.dest_file
    kinnit_if_needed = self.resource.kinnit_if_needed
    user = self.resource.user   # user to perform commands as. If not provided, default to the owner
    owner = self.resource.owner
    group = self.resource.group
    mode = self.resource.mode
    hdfs_usr=self.resource.hdfs_user
    hadoop_conf_path = self.resource.hadoop_conf_dir
    bin_dir = self.resource.hadoop_bin_dir


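    # Work out the HDFS destination: dest_dir/dest_file when an explicit file name is
    # given, otherwise dest_dir plus the basename of the local source path.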
    if dest_file:
      copy_cmd = format("fs -copyFromLocal {path} {dest_dir}/{dest_file}")
      dest_path = dest_dir + dest_file if dest_dir.endswith(os.sep) else dest_dir + os.sep + dest_file
    else:
      dest_file_name = os.path.split(path)[1]
      copy_cmd = format("fs -copyFromLocal {path} {dest_dir}")
      dest_path = dest_dir + os.sep + dest_file_name
    # The "unless" check below must run as the resource user
    
    if kinnit_if_needed:
      Execute(kinnit_if_needed, 
              user=user if user else owner,
      )
    
    unless_cmd = as_user(format("PATH=$PATH:{bin_dir} hadoop fs -ls {dest_path}"), user if user else owner)

    ExecuteHadoop(copy_cmd,
                  not_if=unless_cmd,
                  user=user if user else owner,
                  bin_dir=bin_dir,
                  conf_dir=hadoop_conf_path
                  )

    if not owner:
      chown = None
    else:
      if not group:
        chown = owner
      else:
        chown = format('{owner}:{group}')

    if chown:
      chown_cmd = format("fs -chown {chown} {dest_path}")

      ExecuteHadoop(chown_cmd,
                    user=hdfs_usr,
                    bin_dir=bin_dir,
                    conf_dir=hadoop_conf_path)
    pass

    if mode:
      dir_mode = oct(mode)[1:]
      chmod_cmd = format('fs -chmod {dir_mode} {dest_path}')

      ExecuteHadoop(chmod_cmd,
                    user=hdfs_usr,
                    bin_dir=bin_dir,
                    conf_dir=hadoop_conf_path)
    pass
Example #40
0
def namenode(action=None,
             hdfs_binary=None,
             do_format=True,
             upgrade_type=None,
             env=None):
    if action is None:
        raise Fail('"action" parameter is required for function namenode().')

    if action in ["start", "stop"] and hdfs_binary is None:
        raise Fail(
            '"hdfs_binary" parameter is required for function namenode().')

    if action == "configure":
        import params
        #we need this directory to be present before any action(HA manual steps for
        #additional namenode)
        create_name_dirs(params.dfs_name_dir)
    elif action == "start":
        Logger.info("Called service {0} with upgrade_type: {1}".format(
            action, str(upgrade_type)))
        setup_ranger_hdfs(upgrade_type=upgrade_type)
        import params
        if do_format:
            format_namenode()
            pass

        File(params.exclude_file_path,
             content=Template("exclude_hosts_list.j2"),
             owner=params.hdfs_user,
             group=params.user_group)

        if params.dfs_ha_enabled and \
          params.dfs_ha_namenode_standby is not None and \
          params.hostname == params.dfs_ha_namenode_standby:
            # if the current host is the standby NameNode in an HA deployment
            # run the bootstrap command, to start the NameNode in standby mode
            # this requires that the active NameNode is already up and running,
            # so this execute should be re-tried upon failure, up to a timeout
            success = bootstrap_standby_namenode(params)
            if not success:
                raise Fail("Could not bootstrap standby namenode")

        if upgrade_type == "rolling" and params.dfs_ha_enabled:
            # Most likely, ZKFC is up since RU will initiate the failover command. However, if that failed, it would have tried
            # to kill ZKFC manually, so we need to start it if not already running.
            safe_zkfc_op(action, env)

        options = ""
        if upgrade_type == "rolling":
            options = "-rollingUpgrade started"
        elif upgrade_type == "nonrolling":
            is_previous_image_dir = is_previous_fs_image()
            Logger.info(
                format(
                    "Previous file system image dir present is {is_previous_image_dir}"
                ))

            if params.dfs_ha_enabled:
                if params.desired_namenode_role is None:
                    raise Fail(
                        "Did not receive parameter \"desired_namenode_role\" to indicate the role that this NameNode should have."
                    )

                if params.desired_namenode_role == "active":
                    # The "-upgrade" command can only be used exactly once. If used more than once during a retry, it will cause problems.
                    options = "" if is_previous_image_dir else "-upgrade"

                if params.desired_namenode_role == "standby":
                    options = "-bootstrapStandby -force"
            else:
                # Both Primary and Secondary NameNode can use the same command.
                options = "" if is_previous_image_dir else "-upgrade"
        Logger.info(format("Option for start command: {options}"))

        service(action="start",
                name="namenode",
                user=params.hdfs_user,
                options=options,
                create_pid_dir=True,
                create_log_dir=True)

        if params.security_enabled:
            Execute(format(
                "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"
            ),
                    user=params.hdfs_user)

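        # Shell check that exits 0 only once the NameNode reports "Safe mode is OFF";
        # it is retried below for up to 30 minutes (180 tries, 10 seconds apart).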
        is_namenode_safe_mode_off = format(
            "{hdfs_binary} dfsadmin -fs {namenode_address} -safemode get | grep 'Safe mode is OFF'"
        )
        if params.dfs_ha_enabled:
            is_active_namenode_cmd = as_user(format(
                "{hdfs_binary} --config {hadoop_conf_dir} haadmin -getServiceState {namenode_id} | grep active"
            ),
                                             params.hdfs_user,
                                             env={
                                                 'PATH': params.hadoop_bin_dir
                                             })
        else:
            is_active_namenode_cmd = False

        # During NonRolling Upgrade, both NameNodes are initially down,
        # so no point in checking if this is the active or standby.
        if upgrade_type == "nonrolling":
            is_active_namenode_cmd = False

        # ___Scenario___________|_Expected safemode state__|_Wait for safemode OFF____|
        # no-HA                 | ON -> OFF                | Yes                      |
        # HA and active         | ON -> OFF                | Yes                      |
        # HA and standby        | no change                | no check                 |
        # RU with HA on active  | ON -> OFF                | Yes                      |
        # RU with HA on standby | ON -> OFF                | Yes                      |
        # EU with HA on active  | no change                | no check                 |
        # EU with HA on standby | no change                | no check                 |
        # EU non-HA             | no change                | no check                 |

        check_for_safemode_off = False
        msg = ""
        if params.dfs_ha_enabled:
            if upgrade_type is not None:
                check_for_safemode_off = True
                msg = "Must wait to leave safemode since High Availability is enabled during a Stack Upgrade"
            else:
                # During normal operations, the NameNode is expected to be up.
                code, out = shell.call(
                    is_active_namenode_cmd,
                    logoutput=True)  # If active NN, code will be 0
                if code == 0:  # active
                    check_for_safemode_off = True
                    msg = "Must wait to leave safemode since High Availability is enabled and this is the Active NameNode."
                else:
                    msg = "Will remain in the current safemode state."
        else:
            msg = "Must wait to leave safemode since High Availability is not enabled."
            check_for_safemode_off = True

        Logger.info(msg)

        # During a NonRolling (aka Express Upgrade), stay in safemode since the DataNodes are down.
        stay_in_safe_mode = False
        if upgrade_type == "nonrolling":
            stay_in_safe_mode = True

        if check_for_safemode_off:
            Logger.info("Stay in safe mode: {0}".format(stay_in_safe_mode))
            if not stay_in_safe_mode:
                Logger.info(
                    "Wait to leave safemode since it must transition from ON to OFF."
                )
                try:
                    # Wait up to 30 mins
                    Execute(is_namenode_safe_mode_off,
                            tries=180,
                            try_sleep=10,
                            user=params.hdfs_user,
                            logoutput=True)
                except Fail:
                    Logger.error(
                        "NameNode is still in safemode, please be careful with commands that need safemode OFF."
                    )

        # Always run this on non-HA, or active NameNode during HA.
        create_hdfs_directories(is_active_namenode_cmd)

    elif action == "stop":
        import params
        service(action="stop", name="namenode", user=params.hdfs_user)
    elif action == "status":
        import status_params
        check_process_status(status_params.namenode_pid_file)
    elif action == "decommission":
        decommission()
Example #41
0
def hive_service(name, action='start', upgrade_type=None):

    import params

    if name == 'metastore':
        pid_file = format("{hive_pid_dir}/{hive_metastore_pid}")
        cmd = format(
            "{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.log {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )
    elif name == 'hiveserver2':
        pid_file = format("{hive_pid_dir}/{hive_pid}")
        cmd = format(
            "{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.log {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )

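    # Read the pid file as the hive user; the surrounding backticks make the outer
    # shell substitute the pid into the ps-based liveness check below.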
    pid_expression = "`" + as_user(format("cat {pid_file}"),
                                   user=params.hive_user) + "`"
    process_id_exists_command = format(
        "ls {pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1"
    )

    if action == 'start':
        if name == 'hiveserver2':
            check_fs_root()

        daemon_cmd = cmd

        # upgrading hiveserver2 (rolling_restart) means that there is an existing,
        # de-registering hiveserver2; the pid will still exist, but the new
        # hiveserver is spinning up on a new port, so the pid will be re-written
        if upgrade_type == UPGRADE_TYPE_ROLLING:
            process_id_exists_command = None

        if params.security_enabled:
            hive_kinit_cmd = format(
                "{kinit_path_local} -kt {hive_server2_keytab} {hive_principal}; "
            )
            Execute(hive_kinit_cmd, user=params.hive_user)

        Execute(daemon_cmd,
                user=params.hive_user,
                environment={
                    'JAVA_HOME': params.java64_home,
                    'HIVE_CMD': params.hive_cmd
                },
                path=params.execute_path,
                not_if=process_id_exists_command)

        if params.hive_jdbc_driver == "com.mysql.jdbc.Driver" or \
           params.hive_jdbc_driver == "org.postgresql.Driver" or \
           params.hive_jdbc_driver == "oracle.jdbc.driver.OracleDriver":

            db_connection_check_command = format(
                "{java64_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}"
            )

            Execute(db_connection_check_command,
                    path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
                    tries=5,
                    try_sleep=10)
    elif action == 'stop':

        daemon_kill_cmd = format("{sudo} kill {pid_expression}")
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")

        Execute(daemon_kill_cmd,
                not_if=format("! ({process_id_exists_command})"))

        wait_time = 5
        Execute(
            daemon_hard_kill_cmd,
            not_if=format(
                "( sleep {wait_time} && ! ({process_id_exists_command}) )"))

        # check if stopped the process, else fail the task
        Execute(
            format("! ({process_id_exists_command})"),
            tries=20,
            try_sleep=3,
        )

        File(pid_file, action="delete")
Example #42
0
def hive(name=None):
  import params

  if name == 'hiveserver2':
    # HDP 2.1.* or lower
    if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, "2.2.0.0") < 0:
      params.HdfsResource(params.webhcat_apps_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.webhcat_user,
                            mode=0755
                          )
    
    # Create webhcat dirs.
    if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
      params.HdfsResource(params.hcat_hdfs_user_dir,
                           type="directory",
                           action="create_on_execute",
                           owner=params.hcat_user,
                           mode=params.hcat_hdfs_user_mode
      )

    params.HdfsResource(params.webhcat_hdfs_user_dir,
                         type="directory",
                         action="create_on_execute",
                         owner=params.webhcat_user,
                         mode=params.webhcat_hdfs_user_mode
    )

    # ****** Begin Copy Tarballs ******
    # *********************************
    # HDP 2.2 or higher, copy mapreduce.tar.gz to HDFS
    if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, '2.2') >= 0:
      copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)

    # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode.
    # This can use a different source and dest location to account for both HDP 2.1 and 2.2
    copy_to_hdfs("pig",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 custom_source_file=params.pig_tar_source,
                 custom_dest_file=params.pig_tar_dest_file,
                 host_sys_prepped=params.host_sys_prepped)
    copy_to_hdfs("hive",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 custom_source_file=params.hive_tar_source,
                 custom_dest_file=params.hive_tar_dest_file,
                 host_sys_prepped=params.host_sys_prepped)

    wildcard_tarballs = ["sqoop", "hadoop_streaming"]
    for tarball_name in wildcard_tarballs:
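      # Resolve params.<name>_tar_source and params.<name>_tar_dest_dir dynamically;
      # tarballs without both values configured are skipped.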
      source_file_pattern = eval("params." + tarball_name + "_tar_source")
      dest_dir = eval("params." + tarball_name + "_tar_dest_dir")

      if source_file_pattern is None or dest_dir is None:
        continue

      source_files = glob.glob(source_file_pattern) if "*" in source_file_pattern else [source_file_pattern]
      for source_file in source_files:
        src_filename = os.path.basename(source_file)
        dest_file = os.path.join(dest_dir, src_filename)

        copy_to_hdfs(tarball_name,
                     params.user_group,
                     params.hdfs_user,
                     file_mode=params.tarballs_mode,
                     custom_source_file=source_file,
                     custom_dest_file=dest_file,
                     host_sys_prepped=params.host_sys_prepped)
    # ******* End Copy Tarballs *******
    # *********************************

    # Create Hive Metastore Warehouse Dir
    params.HdfsResource(params.hive_apps_whs_dir,
                         type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          mode=0777
    )

    # Create Hive User Dir
    params.HdfsResource(params.hive_hdfs_user_dir,
                         type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          mode=params.hive_hdfs_user_mode
    )
    
    if not is_empty(params.hive_exec_scratchdir) and not urlparse(params.hive_exec_scratchdir).path.startswith("/tmp"):
      params.HdfsResource(params.hive_exec_scratchdir,
                           type="directory",
                           action="create_on_execute",
                           owner=params.hive_user,
                           group=params.hdfs_user,
                           mode=0777) # Hive expects this dir to be writeable by everyone as it is used as a temp dir
      
    params.HdfsResource(None, action="execute")

  Directory(params.hive_etc_dir_prefix,
            mode=0755
  )

  # We should change configurations for the client as well as the server,
  # because stale-config detection is service-level, not component-level.
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  XmlConfig("hive-site.xml",
            conf_dir=params.hive_config_dir,
            configurations=params.hive_site_config,
            configuration_attributes=params.config['configuration_attributes']['hive-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=0644)

  setup_atlas_hive()
  
  if params.hive_specific_configs_supported and name == 'hiveserver2':
    XmlConfig("hiveserver2-site.xml",
              conf_dir=params.hive_server_conf_dir,
              configurations=params.config['configurations']['hiveserver2-site'],
              configuration_attributes=params.config['configuration_attributes']['hiveserver2-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=0644)
  
  File(format("{hive_config_dir}/hive-env.sh"),
       owner=params.hive_user,
       group=params.user_group,
       content=InlineTemplate(params.hive_env_sh_template)
  )

  # On some OSes this directory may not exist, so create it before placing files in it
  Directory(params.limits_conf_dir,
            recursive=True,
            owner='root',
            group='root'
            )

  File(os.path.join(params.limits_conf_dir, 'hive.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hive.conf.j2")
       )

  if (name == 'metastore' or name == 'hiveserver2') and not os.path.exists(params.target):
    jdbc_connector()

  File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
       content = DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")),
       mode = 0644,
  )

  if name == 'metastore':
    File(params.start_metastore_path,
         mode=0755,
         content=StaticFile('startMetastore.sh')
    )
    if params.init_metastore_schema:
      create_schema_cmd = format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                 "{hive_bin}/schematool -initSchema "
                                 "-dbType {hive_metastore_db_type} "
                                 "-userName {hive_metastore_user_name} "
                                 "-passWord {hive_metastore_user_passwd!p}")

      check_schema_created_cmd = as_user(format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                        "{hive_bin}/schematool -info "
                                        "-dbType {hive_metastore_db_type} "
                                        "-userName {hive_metastore_user_name} "
                                        "-passWord {hive_metastore_user_passwd!p}"), params.hive_user)

      # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords.
      # Fixing it with the hack below:
      quoted_hive_metastore_user_passwd = quote_bash_args(quote_bash_args(params.hive_metastore_user_passwd))
      if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \
          or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"':
        quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[1:-1]
      Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(check_schema_created_cmd.replace(
          format("-passWord {quoted_hive_metastore_user_passwd}"), "-passWord " + utils.PASSWORDS_HIDE_STRING))

      Execute(create_schema_cmd,
              not_if = check_schema_created_cmd,
              user = params.hive_user
      )
  elif name == 'hiveserver2':
    File(params.start_hiveserver2_path,
         mode=0755,
         content=Template(format('{start_hiveserver2_script}'))
    )

  if name != "client":
    crt_directory(params.hive_pid_dir)
    crt_directory(params.hive_log_dir)
    crt_directory(params.hive_var_lib)
Example #43
0
def service(action=None,
            name=None,
            user=None,
            options="",
            create_pid_dir=False,
            create_log_dir=False):
    """
  :param action: Either "start" or "stop"
  :param name: Component name, e.g., "namenode", "datanode", "secondarynamenode", "zkfc"
  :param user: User to run the command as
  :param options: Additional options to pass to command as a string
  :param create_pid_dir: Create PID directory
  :param create_log_dir: Create log file directory
  """
    import params

    options = options if options else ""
    pid_dir = format("{hadoop_pid_dir_prefix}/{user}")
    pid_file = format("{pid_dir}/hadoop-{user}-{name}.pid")
    hadoop_env_exports = {'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir}
    log_dir = format("{hdfs_log_dir_prefix}/{user}")

    # NFS GATEWAY is always started by root using jsvc due to rpcbind bugs
    # on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542
    if name == "nfs3":
        pid_file = format("{pid_dir}/hadoop_privileged_nfs3.pid")
        custom_export = {
            'HADOOP_PRIVILEGED_NFS_USER': params.hdfs_user,
            'HADOOP_PRIVILEGED_NFS_PID_DIR': pid_dir,
            'HADOOP_PRIVILEGED_NFS_LOG_DIR': log_dir
        }
        hadoop_env_exports.update(custom_export)

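    # The process counts as running only if the pid file exists and a process with
    # that pid is alive (pgrep -F reads the pid from the file).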
    process_id_exists_command = as_sudo(
        ["test", "-f", pid_file]) + " && " + as_sudo(["pgrep", "-F", pid_file])

    # on STOP directories shouldn't be created
    # since during stop still old dirs are used (which were created during previous start)
    if action != "stop":
        if name == "nfs3":
            Directory(params.hadoop_pid_dir_prefix,
                      mode=0755,
                      owner=params.root_user,
                      group=params.root_group)
        else:
            Directory(params.hadoop_pid_dir_prefix,
                      mode=0755,
                      owner=params.hdfs_user,
                      group=params.user_group)
        if create_pid_dir:
            Directory(pid_dir,
                      owner=user,
                      group=params.user_group,
                      create_parents=True)
        if create_log_dir:
            if name == "nfs3":
                Directory(log_dir,
                          mode=0775,
                          owner=params.root_user,
                          group=params.user_group)
            else:
                Directory(log_dir,
                          owner=user,
                          group=params.user_group,
                          create_parents=True)

    if params.security_enabled and name == "datanode":
        ## The directory where pid files are stored in the secure data environment.
        hadoop_secure_dn_pid_dir = format(
            "{hadoop_pid_dir_prefix}/{hdfs_user}")
        hadoop_secure_dn_pid_file = format(
            "{hadoop_secure_dn_pid_dir}/hadoop_secure_dn.pid")

        # At datanode_non_root stack version and further, we may start datanode as a non-root even in secure cluster
        if not (params.stack_version_formatted and check_stack_feature(
                StackFeature.DATANODE_NON_ROOT, params.stack_version_formatted)
                ) or params.secure_dn_ports_are_in_use:
            user = "root"
            pid_file = format(
                "{hadoop_pid_dir_prefix}/{hdfs_user}/hadoop-{hdfs_user}-{name}.pid"
            )

        if action == 'stop' and (params.stack_version_formatted and check_stack_feature(StackFeature.DATANODE_NON_ROOT, params.stack_version_formatted)) and \
          os.path.isfile(hadoop_secure_dn_pid_file):
            # We need special handling for this case to handle the situation
            # when we configure non-root secure DN and then restart it
            # to handle new configs. Otherwise we will not be able to stop
            # a running instance
            user = "root"

            try:
                check_process_status(hadoop_secure_dn_pid_file)

                custom_export = {'HADOOP_SECURE_DN_USER': params.hdfs_user}
                hadoop_env_exports.update(custom_export)

            except ComponentIsNotRunning:
                pass

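    # Build the daemon command: wrap it with sudo when the effective user is root,
    # otherwise run it as that user via as_user.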
    hdfs_bin = format("{hadoop_bin}/hdfs")

    if user == "root":
        cmd = [
            hdfs_bin, "--config", params.hadoop_conf_dir, "--daemon", action,
            name
        ]
        if options:
            cmd += [
                options,
            ]
        daemon_cmd = as_sudo(cmd)
    else:
        cmd = format(
            "{ulimit_cmd} {hdfs_bin} --config {hadoop_conf_dir} --daemon {action} {name}"
        )
        if options:
            cmd += " " + options
        daemon_cmd = as_user(cmd, user)

    if action == "start":
        # remove pid file from dead process
        File(pid_file, action="delete", not_if=process_id_exists_command)

        try:
            Execute(daemon_cmd,
                    not_if=process_id_exists_command,
                    environment=hadoop_env_exports)
        except:
            show_logs(log_dir, user)
            raise
    elif action == "stop":
        try:
            Execute(daemon_cmd,
                    only_if=process_id_exists_command,
                    environment=hadoop_env_exports)
        except:
            show_logs(log_dir, user)
            raise
        wait_process_stopped(pid_file)
        File(pid_file, action="delete")
Example #44
0
def hive_service(name, action="start", rolling_restart=False):

    import params

    if name == "metastore":
        pid_file = format("{hive_pid_dir}/{hive_metastore_pid}")
        cmd = format(
            "{start_metastore_path} {hive_log_dir}/hive.out {hive_log_dir}/hive.log {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )
    elif name == "hiveserver2":
        pid_file = format("{hive_pid_dir}/{hive_pid}")
        cmd = format(
            "{start_hiveserver2_path} {hive_log_dir}/hive-server2.out {hive_log_dir}/hive-server2.log {pid_file} {hive_server_conf_dir} {hive_log_dir}"
        )

    pid_expression = "`" + as_user(format("cat {pid_file}"), user=params.hive_user) + "`"
    process_id_exists_command = format("ls {pid_file} >/dev/null 2>&1 && ps -p {pid_expression} >/dev/null 2>&1")

    if action == "start":
        if name == "hiveserver2":
            check_fs_root()

        daemon_cmd = cmd
        hadoop_home = params.hadoop_home
        hive_bin = "hive"

        # upgrading hiveserver2 (rolling_restart) means that there is an existing,
        # de-registering hiveserver2; the pid will still exist, but the new
        # hiveserver is spinning up on a new port, so the pid will be re-written
        if rolling_restart:
            process_id_exists_command = None

            if params.version:
                import os

                hadoop_home = format("/usr/hdp/{version}/hadoop")
                hive_bin = os.path.join(params.hive_bin, hive_bin)

        Execute(
            daemon_cmd,
            user=params.hive_user,
            environment={"HADOOP_HOME": hadoop_home, "JAVA_HOME": params.java64_home, "HIVE_BIN": hive_bin},
            path=params.execute_path,
            not_if=process_id_exists_command,
        )

        if (
            params.hive_jdbc_driver == "com.mysql.jdbc.Driver"
            or params.hive_jdbc_driver == "org.postgresql.Driver"
            or params.hive_jdbc_driver == "oracle.jdbc.driver.OracleDriver"
        ):

            db_connection_check_command = format(
                "{java64_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{hive_jdbc_connection_url}' {hive_metastore_user_name} {hive_metastore_user_passwd!p} {hive_jdbc_driver}"
            )

            Execute(
                db_connection_check_command, path="/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin", tries=5, try_sleep=10
            )
    elif action == "stop":

        daemon_kill_cmd = format("{sudo} kill {pid_expression}")
        daemon_hard_kill_cmd = format("{sudo} kill -9 {pid_expression}")

        Execute(daemon_kill_cmd, not_if=format("! ({process_id_exists_command})"))

        wait_time = 5
        Execute(
            daemon_hard_kill_cmd,
            not_if=format(
                "! ({process_id_exists_command}) || ( sleep {wait_time} && ! ({process_id_exists_command}) )"
            ),
        )

        # check if stopped the process, else fail the task
        Execute(format("! ({process_id_exists_command})"), tries=20, try_sleep=3)

        File(pid_file, action="delete")
Example #45
0
def oozie_service(action = 'start', rolling_restart=False):
  """
  Starts or stops the Oozie service
  :param action: 'start' or 'stop'
  :param rolling_restart: if True, then most of the pre-startup checks are
  skipped since a variation of them was performed during the rolling upgrade
  :return:
  """
  import params

  environment={'OOZIE_CONFIG': params.conf_dir}

  if params.security_enabled:
    if params.oozie_principal is None:
      oozie_principal_with_host = 'missing_principal'
    else:
      oozie_principal_with_host = params.oozie_principal.replace("_HOST", params.hostname)
    kinit_if_needed = format("{kinit_path_local} -kt {oozie_keytab} {oozie_principal_with_host};")
  else:
    kinit_if_needed = ""

  no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.oozie_user)
  
  if action == 'start':
    start_cmd = format("cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-start.sh")
    
    if params.jdbc_driver_name == "com.mysql.jdbc.Driver" or \
       params.jdbc_driver_name == "com.microsoft.sqlserver.jdbc.SQLServerDriver" or \
       params.jdbc_driver_name == "org.postgresql.Driver" or \
       params.jdbc_driver_name == "oracle.jdbc.driver.OracleDriver":
      db_connection_check_command = format("{java_home}/bin/java -cp {check_db_connection_jar}:{target} org.apache.ambari.server.DBConnectionVerification '{oozie_jdbc_connection_url}' {oozie_metastore_user_name} {oozie_metastore_user_passwd!p} {jdbc_driver_name}")
    else:
      db_connection_check_command = None

    if not rolling_restart:
      if not os.path.isfile(params.target) and params.jdbc_driver_name == "org.postgresql.Driver":
        print format("ERROR: jdbc file {target} is unavailable. Please follow these steps:\n" \
          "1) Download postgresql-9.0-801.jdbc4.jar.\n2) Create needed directory: mkdir -p {oozie_home}/libserver/\n" \
          "3) Copy postgresql-9.0-801.jdbc4.jar to newly created dir: cp /path/to/jdbc/postgresql-9.0-801.jdbc4.jar " \
          "{oozie_home}/libserver/\n4) Copy postgresql-9.0-801.jdbc4.jar to libext: cp " \
          "/path/to/jdbc/postgresql-9.0-801.jdbc4.jar {oozie_home}/libext/\n")
        exit(1)

      if db_connection_check_command:
        Execute( db_connection_check_command, 
                 tries=5, 
                 try_sleep=10,
                 user=params.oozie_user,
        )

      Execute( format("cd {oozie_tmp_dir} && {oozie_home}/bin/ooziedb.sh create -sqlfile oozie.sql -run"), 
               user = params.oozie_user, not_if = no_op_test,
               ignore_failures = True 
      )
      
      if params.security_enabled:
        Execute(kinit_if_needed,
                user = params.oozie_user,
        )
      
      
      if params.host_sys_prepped:
        print "Skipping creation of oozie sharelib as host is sys prepped"
        hdfs_share_dir_exists = True # skip time-expensive hadoop fs -ls check
      elif WebHDFSUtil.is_webhdfs_available(params.is_webhdfs_enabled, params.default_fs):
        # Checking via WebHDFS is much faster than executing hadoop fs -ls.
        util = WebHDFSUtil(params.hdfs_site, params.oozie_user, params.security_enabled)
        list_status = util.run_command(params.hdfs_share_dir, 'GETFILESTATUS', method='GET', ignore_status_codes=['404'], assertable_result=False)
        hdfs_share_dir_exists = ('FileStatus' in list_status)
      else:
        # Fall back to the time-expensive hadoop fs -ls check.
        hdfs_share_dir_exists = shell.call(format("{kinit_if_needed} hadoop --config {hadoop_conf_dir} dfs -ls {hdfs_share_dir} | awk 'BEGIN {{count=0;}} /share/ {{count++}} END {{if (count > 0) {{exit 0}} else {{exit 1}}}}'"),
                                 user=params.oozie_user)[0]
                                 
      if not hdfs_share_dir_exists:                      
        Execute( params.put_shared_lib_to_hdfs_cmd, 
                 user = params.oozie_user,
                 path = params.execute_path 
        )
        params.HdfsResource(format("{oozie_hdfs_user_dir}/share"),
                             type="directory",
                             action="create_on_execute",
                             mode=0755,
                             recursive_chmod=True,
        )
        params.HdfsResource(None, action="execute")
        

    # start oozie
    Execute( start_cmd, environment=environment, user = params.oozie_user,
      not_if = no_op_test )

  elif action == 'stop':
    stop_cmd  = format("cd {oozie_tmp_dir} && {oozie_home}/bin/oozie-stop.sh")

    # stop oozie
    Execute(stop_cmd, environment=environment, only_if  = no_op_test,
      user = params.oozie_user)

    File(params.pid_file, action = "delete")
Example #46
0
def service(name, action = 'start'):
  import params
  import status_params

  pid_file = status_params.pid_files[name]
  no_op_test = as_user(format(
    "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.storm_user)

  if name == "logviewer" or name == "drpc":
    tries_count = 12
  else:
    tries_count = 6

  if name == 'ui':
    process_grep = "backtype.storm.ui.core$"
  elif name == "rest_api":
    process_grep = format("{rest_lib_dir}/storm-rest-.*\.jar$")
  else:
    process_grep = format("storm.daemon.{name}$")

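  # The daemon's JVM is located via jps and its pid written to the pid file after
  # start; the write is retried below until the process shows up.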
  find_proc = format("{jps_binary} -l  | grep {process_grep}")
  write_pid = format("{find_proc} | awk {{'print $1'}} > {pid_file}")
  crt_pid_cmd = format("{find_proc} && {write_pid}")
  storm_env = format(
    "source {conf_dir}/storm-env.sh ; export PATH=$JAVA_HOME/bin:$PATH")

  if action == "start":
    if name == "rest_api":
      process_cmd = format(
        "{storm_env} ; java -jar {rest_lib_dir}/`ls {rest_lib_dir} | grep -wE storm-rest-[0-9.-]+\.jar` server")
      cmd = format(
        "{process_cmd} {rest_api_conf_file} > {log_dir}/restapi.log 2>&1")
    else:
      cmd = format("{storm_env} ; storm {name} > {log_dir}/{name}.out 2>&1")

    Execute(cmd,
      not_if = no_op_test,
      user = params.storm_user,
      wait_for_finish = False,
      path = params.storm_bin_dir)

    Execute(crt_pid_cmd,
      user = params.storm_user,
      logoutput = True,
      tries = tries_count,
      try_sleep = 10,
      path = params.storm_bin_dir)

  elif action == "stop":
    process_dont_exist = format("! ({no_op_test})")
    pid = '`' + as_user(format("cat {pid_file}"), user=params.storm_user) + '`'

    Execute(format("{sudo} kill {pid}"),
      not_if = process_dont_exist)

    Execute(format("{sudo} kill -9 {pid}"),
      not_if = format(
        "sleep 2; {process_dont_exist} || sleep 20; {process_dont_exist}"),
      ignore_failures = True)

    File(pid_file, action = "delete")
Example #47
0
def startRebalancingProcess(threshold, rebalance_env):
  rebalanceCommand = format('hdfs --config {hadoop_conf_dir} balancer -threshold {threshold}')
  return as_user(rebalanceCommand, params.hdfs_user, env=rebalance_env)
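
# Usage sketch (not part of the original snippet; assumes the Execute resource and a
# params module like the ones used in the other examples on this page, plus an
# illustrative threshold of 10 percent):
#   Execute(startRebalancingProcess(10, {'PATH': params.hadoop_bin_dir}),
#           wait_for_finish=True,
#           logoutput=True)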
Example #48
0
def service(action=None, name=None, user=None, options="", create_pid_dir=False,
            create_log_dir=False):
  """
  :param action: Either "start" or "stop"
  :param name: Component name, e.g., "namenode", "datanode", "secondarynamenode", "zkfc"
  :param user: User to run the command as
  :param options: Additional options to pass to command as a string
  :param create_pid_dir: Create PID directory
  :param create_log_dir: Create log file directory
  """
  import params

  options = options if options else ""
  pid_dir = format("{hadoop_pid_dir_prefix}/{user}")
  pid_file = format("{pid_dir}/hadoop-{user}-{name}.pid")
  hadoop_env_exports = {
    'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir
  }
  log_dir = format("{hdfs_log_dir_prefix}/{user}")

  # NFS GATEWAY is always started by root using jsvc due to rpcbind bugs
  # on Linux such as CentOS6.2. https://bugzilla.redhat.com/show_bug.cgi?id=731542
  if name == "nfs3" :
    pid_file = format("{pid_dir}/hadoop_privileged_nfs3.pid")
    custom_export = {
      'HADOOP_PRIVILEGED_NFS_USER': params.hdfs_user,
      'HADOOP_PRIVILEGED_NFS_PID_DIR': pid_dir,
      'HADOOP_PRIVILEGED_NFS_LOG_DIR': log_dir
    }
    hadoop_env_exports.update(custom_export)

  process_id_exists_command = as_sudo(["test", "-f", pid_file]) + " && " + as_sudo(["pgrep", "-F", pid_file])

  # on STOP directories shouldn't be created
  # since during stop still old dirs are used (which were created during previous start)
  if action != "stop":
    if name == "nfs3":
      Directory(params.hadoop_pid_dir_prefix,
                mode=0755,
                owner=params.root_user,
                group=params.root_group
      )
    else:
      Directory(params.hadoop_pid_dir_prefix,
                  mode=0755,
                  owner=params.hdfs_user,
                  group=params.user_group
      )
    if create_pid_dir:
      Directory(pid_dir,
                owner=user,
                recursive=True)
    if create_log_dir:
      if name == "nfs3":
        Directory(log_dir,
                  mode=0775,
                  owner=params.root_user,
                  group=params.user_group)
      else:
        Directory(log_dir,
                  owner=user,
                  recursive=True)

  if params.security_enabled and name == "datanode":
    ## The directory where pid files are stored in the secure data environment.
    hadoop_secure_dn_pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}")
    hadoop_secure_dn_pid_file = format("{hadoop_secure_dn_pid_dir}/hadoop_secure_dn.pid")

    # At Champlain stack and further, we may start datanode as a non-root even in secure cluster
    if not (params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0) or params.secure_dn_ports_are_in_use:
      user = "root"
      pid_file = format(
        "{hadoop_pid_dir_prefix}/{hdfs_user}/hadoop-{hdfs_user}-{name}.pid")

    if action == 'stop' and (params.hdp_stack_version != "" and compare_versions(params.hdp_stack_version, '2.2') >= 0) and \
      os.path.isfile(hadoop_secure_dn_pid_file):
        # We need special handling for this case to handle the situation
        # when we configure non-root secure DN and then restart it
        # to handle new configs. Otherwise we will not be able to stop
        # a running instance 
        user = "root"
        
        try:
          check_process_status(hadoop_secure_dn_pid_file)
          
          custom_export = {
            'HADOOP_SECURE_DN_USER': params.hdfs_user
          }
          hadoop_env_exports.update(custom_export)
          
        except ComponentIsNotRunning:
          pass

  hadoop_daemon = format("{hadoop_bin}/hadoop-daemon.sh")

  if user == "root":
    cmd = [hadoop_daemon, "--config", params.hadoop_conf_dir, action, name]
    if options:
      cmd += [options, ]
    daemon_cmd = as_sudo(cmd)
  else:
    cmd = format("{ulimit_cmd} {hadoop_daemon} --config {hadoop_conf_dir} {action} {name}")
    if options:
      cmd += " " + options
    daemon_cmd = as_user(cmd, user)
     
  if action == "start":
    # remove pid file from dead process
    File(pid_file, action="delete", not_if=process_id_exists_command)
    Execute(daemon_cmd, not_if=process_id_exists_command, environment=hadoop_env_exports)
  elif action == "stop":
    Execute(daemon_cmd, only_if=process_id_exists_command, environment=hadoop_env_exports)
    File(pid_file, action="delete")
Example #49
0
  def service_check(self, env):
    import params

    env.set_params(params)
    unique = functions.get_unique_id_and_date()
    dir = '/tmp'
    tmp_file = format("{dir}/{unique}")

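    # Exits 0 only once safemode reports OFF; ExecuteHadoop below retries it up to
    # 20 times, 3 seconds apart.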
    safemode_command = format("dfsadmin -fs {namenode_address} -safemode get | grep OFF")

    if params.security_enabled:
      Execute(format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name}"),
        user=params.hdfs_user
      )
    ExecuteHadoop(safemode_command,
                  user=params.hdfs_user,
                  logoutput=True,
                  conf_dir=params.hadoop_conf_dir,
                  try_sleep=3,
                  tries=20,
                  bin_dir=params.hadoop_bin_dir
    )
    params.HdfsResource(dir,
                        type="directory",
                        action="create_on_execute",
                        mode=0777
    )
    params.HdfsResource(tmp_file,
                        type="file",
                        action="delete_on_execute",
    )

    params.HdfsResource(tmp_file,
                        type="file",
                        source="/etc/passwd",
                        action="create_on_execute"
    )
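    # The HdfsResource calls above only queue create/delete operations; this call
    # with action="execute" applies them in a single batch.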
    params.HdfsResource(None, action="execute")

    if params.has_journalnode_hosts:
      if params.security_enabled:
        for host in params.journalnode_hosts:
          if params.https_only:
            uri = format("https://{host}:{journalnode_port}")
          else:
            uri = format("http://{host}:{journalnode_port}")
          response, errmsg, time_millis = curl_krb_request(params.tmp_dir, params.smoke_user_keytab,
                                                           params.smokeuser_principal, uri, "jn_service_check",
                                                           params.kinit_path_local, False, None, params.smoke_user)
          if not response:
            Logger.error("Cannot access WEB UI on: {0}. Error : {1}".format(uri, errmsg))
            return 1
      else:
        journalnode_port = params.journalnode_port
        checkWebUIFileName = "checkWebUI.py"
        checkWebUIFilePath = format("{tmp_dir}/{checkWebUIFileName}")
        comma_sep_jn_hosts = ",".join(params.journalnode_hosts)
        checkWebUICmd = format("python {checkWebUIFilePath} -m {comma_sep_jn_hosts} -p {journalnode_port} -s {https_only}")
        File(checkWebUIFilePath,
             content=StaticFile(checkWebUIFileName),
             mode=0775)

        Execute(checkWebUICmd,
                logoutput=True,
                try_sleep=3,
                tries=5,
                user=params.smoke_user
        )

    if params.is_namenode_master:
      if params.has_zkfc_hosts:
        pid_dir = format("{hadoop_pid_dir_prefix}/{hdfs_user}")
        pid_file = format("{pid_dir}/hadoop-{hdfs_user}-zkfc.pid")
        check_zkfc_process_cmd = as_user(format(
          "ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.hdfs_user)
        Execute(check_zkfc_process_cmd,
                logoutput=True,
                try_sleep=3,
                tries=5
        )