Example #1
  def action_create(self):
    group = self.group
    if not group:
      command = ['groupadd']
      Logger.info("Adding group %s" % self.resource)
    else:
      command = ['groupmod']
      
      for option_name, attributes in self.options.iteritems():
      if getattr(self.resource, option_name) is not None and getattr(self.resource, option_name) != attributes[0](self):
          break
      else:
        return
      
      Logger.info("Modifying group %s" % (self.resource.group_name))

    for option_name, attributes in self.options.iteritems():
      option_value = getattr(self.resource, option_name)
      if attributes[1] and option_value:
        command += [attributes[1], str(option_value)]
        
    # if trying to modify an existing group but no values to modify are provided
    if self.group and len(command) == 1:
      return

    command.append(self.resource.group_name)
    
    shell.checked_call(command, sudo=True)
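
Two details are easy to miss here. The for/else runs its else branch only when the loop finishes without break, i.e. when no attribute differs from the group's current state, so the modify path returns before building a command. The loop also assumes each value in self.options is a (current-value getter, command-line flag) pair; a minimal sketch of that table, with hypothetical getters (the real mapping is defined elsewhere in the provider):

  options = dict(
    # attributes[0] reads the live value (called with the provider), attributes[1] is the flag
    gid=(lambda provider: provider.group.gr_gid, "-g"),
    password=(lambda provider: provider.group.gr_passwd, "-p"),
  )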
Example #2
def read_file(filename):
  tmpf = tempfile.NamedTemporaryFile()
  shell.checked_call(["cp", "-f", filename, tmpf.name], sudo=True)
  
  with tmpf:
    with open(tmpf.name, "rb") as fp:
      return fp.read()
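
Copying with sudo into a NamedTemporaryFile lets an unprivileged agent read root-only files: the cp runs privileged, the read does not. A usage sketch (the path is illustrative):

  data = read_file("/etc/sudoers")  # bytes, readable despite root-only permissions on the source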
Example #3
 def action_run(self):
   if self.resource.creates:
     if sudo.path_exists(self.resource.creates):
       Logger.info("Skipping %s due to creates" % self.resource)
       return
     
   env = self.resource.environment
         
   for i in range(0, self.resource.tries):
     try:
       shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                           cwd=self.resource.cwd, env=env,
                           preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                           wait_for_finish=self.resource.wait_for_finish,
                           timeout=self.resource.timeout,
                           path=self.resource.path,
                           sudo=self.resource.sudo,
                           on_new_line=self.resource.on_new_line)
       break
     except Fail as ex:
       if i == self.resource.tries-1: # last try
         raise ex
       else:
         Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
         time.sleep(self.resource.try_sleep)
     except ExecuteTimeoutException:
       err_msg = ("Execution of '%s' was killed due to timeout after %d seconds") % (self.resource.command, self.resource.timeout)
       
       if self.resource.on_timeout:
         Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
         shell.checked_call(self.resource.on_timeout)
       else:
         raise Fail(err_msg)
Example #4
  def action_create(self):
    with Environment.get_instance_copy() as env:
      repo_file_name = self.resource.repo_file_name
      repo_dir = get_repo_dir()
      new_content = InlineTemplate(self.resource.repo_template, repo_id=self.resource.repo_id, repo_file_name=self.resource.repo_file_name,
                             base_url=self.resource.base_url, mirror_list=self.resource.mirror_list)
      repo_file_path = format("{repo_dir}/{repo_file_name}.repo")

      if os.path.isfile(repo_file_path):
        existing_content_str = sudo.read_file(repo_file_path)
        new_content_str = new_content.get_content()
        if existing_content_str != new_content_str and OSCheck.is_suse_family():
          # We need to reset package manager's cache when we replace base urls
          # at existing repo. That is a case at least under SLES
          Logger.info("Flushing package manager cache since repo file content is about to change")
          checked_call(self.update_cmd, sudo=True)
        if self.resource.append_to_file:
          content = existing_content_str + '\n' + new_content_str
        else:
          content = new_content_str
      else: # If repo file does not exist yet
        content = new_content

      File(repo_file_path,
           content=content
      )
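
Note that format() here (from the resource_management library) interpolates names from the caller's scope, which is why {repo_dir} and {repo_file_name} resolve to the local variables assigned just above. A toy illustration, assuming the usual import path:

  from resource_management.libraries.functions.format import format

  repo_dir = "/etc/yum.repos.d"
  repo_file_name = "ambari"
  print format("{repo_dir}/{repo_file_name}.repo")  # /etc/yum.repos.d/ambari.repo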
Example #5
  def action_run(self):
    if self.resource.creates:
      if os.path.exists(self.resource.creates):
        return

    Logger.debug("Executing %s" % self.resource)
    
    if self.resource.path != []:
      if not self.resource.environment:
        self.resource.environment = {}
      
      self.resource.environment['PATH'] = os.pathsep.join(self.resource.path) 
    
    for i in range(0, self.resource.tries):
      try:
        shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                            cwd=self.resource.cwd, env=self.resource.environment,
                            preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                            wait_for_finish=self.resource.wait_for_finish)
        break
      except Fail as ex:
        if i == self.resource.tries-1: # last try
          raise ex
        else:
          Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
          time.sleep(self.resource.try_sleep)
Example #6
 def remove_package(self, name):
   if self._check_existence(name):
     cmd = REMOVE_CMD % (name)
     Logger.info("Removing package %s ('%s')" % (name, cmd))
     shell.checked_call(cmd)
   else:
     Logger.info("Skipping removing non-existent package %s" % (name))
Example #7
 def install_package(self, name):
   if not self._check_existence(name):
     cmd = INSTALL_CMD % (name)
     Logger.info("Installing package %s ('%s')" % (name, cmd))
     shell.checked_call(cmd)
   else:
     Logger.info("Skipping installing existent package %s" % (name))
Example #8
 def remove_package(self, name):
   if self._check_existence(name):
     cmd = REMOVE_CMD[self.get_logoutput()] + [name]
     Logger.info("Removing package %s ('%s')" % (name, string_cmd_from_args_list(cmd)))
     shell.checked_call(cmd, sudo=True, logoutput=self.get_logoutput())
   else:
     Logger.info("Skipping removal of non-existing package %s" % (name))
Example #9
def chmod_recursive(path, recursive_mode_flags, recursion_follow_links):
  find_flags = []
  if recursion_follow_links:
    find_flags.append('-L')
    
  for key, flags in recursive_mode_flags.iteritems():
    shell.checked_call(["find"] + find_flags + [path, "-type", key, "-exec" , "chmod", flags ,"{}" ,";"])
Example #10
  def action_run(self):
    if self.resource.creates:
      if os.path.exists(self.resource.creates):
        return

    Logger.debug("Executing %s" % self.resource)
    
    if self.resource.path != []:
      if not self.resource.environment:
        self.resource.environment = {}
      
      self.resource.environment['PATH'] = os.pathsep.join(self.resource.path) 
    
    for i in range(0, self.resource.tries):
      try:
        shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                            cwd=self.resource.cwd, env=self.resource.environment,
                            preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                            wait_for_finish=self.resource.wait_for_finish, timeout=self.resource.timeout,
                            pid_file=self.resource.pid_file, poll_after=self.resource.poll_after)
        break
      except Fail as ex:
        if i == self.resource.tries-1: # last try
          raise ex
        else:
          Logger.info("Retrying after %d seconds. Reason: %s" % (self.resource.try_sleep, str(ex)))
          time.sleep(self.resource.try_sleep)
      except ExecuteTimeoutException:
        err_msg = ("Execution of '%s' was killed due to timeout after %d seconds") % (self.resource.command, self.resource.timeout)
        
        if self.resource.on_timeout:
          Logger.info("Executing '%s'. Reason: %s" % (self.resource.on_timeout, err_msg))
          shell.checked_call(self.resource.on_timeout)
        else:
          raise Fail(err_msg)
Example #11
def select(stack_name, package, version, try_create=True, ignore_errors=False):
  """
  Selects a config version for the specified package. If this detects that
  the stack supports configuration versioning but /etc/<component>/conf is a
  directory, then it will attempt to bootstrap the conf.backup directory and change
  /etc/<component>/conf into a symlink.

  :param stack_name: the name of the stack
  :param package: the name of the package, as-used by <conf-selector-tool>
  :param version: the version number to create
  :param try_create: optional argument to attempt to create the directory before setting it
  :param ignore_errors: optional argument to ignore any error and simply log a warning
  """
  try:
    # do nothing if the stack does not support versioned configurations
    if not _valid(stack_name, package, version):
      return

    if try_create:
      create(stack_name, package, version)

    shell.checked_call(_get_cmd("set-conf-dir", package, version), logoutput=False, quiet=False, sudo=True)

    # for consistency's sake, we must ensure that the /etc/<component>/conf symlink exists and
    # points to <stack-root>/current/<component>/conf - this is because some people still prefer to
    # use /etc/<component>/conf even though <stack-root> is the "future"
    package_dirs = get_package_dirs()
    if package in package_dirs:
      Logger.info("Ensuring that {0} has the correct symlink structure".format(package))

      directory_list = package_dirs[package]
      for directory_structure in directory_list:
        conf_dir = directory_structure["conf_dir"]
        current_dir = directory_structure["current_dir"]

        # if /etc/<component>/conf is missing or is not a symlink
        if not os.path.islink(conf_dir):
          # if /etc/<component>/conf is not a link and it exists, convert it to a symlink
          if os.path.exists(conf_dir):
            parent_directory = os.path.dirname(conf_dir)
            conf_backup_dir = os.path.join(parent_directory, "conf.backup")

            # create conf.backup and copy files to it (if it doesn't exist)
            Execute(("cp", "-R", "-p", conf_dir, conf_backup_dir),
              not_if = format("test -e {conf_backup_dir}"), sudo = True)

            # delete the old /etc/<component>/conf directory and link to the backup
            Directory(conf_dir, action="delete")
            Link(conf_dir, to = conf_backup_dir)
          else:
            # missing entirely
            # /etc/<component>/conf -> <stack-root>/current/<component>/conf
            Link(conf_dir, to = current_dir)

  except Exception, exception:
    if ignore_errors is True:
      Logger.warning("Could not select the directory for package {0}. Error: {1}".format(package,
        str(exception)))
    else:
      raise
Example #12
  def action_create(self):
    if not self.user:
      command = ['useradd', "-m"]
      Logger.info("Adding user %s" % self.resource)
    else:
      command = ['usermod']
      Logger.info("Modifying user %s" % (self.resource.username))

    options = dict(
      comment="-c",
      gid="-g",
      uid="-u",
      shell="-s",
      password="******",
      home="-d",
    )

    if self.resource.system and not self.user:
      command.append("--system")

    if self.resource.groups:
      command += ["-G", ",".join(self.resource.groups)]

    for option_name, option_flag in options.items():
      option_value = getattr(self.resource, option_name)
      if option_flag and option_value:
        command += [option_flag, str(option_value)]

    command.append(self.resource.username)

    shell.checked_call(command)
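
To make the command assembly concrete: a hypothetical new user resource with username='hdfs', groups=['hadoop'], and shell='/bin/bash' (no other attributes set) would end up running

  ['useradd', '-m', '-G', 'hadoop', '-s', '/bin/bash', 'hdfs']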
Example #13
 def chown_recursive(path, owner, group, follow_links=False):
   owner = owner.pw_name if owner else ""
   group = group.gr_name if group else ""
   if owner or group:
     flags = ["-R"]
     if follow_links:
       flags.append("-L")
     shell.checked_call(["chown"] + flags + [owner+":"+group, path], sudo=True)
Example #14
 def action_install(self):
   package_name = self.resource.package_name
   location = self.resource.location
   if not self._check_existence(package_name, location):
     cmd = TAR_CMD % (package_name, location)
     if package_name.lower().endswith("zip"):
       cmd = ZIP_CMD % (package_name, location)
     Logger.info("Installing tarball %s at %s (%s)" % (package_name, location, cmd))
     shell.checked_call(cmd)
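
TAR_CMD and ZIP_CMD are module-level templates whose definitions are not shown. Given the substitution order (package first, then location), plausible shapes would be, purely as an assumption:

  TAR_CMD = "tar -xf %s -C %s"    # assumed: extract the archive into location
  ZIP_CMD = "unzip -o %s -d %s"   # assumed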
Example #15
 def read_file(filename, encoding=None):
   tmpf = tempfile.NamedTemporaryFile()
   shell.checked_call(["cp", "-f", filename, tmpf.name], sudo=True)
   
   with tmpf:
     with open(tmpf.name, "rb") as fp:
       content = fp.read()
       
   content = content.decode(encoding) if encoding else content
   return content
Example #16
 def install_package(self, name, use_repos=[]):
     if not self._check_existence(name) or use_repos:
         cmd = INSTALL_CMD[self.get_logoutput()]
         if use_repos:
             enable_repo_option = "--enablerepo=" + ",".join(use_repos)
             cmd = cmd + ["--disablerepo=*", enable_repo_option]
         cmd = cmd + [name]
         Logger.info("Installing package %s ('%s')" % (name, string_cmd_from_args_list(cmd)))
         shell.checked_call(cmd, sudo=True, logoutput=self.get_logoutput())
     else:
         Logger.info("Skipping installing existent package %s" % (name))
Example #17
  def create_file(filename, content, encoding=None):
    """
    if content is None, create empty file
    """
    content = content if content else ""
    content = content.encode(encoding) if encoding else content

    tmpf_name = tempfile.gettempdir() + os.sep + tempfile.template + str(time.time()) + "_" + str(random.randint(0, 1000))
    try:
        with open(tmpf_name, "wb") as fp:
            fp.write(content)
        shell.checked_call(["cp", "-f", tmpf_name, filename], sudo=True)
    finally:
        os.unlink(tmpf_name)
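
Usage sketch: the temp file is written unprivileged and then sudo-copied over the target, so the caller needs no write access to the destination directory:

  create_file("/etc/security/limits.d/hdfs.conf", "hdfs - nofile 128000\n")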
Example #18
def create_file(filename, content):
  """
  if content is None, create empty file
  """
  tmpf = tempfile.NamedTemporaryFile()
  
  if content:
    with open(tmpf.name, "wb") as fp:
      fp.write(content)
  
  with tmpf:    
    shell.checked_call(["cp", "-f", tmpf.name, filename], sudo=True)
    
  # set default files mode
  chmod(filename, 0644)
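
Compared with Example #17, this variant drops the encoding handling and instead normalizes permissions afterwards via the sudo chmod helper (see Examples #32-36 for wrappers in the same style). Usage is identical in shape:

  create_file("/etc/hosts.allow", hosts_allow_content)  # hosts_allow_content is illustrative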
Example #19
 def action_run(self):
   if self.resource.creates:
     if sudo.path_exists(self.resource.creates):
       Logger.info("Skipping %s due to creates" % self.resource)
       return
     
   shell.checked_call(self.resource.command, logoutput=self.resource.logoutput,
                       cwd=self.resource.cwd, env=self.resource.environment,
                       preexec_fn=_preexec_fn(self.resource), user=self.resource.user,
                       wait_for_finish=self.resource.wait_for_finish,
                       timeout=self.resource.timeout,on_timeout=self.resource.on_timeout,
                       path=self.resource.path,
                       sudo=self.resource.sudo,
                       on_new_line=self.resource.on_new_line,
                       stdout=self.resource.stdout,stderr=self.resource.stderr,
                       tries=self.resource.tries, try_sleep=self.resource.try_sleep)
Example #20
 def listdir(path):
   if not path_isdir(path):
     raise Fail("{0} is not a directory. Cannot list files of it.".format(path))
   
   code, out, err = shell.checked_call(["ls", path], sudo=True, stderr=subprocess.PIPE)
   files = out.splitlines()
   return files
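
Taken together, the call sites in this listing suggest checked_call returns (code, out) by default and (code, out, err) when stderr=subprocess.PIPE is passed, as here. Side by side:

  code, out = shell.checked_call(["ls", "/tmp"], sudo=True)
  code, out, err = shell.checked_call(["ls", "/tmp"], sudo=True, stderr=subprocess.PIPE)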
Example #21
    def action_create(self):
        with Environment.get_instance_copy() as env:
            with tempfile.NamedTemporaryFile() as tmpf:
                repo_file_name = format("{repo_file_name}.list", repo_file_name=self.resource.repo_file_name)
                repo_file_path = format("{repo_dir}/{repo_file_name}", repo_dir=self.repo_dir)

                new_content = Template(
                    self.resource.repo_template,
                    package_type=self.package_type,
                    base_url=self.resource.base_url,
                    components=" ".join(self.resource.components),
                ).get_content()
                old_content = ""
                if self.resource.append_to_file and os.path.isfile(repo_file_path):
                    with open(repo_file_path) as repo_file:
                        old_content = repo_file.read() + "\n"

                File(tmpf.name, content=old_content + new_content)

                if not os.path.isfile(repo_file_path) or not filecmp.cmp(tmpf.name, repo_file_path):
                    File(repo_file_path, content=StaticFile(tmpf.name))

                    update_cmd_formatted = [format(x) for x in self.update_cmd]
                    # this call is time-expensive
                    retcode, out = checked_call(update_cmd_formatted, sudo=True)

                    # add public keys for new repos
                    missing_pkeys = set(re.findall(self.missing_pkey_regex, out))
                    for pkey in missing_pkeys:
                        Execute(
                            format(self.add_pkey_cmd),
                            timeout=15,  # in case we are on the host w/o internet (using localrepo), we should ignore hanging
                            ignore_failures=True,
                        )
Example #22
 def __init__(self, path):
   cmd = ["stat", "-c", "%u %g %a", path]
   code, out, err = shell.checked_call(cmd, sudo=True, stderr=subprocess.PIPE)
   values = out.split(' ')
   if len(values) != 3:
     raise Fail("Execution of '{0}' returned unexpected output. {2}\n{3}".format(cmd, code, err, out))
   uid_str, gid_str, mode_str = values
   self.st_uid, self.st_gid, self.st_mode = int(uid_str), int(gid_str), int(mode_str, 8)
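
This is the __init__ of a stat-like wrapper (the class name is not shown; call it SudoStat for illustration); the octal mode string from stat -c %a is parsed with int(mode_str, 8):

  st = SudoStat("/etc/shadow")  # hypothetical class name
  print oct(st.st_mode), st.st_uid, st.st_gid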
Example #23
def get_unique_id_and_date():
    out = shell.checked_call("hostid")[1]
    id = out.strip()

    now = datetime.datetime.now()
    date = now.strftime("%M%d%y")

    return "id{id}_date{date}".format(id=id, date=date)
Example #24
def get_unique_id_and_date():
    out = shell.checked_call("hostid")[1].split('\n')[-1] # bugfix: take the lastline (stdin is not tty part cut)
    id = out.strip()

    now = datetime.datetime.now()
    date = now.strftime("%M%d%y")

    return "id{id}_date{date}".format(id=id, date=date)
Example #25
  def install_package(self, name, use_repos=[]):
    if not self._check_existence(name) or use_repos:
      cmd = INSTALL_CMD[self.get_logoutput()]
      if use_repos:
        active_base_repos = get_active_base_repos()
        if 'base' in use_repos:
          use_repos = filter(lambda x: x != 'base', use_repos)
          use_repos.extend(active_base_repos)
        use_repos_options = []
        for repo in use_repos:
          use_repos_options = use_repos_options + ['--repo', repo]
        cmd = cmd + use_repos_options

      cmd = cmd + [name]
      Logger.info("Installing package %s ('%s')" % (name, string_cmd_from_args_list(cmd)))
      shell.checked_call(cmd, sudo=True, logoutput=self.get_logoutput())
    else:
      Logger.info("Skipping installing existent package %s" % (name))
Example #26
 def install_package(self, name):
   if not self._check_existence(name):
     cmd = INSTALL_CMD % (name)
     Logger.info("Installing package %s ('%s')" % (name, cmd))
     code, out = shell.call(cmd)
     
     # the install may fail if apt-get update has not been run in a long time
     if code:
       Logger.info("Execution of '%s' returned %d. %s" % (cmd, code, out))
       Logger.info("Failed to install package %s. Executing `%s`" % (name, REPO_UPDATE_CMD))
       code, out = shell.call(REPO_UPDATE_CMD)
       
       if code:
         Logger.info("Execution of '%s' returned %d. %s" % (REPO_UPDATE_CMD, code, out))
         
       Logger.info("Retrying to install package %s" % (name))
       shell.checked_call(cmd)
   else:
     Logger.info("Skipping installing existent package %s" % (name))
Example #27
  def install_package(self, name, use_repos=[], skip_repos=[]):
    if use_repos or not self._check_existence(name):
      cmd = INSTALL_CMD[self.get_logoutput()]
      copied_sources_files = []
      is_tmp_dir_created = False
      if use_repos:
        is_tmp_dir_created = True
        apt_sources_list_tmp_dir = tempfile.mkdtemp(suffix="-ambari-apt-sources-d")
        Logger.info("Temporal sources directory was created: %s" % apt_sources_list_tmp_dir)
        if 'base' not in use_repos:
          cmd = cmd + ['-o', 'Dir::Etc::SourceList=%s' % EMPTY_FILE]
        for repo in use_repos:
          if repo != 'base':
            new_sources_file = os.path.join(apt_sources_list_tmp_dir, repo + '.list')
            Logger.info("Temporal sources file will be copied: %s" % new_sources_file)
            shutil.copy(os.path.join(APT_SOURCES_LIST_DIR, repo + '.list'), new_sources_file)
            copied_sources_files.append(new_sources_file)
        cmd = cmd + ['-o', 'Dir::Etc::SourceParts=%s' % apt_sources_list_tmp_dir]

      cmd = cmd + [name]
      Logger.info("Installing package %s ('%s')" % (name, string_cmd_from_args_list(cmd)))
      code, out = shell.call(cmd, sudo=True, env=INSTALL_CMD_ENV, logoutput=self.get_logoutput())
      
      # the install may fail if apt-get update has not been run in a long time
      if code:
        Logger.info("Execution of '%s' returned %d. %s" % (cmd, code, out))
        Logger.info("Failed to install package %s. Executing `%s`" % (name, string_cmd_from_args_list(REPO_UPDATE_CMD)))
        code, out = shell.call(REPO_UPDATE_CMD, sudo=True, logoutput=self.get_logoutput())
        
        if code:
          Logger.info("Execution of '%s' returned %d. %s" % (REPO_UPDATE_CMD, code, out))
          
        Logger.info("Retrying to install package %s" % (name))
        shell.checked_call(cmd, sudo=True, logoutput=self.get_logoutput())

      if is_tmp_dir_created:
        for temporary_sources_file in copied_sources_files:
          Logger.info("Removing temporary sources file: %s" % temporary_sources_file)
          os.remove(temporary_sources_file)
        Logger.info("Removing temporary sources directory: %s" % apt_sources_list_tmp_dir)
        os.rmdir(apt_sources_list_tmp_dir)
    else:
      Logger.info("Skipping installation of existing package %s" % (name))
Example #28
    def action_create(self):
        group = self.group
        if not group:
            command = ["groupadd"]
            Logger.info("Adding group %s" % self.resource)
        else:
            command = ["groupmod"]
            Logger.info("Modifying group %s" % (self.resource.group_name))

        options = dict(gid="-g", password="******")

        for option_name, option_flag in options.items():
            option_value = getattr(self.resource, option_name)
            if option_flag and option_value:
                command += [option_flag, str(option_value)]

        command.append(self.resource.group_name)

        shell.checked_call(command)

        group = self.group
Example #29
  def action_create(self):
    if not self.user:
      command = ['useradd', "-m"]
      Logger.info("Adding user %s" % self.resource)
    else:
      command = ['usermod']
      Logger.info("Modifying user %s" % (self.resource.username))

    options = dict(
      comment="-c",
      gid="-g",
      uid="-u",
      shell="-s",
      password="******",
      home="-d",
    )

    if self.resource.system and not self.user:
      command.append("--system")

    if self.resource.groups:
      
      groups = self.resource.groups
      if self.user and self.user_groups:
        groups += self.user_groups
      
      command += ["-G", ",".join(groups)]

    for option_name, option_flag in options.items():
      option_value = getattr(self.resource, option_name)
      if option_flag and option_value:
        command += [option_flag, str(option_value)]

    # if trying to modify existing user, but no values to modify are provided
    if self.user and len(command) == 1:
      return

    command.append(self.resource.username)

    shell.checked_call(command, sudo=True)
Example #30
def select(stack_name, package, version, try_create=True):
  """
  Selects a config version for the specified package.
  :stack_name: the name of the stack
  :package: the name of the package, as-used by conf-select
  :version: the version number to create
  :try_create: optional argument to attempt to create the directory before setting it
  """
  if not _valid(stack_name, package, version):
    return

  if try_create:
    create(stack_name, package, version)

  shell.checked_call(get_cmd("set-conf-dir", package, version), logoutput=False, quiet=False, sudo=True)

  # for consistency's sake, we must ensure that the /etc/<component>/conf symlink exists and
  # points to /usr/hdp/current/<component>/conf - this is because some people still prefer to
  # use /etc/<component>/conf even though /usr/hdp is the "future"
  if package in PACKAGE_DIRS:
    Logger.info("Ensuring that {0} has the correct symlink structure".format(package))

    directory_list = PACKAGE_DIRS[package]
    for directory_structure in directory_list:
      conf_dir = directory_structure["conf_dir"]
      current_dir = directory_structure["current_dir"]

      # if /etc/<component>/conf is not a symlink, we need to change it
      if not os.path.islink(conf_dir):
        # if it exists, try to back it up
        if os.path.exists(conf_dir):
          parent_directory = os.path.dirname(conf_dir)
          conf_install_dir = os.path.join(parent_directory, "conf.backup")

          Execute(("cp", "-R", "-p", conf_dir, conf_install_dir),
            not_if = format("test -e {conf_install_dir}"), sudo = True)

          Directory(conf_dir, action="delete")

        Link(conf_dir, to = current_dir)
Example #31
def execute(configurations={}, parameters={}, host_name=None):
    """
    Returns a tuple containing the result code and a pre-formatted result label

    Keyword arguments:
    configurations (dictionary): a mapping of configuration key to value
    parameters (dictionary): a mapping of script parameter key to value
    host_name (string): the name of this host where the alert is running
    """

    if configurations is None:
        return (UKNOWN_STATUS_CODE,
                ['There were no configurations supplied to the script.'])

    result_code = None

    try:
        use_external_hbase = False
        if USE_EXTERNAL_HBASE_KEY in configurations:
            use_external_hbase = str(
                configurations[USE_EXTERNAL_HBASE_KEY]).upper() == 'TRUE'

        if use_external_hbase:
            return (OK_RESULT_CODE, ['use_external_hbase set to true.'])

        is_hbase_system_service_launch = False
        if ATS_HBASE_SYSTEM_SERVICE_LAUNCH_KEY in configurations:
            is_hbase_system_service_launch = str(
                configurations[ATS_HBASE_SYSTEM_SERVICE_LAUNCH_KEY]).upper(
                ) == 'TRUE'

        yarn_hbase_user = "******"
        if ATS_HBASE_USER_KEY in configurations:
            yarn_hbase_user = configurations[ATS_HBASE_USER_KEY]

        if not is_hbase_system_service_launch:
            yarn_hbase_pid_dir_prefix = ""
            if ATS_HBASE_PID_DIR_PREFIX in configurations:
                yarn_hbase_pid_dir_prefix = configurations[
                    ATS_HBASE_PID_DIR_PREFIX]
            else:
                return (UKNOWN_STATUS_CODE, [
                    'The yarn_hbase_pid_dir_prefix is a required parameter.'
                ])
            yarn_hbase_pid_dir = format(
                "{yarn_hbase_pid_dir_prefix}/{yarn_hbase_user}")
            master_pid_file = format(
                "{yarn_hbase_pid_dir}/hbase-{yarn_hbase_user}-master.pid")
            rs_pid_file = format(
                "{yarn_hbase_pid_dir}/hbase-{yarn_hbase_user}-regionserver.pid"
            )

            if host_name is None:
                host_name = socket.getfqdn()

            master_process_running = is_monitor_process_live(master_pid_file)
            rs_process_running = is_monitor_process_live(rs_pid_file)

            alert_state = OK_RESULT_CODE if master_process_running and rs_process_running else CRITICAL_RESULT_CODE

            alert_label = 'ATS embedded HBase is running on {0}' if master_process_running and rs_process_running else 'ATS embedded HBase is NOT running on {0}'
            alert_label = alert_label.format(host_name)

            return (alert_state, [alert_label])
        else:
            security_enabled = False
            if SECURITY_ENABLED_KEY in configurations:
                security_enabled = str(
                    configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

            check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
            if CHECK_COMMAND_TIMEOUT_KEY in parameters:
                check_command_timeout = int(
                    parameters[CHECK_COMMAND_TIMEOUT_KEY])

            if security_enabled:
                if ATS_HBASE_PRINCIPAL_KEY in configurations:
                    ats_hbase_app_principal = configurations[
                        ATS_HBASE_PRINCIPAL_KEY]
                    ats_hbase_app_principal = ats_hbase_app_principal.replace(
                        '_HOST', host_name.lower())

                if ATS_HBASE_PRINCIPAL_KEYTAB_KEY in configurations:
                    ats_hbase_app_keytab = configurations[
                        ATS_HBASE_PRINCIPAL_KEYTAB_KEY]

                # Get the configured Kerberos executable search paths, if any
                if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
                    kerberos_executable_search_paths = configurations[
                        KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
                else:
                    kerberos_executable_search_paths = None

                kinit_path_local = get_kinit_path(
                    kerberos_executable_search_paths)
                kinitcmd = format(
                    "{kinit_path_local} -kt {ats_hbase_app_keytab} {ats_hbase_app_principal}; "
                )

                # prevent concurrent kinit
                kinit_lock = global_lock.get_lock(
                    global_lock.LOCK_TYPE_KERBEROS)
                kinit_lock.acquire()
                try:
                    Execute(kinitcmd,
                            user=yarn_hbase_user,
                            path=["/bin/", "/usr/bin/", "/usr/sbin/"],
                            timeout=10)
                finally:
                    kinit_lock.release()

            start_time = time.time()
            ats_hbase_status_cmd = STACK_ROOT_DEFAULT + format(
                "/current/hadoop-yarn-client/bin/yarn app -status ats-hbase")

            code, output, error = shell.checked_call(
                ats_hbase_status_cmd,
                user=yarn_hbase_user,
                stderr=subprocess.PIPE,
                timeout=check_command_timeout,
                logoutput=False)
            if code != 0:
                alert_label = traceback.format_exc()
                result_code = UKNOWN_STATUS_CODE
                return (result_code, [alert_label])

            # Call for getting JSON
            ats_hbase_app_info = make_valid_json(output)

            if ats_hbase_app_info is None:
                alert_label = CRITICAL_MESSAGE
                result_code = CRITICAL_RESULT_CODE
                return (result_code, [alert_label])

            if 'state' not in ats_hbase_app_info:
                alert_label = traceback.format_exc()
                result_code = UKNOWN_STATUS_CODE
                return (result_code, [alert_label])

            retrieved_ats_hbase_app_state = ats_hbase_app_info['state'].upper()

            if retrieved_ats_hbase_app_state in ['STABLE']:
                result_code = OK_RESULT_CODE
                total_time = time.time() - start_time
                alert_label = OK_MESSAGE.format(retrieved_ats_hbase_app_state,
                                                total_time)
            else:
                result_code = CRITICAL_RESULT_CODE
                total_time = time.time() - start_time
                alert_label = CRITICAL_MESSAGE_WITH_STATE.format(
                    retrieved_ats_hbase_app_state, total_time)
    except:
        alert_label = traceback.format_exc()
        result_code = CRITICAL_RESULT_CODE
    return (result_code, [alert_label])
Example #32
 def copy(src, dst):
   shell.checked_call(["sudo", "cp", "-r", src, dst], sudo=True)
Example #33
 def rmtree(path):
     shell.checked_call(["rm", "-rf", path], sudo=True)
Example #34
 def link(source, link_name):
     shell.checked_call(["ln", "-f", source, link_name], sudo=True)
Example #35
 def chmod_extended(path, mode):
     shell.checked_call(["chmod", mode, path], sudo=True)
Example #36
 def chown(path, owner, group):
     owner = owner.pw_name if owner else ""
     group = group.gr_name if group else ""
     if owner or group:
         shell.checked_call(["chown", owner + ":" + group, path], sudo=True)
Example #37
    def service_check(self, env):
        import params
        env.set_params(params)

        params.HdfsResource(
            format("/user/{smokeuser}"),
            type="directory",
            action="create_on_execute",
            owner=params.smokeuser,
            mode=params.smoke_hdfs_user_mode,
        )

        if params.stack_version_formatted_major and check_stack_feature(
                StackFeature.ROLLING_UPGRADE,
                params.stack_version_formatted_major):
            path_to_distributed_shell_jar = format(
                "{stack_root}/current/hadoop-yarn-client/hadoop-yarn-applications-distributedshell.jar"
            )
        else:
            path_to_distributed_shell_jar = "/usr/lib/hadoop-yarn/hadoop-yarn-applications-distributedshell*.jar"

        yarn_distrubuted_shell_check_params = [
            "yarn org.apache.hadoop.yarn.applications.distributedshell.Client",
            "-shell_command", "ls", "-num_containers", "{number_of_nm}",
            "-jar", "{path_to_distributed_shell_jar}", "-timeout", "300000",
            "--queue", "{service_check_queue_name}"
        ]
        yarn_distrubuted_shell_check_cmd = format(
            " ".join(yarn_distrubuted_shell_check_params))

        if params.security_enabled:
            kinit_cmd = format(
                "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal};"
            )
            smoke_cmd = format(
                "{kinit_cmd} {yarn_distrubuted_shell_check_cmd}")
        else:
            smoke_cmd = yarn_distrubuted_shell_check_cmd

        return_code, out = shell.checked_call(
            smoke_cmd,
            path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
            user=params.smokeuser,
        )

        m = re.search("appTrackingUrl=(.*),\s", out)
        app_url = m.group(1)

        splitted_app_url = str(app_url).split('/')

        for item in splitted_app_url:
            if "application" in item:
                application_name = item

        # Find out the active RM from RM list
        # Raise an exception if the active rm cannot be determined
        active_rm_webapp_address = self.get_active_rm_webapp_address()
        Logger.info("Active Resource Manager web app address is : " +
                    active_rm_webapp_address)

        # Verify job state from active resource manager via rest api
        info_app_url = params.scheme + "://" + active_rm_webapp_address + "/ws/v1/cluster/apps/" + application_name
        get_app_info_cmd = "curl --negotiate -u : -ks --location-trusted --connect-timeout " + CURL_CONNECTION_TIMEOUT + " " + info_app_url

        return_code, stdout, _ = get_user_call_output(
            get_app_info_cmd,
            user=params.smokeuser,
            path='/usr/sbin:/sbin:/usr/local/bin:/bin:/usr/bin',
        )

        try:
            json_response = json.loads(stdout)
        except Exception as e:
            raise Fail(
                format(
                    "Response from YARN API was not a valid JSON. Response: {stdout}"
                ))

        if json_response is None or 'app' not in json_response or \
                'state' not in json_response['app'] or 'finalStatus' not in json_response['app']:
            raise Fail("Application " + app_url + " returns invalid data.")

        if json_response['app']['state'] != "FINISHED" or json_response['app'][
                'finalStatus'] != "SUCCEEDED":
            raise Fail(
                "Application " + app_url +
                " state/status is not valid. Should be FINISHED/SUCCEEDED.")
Example #38
def checkAndStopRegistyDNS():
    import params
    import status_params

    componentName = 'registrydns'
    action = 'stop'
    daemon = format("{yarn_bin}/yarn")
    hadoop_env_exports = {'HADOOP_LIBEXEC_DIR': params.hadoop_libexec_dir}

    # When registry dns is switched from non-privileged to privileged mode or the other way,
    # then the previous instance of registry dns has a different pid/user.
    # Checking if either of the processes are running and shutting them down if they are.

    # privileged mode
    dns_pid_file = status_params.yarn_registry_dns_priv_pid_file
    dns_user = status_params.root_user
    Logger.info("checking any existing dns pid file = '" + dns_pid_file +
                "' dns user '" + dns_user + "'")
    try:
        # these are needed for unknown reasons
        env_exports = {
            'HADOOP_PID_DIR': params.yarn_pid_dir,
            'HADOOP_SECURE_PID_DIR': params.yarn_pid_dir,
            'HADOOP_LOG_DIR': params.yarn_log_dir,
            'HADOOP_SECURE_LOG_DIR': params.yarn_log_dir
        }
        env_exports.update(hadoop_env_exports)
        cmd = [
            daemon, "--config", params.hadoop_conf_dir, "--daemon", action,
            componentName
        ]
        daemon_cmd = as_sudo(cmd)
        process_id_exists_command = as_sudo([
            "test", "-f", dns_pid_file
        ]) + " && " + as_sudo(["pgrep", "-F", dns_pid_file])
        Execute(daemon_cmd,
                only_if=process_id_exists_command,
                environment=env_exports)
    except:
        # When the registry dns port is modified but registry dns is not started
        # immediately, then the configs in yarn-env.sh & yarn-site.xml related
        # to registry dns may have already changed. This introduces a discrepancy
        # between the actual process that is running and the configs.
        # For example, when port is changed from 5300 to 53,
        # then dns port = 53 in yarn-site and YARN_REGISTRYDNS_SECURE_* envs in yarn-env.sh
        # are saved. So, while trying to shutdown the stray non-privileged registry dns process
        # after sometime, yarn daemon from the configs thinks that it needs privileged
        # access and throws an exception. In such cases, we try to kill the stray process.
        pass
    process_id_does_not_exist_command = format(
        "! ( {process_id_exists_command} )")
    code, out = shell.call(process_id_does_not_exist_command,
                           env=env_exports,
                           tries=5,
                           try_sleep=5)
    if code != 0:
        code, out, err = shell.checked_call(("pgrep", "-f", dns_pid_file),
                                            sudo=True,
                                            env=env_exports,
                                            stderr=subprocess32.PIPE)
        Logger.info("PID to kill was retrieved: '" + out + "'.")
        for pid in out.splitlines():
            try:
                Execute(("kill", "-9", pid), sudo=True)
            except:
                # ignoring failures
                Logger.warning("failed to kill pid '" + pid + "'.")
                pass
    File(dns_pid_file, action="delete")

    # non-privileged mode
    dns_pid_file = status_params.yarn_registry_dns_pid_file
    dns_user = params.yarn_user
    Logger.info("checking any existing dns pid file = '" + dns_pid_file +
                "' dns user '" + dns_user + "'")
    try:
        cmd = format(
            "{daemon} --config {hadoop_conf_dir} --daemon {action} {componentName}"
        )
        daemon_cmd = as_user(cmd, dns_user)
        Execute(daemon_cmd, environment=hadoop_env_exports)
    except:
        pass
Example #39
def select(stack_name, package, version, try_create=True, ignore_errors=False):
    """
  Selects a config version for the specified package. If this detects that
  the stack supports configuration versioning but /etc/<component>/conf is a
  directory, then it will attempt to bootstrap the conf.backup directory and change
  /etc/<component>/conf into a symlink.

  :param stack_name: the name of the stack
  :param package: the name of the package, as-used by <conf-selector-tool>
  :param version: the version number to create
  :param try_create: optional argument to attempt to create the directory before setting it
  :param ignore_errors: optional argument to ignore any error and simply log a warning
  """
    try:
        # do nothing if the stack does not support versioned configurations
        if not _valid(stack_name, package, version):
            return

        if try_create:
            create(stack_name, package, version)

        shell.checked_call(_get_cmd("set-conf-dir", package, version),
                           logoutput=False,
                           quiet=False,
                           sudo=True)

        # for consistency's sake, we must ensure that the /etc/<component>/conf symlink exists and
        # points to <stack-root>/current/<component>/conf - this is because some people still prefer to
        # use /etc/<component>/conf even though <stack-root> is the "future"
        package_dirs = get_package_dirs()
        if package in package_dirs:
            Logger.info(
                "Ensuring that {0} has the correct symlink structure".format(
                    package))

            directory_list = package_dirs[package]
            for directory_structure in directory_list:
                conf_dir = directory_structure["conf_dir"]
                current_dir = directory_structure["current_dir"]

                # if /etc/<component>/conf is missing or is not a symlink
                if not os.path.islink(conf_dir):
                    # if /etc/<component>/conf is not a link and it exists, convert it to a symlink
                    if os.path.exists(conf_dir):
                        parent_directory = os.path.dirname(conf_dir)
                        conf_backup_dir = os.path.join(parent_directory,
                                                       "conf.backup")

                        # create conf.backup and copy files to it (if it doesn't exist)
                        Execute(("cp", "-R", "-p", conf_dir, conf_backup_dir),
                                not_if=format("test -e {conf_backup_dir}"),
                                sudo=True)

                        # delete the old /etc/<component>/conf directory and link to the backup
                        Directory(conf_dir, action="delete")
                        Link(conf_dir, to=conf_backup_dir)
                    else:
                        # missing entirely
                        # /etc/<component>/conf -> <stack-root>/current/<component>/conf
                        Link(conf_dir, to=current_dir)

    except Exception, exception:
        if ignore_errors is True:
            Logger.warning(
                "Could not select the directory for package {0}. Error: {1}".
                format(package, str(exception)))
        else:
            raise
Example #40
def execute(configurations={}, parameters={}, host_name=None):
    """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """

    LLAP_APP_STATUS_CMD_TIMEOUT = 0

    if configurations is None:
        return ('UNKNOWN',
                ['There were no configurations supplied to the script.'])

    result_code = None

    try:
        security_enabled = False
        if SECURITY_ENABLED_KEY in configurations:
            security_enabled = str(
                configurations[SECURITY_ENABLED_KEY]).upper() == 'TRUE'

        check_command_timeout = CHECK_COMMAND_TIMEOUT_DEFAULT
        if CHECK_COMMAND_TIMEOUT_KEY in parameters:
            check_command_timeout = int(parameters[CHECK_COMMAND_TIMEOUT_KEY])

        hive_user = HIVE_USER_DEFAULT
        if HIVE_USER_KEY in configurations:
            hive_user = configurations[HIVE_USER_KEY]

        llap_app_name = LLAP_APP_NAME_DEFAULT
        if LLAP_APP_NAME_KEY in configurations:
            llap_app_name = configurations[LLAP_APP_NAME_KEY]

        if security_enabled:
            if HIVE_PRINCIPAL_KEY in configurations:
                llap_principal = configurations[HIVE_PRINCIPAL_KEY]
            else:
                llap_principal = HIVE_PRINCIPAL_DEFAULT
            llap_principal = llap_principal.replace('_HOST', host_name.lower())

            llap_keytab = HIVE_PRINCIPAL_KEYTAB_DEFAULT
            if HIVE_PRINCIPAL_KEYTAB_KEY in configurations:
                llap_keytab = configurations[HIVE_PRINCIPAL_KEYTAB_KEY]

            # Get the configured Kerberos executable search paths, if any
            if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
                kerberos_executable_search_paths = configurations[
                    KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
            else:
                kerberos_executable_search_paths = None

            kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
            kinitcmd = format(
                "{kinit_path_local} -kt {llap_keytab} {llap_principal}; ")

            # prevent concurrent kinit
            kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
            kinit_lock.acquire()
            try:
                Execute(kinitcmd,
                        user=hive_user,
                        path=[
                            "/bin/", "/usr/bin/", "/usr/lib/hive/bin/",
                            "/usr/sbin/"
                        ],
                        timeout=10)
            finally:
                kinit_lock.release()

        start_time = time.time()
        if STACK_NAME in configurations and STACK_ROOT in configurations:
            stack_root = stack_tools.get_stack_root(configurations[STACK_NAME],
                                                    configurations[STACK_ROOT])

            llap_status_cmd = stack_root + format(
                "/current/hive-server2-hive2/bin/hive --service llapstatus --name {llap_app_name}  --findAppTimeout {LLAP_APP_STATUS_CMD_TIMEOUT}"
            )
        else:
            llap_status_cmd = STACK_ROOT_DEFAULT + format(
                "/current/hive-server2-hive2/bin/hive --service llapstatus --name {llap_app_name} --findAppTimeout {LLAP_APP_STATUS_CMD_TIMEOUT}"
            )

        code, output, error = shell.checked_call(llap_status_cmd,
                                                 user=hive_user,
                                                 stderr=subprocess.PIPE,
                                                 timeout=check_command_timeout,
                                                 logoutput=False)
        # Call for getting JSON
        llap_app_info = make_valid_json(output)

        if llap_app_info is None or 'state' not in llap_app_info:
            alert_label = traceback.format_exc()
            result_code = UKNOWN_STATUS_CODE
            return (result_code, [alert_label])

        retrieved_llap_app_state = llap_app_info['state'].upper()
        if retrieved_llap_app_state in ['RUNNING_ALL']:
            result_code = OK_RESULT_CODE
            total_time = time.time() - start_time
            alert_label = OK_MESSAGE.format(
                llap_app_state_dict.get(retrieved_llap_app_state,
                                        retrieved_llap_app_state), total_time)
        elif retrieved_llap_app_state in ['RUNNING_PARTIAL']:
            live_instances = 0
            desired_instances = 0
            percentInstancesUp = 0
            percent_desired_instances_to_be_up = 80
            # Get 'live' and 'desired' instances
            if 'liveInstances' not in llap_app_info or 'desiredInstances' not in llap_app_info:
                result_code = CRITICAL_RESULT_CODE
                total_time = time.time() - start_time
                alert_label = CRITICAL_MESSAGE_WITH_STATE.format(
                    llap_app_state_dict.get(retrieved_llap_app_state,
                                            retrieved_llap_app_state),
                    total_time)
                return (result_code, [alert_label])

            live_instances = llap_app_info['liveInstances']
            desired_instances = llap_app_info['desiredInstances']
            if live_instances < 0 or desired_instances <= 0:
                result_code = CRITICAL_RESULT_CODE
                total_time = time.time() - start_time
                alert_label = CRITICAL_MESSAGE_WITH_STATE.format(
                    llap_app_state_dict.get(retrieved_llap_app_state,
                                            retrieved_llap_app_state),
                    total_time)
                return (result_code, [alert_label])

            percentInstancesUp = float(
                live_instances) / desired_instances * 100
            if percentInstancesUp >= percent_desired_instances_to_be_up:
                result_code = OK_RESULT_CODE
                total_time = time.time() - start_time
                alert_label = MESSAGE_WITH_STATE_AND_INSTANCES.format(
                    llap_app_state_dict.get(retrieved_llap_app_state,
                                            retrieved_llap_app_state),
                    total_time, llap_app_info['liveInstances'],
                    llap_app_info['desiredInstances'])
            else:
                result_code = CRITICAL_RESULT_CODE
                total_time = time.time() - start_time
                alert_label = MESSAGE_WITH_STATE_AND_INSTANCES.format(
                    llap_app_state_dict.get(retrieved_llap_app_state,
                                            retrieved_llap_app_state),
                    total_time, llap_app_info['liveInstances'],
                    llap_app_info['desiredInstances'])
        else:
            result_code = CRITICAL_RESULT_CODE
            total_time = time.time() - start_time
            alert_label = CRITICAL_MESSAGE_WITH_STATE.format(
                llap_app_state_dict.get(retrieved_llap_app_state,
                                        retrieved_llap_app_state), total_time)
    except:
        alert_label = traceback.format_exc()
        result_code = UKNOWN_STATUS_CODE
    return (result_code, [alert_label])
Example #41
    def _llap_start(self, env, cleanup=False):
      import params
      env.set_params(params)
      Logger.info("Starting LLAP")

      # TODO: start only if not already running.
      # TODO: the params are currently hardcoded; read the suggested values from hive2/hive-site.xml instead.
      # TODO: ensure that the script works as hive from cmd when not cd'ed into /home/hive.
      # Needs permission to write to the hive home dir.

      cmd = ''
      if params.security_enabled:
        cmd = format("{stack_root}/current/hive-server2-hive2/bin/hive --service llap --instances 1 -slider-am-container-mb "
                     "{slider_am_container_mb} --slider-keytab-dir .slider/keytabs/{params.hive_user}/ --slider-keytab "
                     "{hive_llap_keytab_file} --slider-principal {hive_headless_keytab} --loglevel INFO")
      else:
        cmd = format("{stack_root}/current/hive-server2-hive2/bin/hive --service llap --instances 1 -slider-am-container-mb {slider_am_container_mb} --loglevel INFO")

      run_file_path = None
      try:
        Logger.info(format("Command: {cmd}"))
        cmd = cmd.split()
        code, output, error = shell.checked_call(cmd, user=params.hive_user, stderr=subprocess.PIPE, logoutput=True)

        if code != 0 or output is None:
          raise Fail("Command failed with either non-zero return code or no output.")

        # E.g., output:
        # Prepared llap-slider-05Apr2016/run.sh for running LLAP on Slider
        exp = r"Prepared (.*?run.sh) for running LLAP"
        m = re.match(exp, output, re.I)
        if m and len(m.groups()) == 1:
          run_file_name = m.group(1)
          run_file_path = os.path.join(params.hive_user_home_dir, run_file_name)
        else:
          raise Fail("Did not find run.sh file in output: " + str(output))

        Logger.info(format("Run file path: {run_file_path}"))
        if os.path.isfile(run_file_path):
          Execute(run_file_path, user=params.hive_user)

          # TODO: sleeping here is not a good idea. We should check the status of the LLAP app to confirm it
          # launched properly and is in a running state before starting Hive Interactive Server.
          Logger.info("Sleeping for 30 secs")
          time.sleep(30)
          Logger.info("LLAP app deployed successfully.")
          return True
        else:
          raise Fail(format("Did not find run file {run_file_path}"))
      except:
        # Attempt to clean up the packaged application, or potentially rename it with a .bak
        if run_file_path is not None and cleanup:
          try:
            parent_dir = os.path.dirname(run_file_path)
            if os.path.isdir(parent_dir):
              shutil.rmtree(parent_dir)
          except Exception, e:
            Logger.error("Could not cleanup LLAP app package. Error: " + str(e))

        # throw the original exception
        raise
Example #42
# MUST be run on ambari-server host
import json
import time
from resource_management.core.shell import checked_call, call

# Change this to the hostname of your ambari-server
HOSTNAME = checked_call("hostname -f")[1].strip()

############# Configurations (feel free to change) #############

SERVICE_NAME = "STORM"

COMPONENTS = ["NIMBUS", "SUPERVISOR"]

COMPONENTS_TO_HOSTS = [
    {
        "NIMBUS": HOSTNAME
    },
    {
        "SUPERVISOR": HOSTNAME
    },
    #{"SUPERVISOR": "c6402.ambari.apache.org"},
Example #43
def main():
    # add service
    checked_call(
        'curl -H \'X-Requested-By:anything\' -i -X POST -d \'[{{"ServiceInfo":{{"service_name":"{service_name}"}}}}]\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}/services'
        .format(service_name=SERVICE_NAME,
                server_url=SERVER_URL,
                cluster_name=CLUSTER_NAME))

    # add components
    for component in COMPONENTS:
        checked_call(
            'curl -H \'X-Requested-By:anything\' -i -X POST -d \'{{"components":[{{"ServiceComponentInfo":{{"component_name":"{component}"}}}}]}}\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}/services?ServiceInfo/service_name={service_name}'
            .format(service_name=SERVICE_NAME,
                    component=component,
                    server_url=SERVER_URL,
                    cluster_name=CLUSTER_NAME))

    # assign components to hosts
    for x in COMPONENTS_TO_HOSTS:
        for component, host in x.iteritems():
            checked_call(
                'curl -H \'X-Requested-By:anything\' -i -X POST -d \'{{"host_components":[{{"HostRoles":{{"component_name":"{component}"}}}}]}}\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}/hosts?Hosts/host_name={host}'
                .format(host=host,
                        component=component,
                        server_url=SERVER_URL,
                        cluster_name=CLUSTER_NAME))

    # update and create all the service-specific configurations
    checked_call(
        'curl -H \'X-Requested-By:anything\' -X GET -u admin:admin {server_url}/api/v1/stacks2/HDP/versions/{stack_version}/stackServices/{service_name}/configurations?fields=* > /tmp/config.json'
        .format(server_url=SERVER_URL,
                stack_version=STACK_VERSION,
                service_name=SERVICE_NAME))
    with open('/tmp/config.json', "r") as f:
        d = json.load(f)

    configs = {}
    for x in d['items']:
        site_name = x['StackConfigurations']['type'][:-4]
        if not site_name in configs:
            configs[site_name] = {}
        config = configs[site_name]
        config[x['StackConfigurations']
               ['property_name']] = x['StackConfigurations']['property_value']

    for site_name, site_content in configs.iteritems():
        code = call(
            '/var/lib/ambari-server/resources/scripts/configs.sh get {hostname} {cluster_name} {site_name}'
            .format(hostname=HOSTNAME,
                    cluster_name=CLUSTER_NAME,
                    site_name=site_name))[0]

        if code:
            print "Adding new site: " + site_name
            checked_call(
                'curl -i -H \'X-Requested-By:anything\' -X PUT -d \'{{"Clusters":{{"desired_configs":{{"type":"{site_name}","tag":"version1","properties":{site_content}}}}}}}\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}'
                .format(site_name=site_name,
                        site_content=json.dumps(site_content),
                        server_url=SERVER_URL,
                        cluster_name=CLUSTER_NAME))
        else:
            timestamp = int(time.time())
            print "Modifiying site: " + site_name + " version" + str(timestamp)
            checked_call(
                '/var/lib/ambari-server/resources/scripts/configs.sh get {hostname} {cluster_name} {site_name} /tmp/current_site.json'
                .format(hostname=HOSTNAME,
                        cluster_name=CLUSTER_NAME,
                        site_name=site_name))

            with open('/tmp/current_site.json', "r") as f:
                fcontent = f.read()
                d = json.loads("{" + fcontent + "}")

            for k, v in site_content.iteritems():
                d['properties'][k] = v

            checked_call(
                'curl -i -H \'X-Requested-By:anything\' -X PUT -d \'{{"Clusters":{{"desired_configs":{{"type":"{site_name}","tag":"version{timestamp}","properties":{site_content}}}}}}}\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}'
                .format(site_name=site_name,
                        timestamp=timestamp,
                        site_content=json.dumps(d['properties']),
                        server_url=SERVER_URL,
                        cluster_name=CLUSTER_NAME))

    for site_name, site_configs in CONFIGS_TO_CHANGE.iteritems():
        for config_name, config_value in site_configs.iteritems():
            print "Adding config " + config_name + "=" + config_value + " to " + site_name
            checked_call(
                '/var/lib/ambari-server/resources/scripts/configs.sh set {hostname} {cluster_name} {site_name} {config_name} {config_value}'
                .format(config_name=config_name,
                        config_value=config_value,
                        hostname=HOSTNAME,
                        cluster_name=CLUSTER_NAME,
                        site_name=site_name))

    # install all new components
    checked_call(
        'curl -H \'X-Requested-By:anything\' -i -X PUT -d  \'{{"RequestInfo": {{"context" :"Installing Services"}}, "Body": {{"ServiceInfo": {{"state": "INSTALLED"}}}}}}\' -u admin:admin {server_url}/api/v1/clusters/{cluster_name}/services?ServiceInfo/state=INIT'
        .format(server_url=SERVER_URL, cluster_name=CLUSTER_NAME))
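A note on the payload strings in main() above: str.format treats single braces as placeholders, so every literal JSON brace in the curl bodies is doubled. For instance:

# '{{' and '}}' render as literal '{' and '}' after .format():
tpl = '{{"ServiceInfo":{{"service_name":"{service_name}"}}}}'
print tpl.format(service_name="STORM")
# -> {"ServiceInfo":{"service_name":"STORM"}}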
Example #44
0
def curl_krb_request(tmp_dir, keytab, principal, url, cache_file_prefix,
    krb_exec_search_paths, return_only_http_code, caller_label, user,
    connection_timeout=CONNECTION_TIMEOUT_DEFAULT,
    kinit_timer_ms=DEFAULT_KERBEROS_KINIT_TIMER_MS, method='', body='', header=''):
  """
  Makes a curl request using the kerberos credentials stored in a calculated cache file. The
  cache file is created by combining the supplied principal, keytab, user, and request name into
  a unique hash.

  This function will use the klist command to determine if the cache is expired and will perform
  a kinit if necessary. Additionally, it has an internal timer to force a kinit after a
  configurable amount of time. This is to prevent boundary issues where requests hit the edge
  of a ticket's lifetime.

  :param tmp_dir: the directory to use for storing the local kerberos cache for this request.
  :param keytab: the location of the keytab to use when performing a kinit
  :param principal: the principal to use when performing a kinit
  :param url: the URL to request
  :param cache_file_prefix: an identifier used to build the unique cache name for this request.
                            This ensures that multiple requests can use the same cache.
  :param krb_exec_search_paths: the search path to use for invoking kerberos binaries
  :param return_only_http_code: True to return only the HTTP code, False to return GET content
  :param caller_label: an identifier to give context into the caller of this module (used for logging)
  :param user: the user to invoke the curl command as
  :param connection_timeout: if specified, a connection timeout for curl (default 10 seconds)
  :param kinit_timer_ms: if specified, the time (in ms) before forcing a kinit even if the
                         klist cache is still valid
  :param method: the HTTP method for curl (e.g. GET, PUT); empty uses curl's default
  :param body: the request body to send with -d, if any
  :param header: a header string to send with -H, if any
  :return: a tuple of (HTTP code or response body, error message, elapsed time)
  """

  import uuid

  # start off false
  is_kinit_required = False

  # Create the kerberos credentials cache (ccache) file and set it in the environment to use
  # when executing curl. Use the md5 hash of the combination of the principal and keytab file
  # to generate a (relatively) unique cache filename so that we can use it as needed. Scope
  # this file by user in order to prevent sharing of cache files by multiple users.
  ccache_file_name = _md5("{0}|{1}".format(principal, keytab)).hexdigest()

  curl_krb_cache_path = os.path.join(tmp_dir, "curl_krb_cache")
  if not os.path.exists(curl_krb_cache_path):
    os.makedirs(curl_krb_cache_path)
  os.chmod(curl_krb_cache_path, 0777)

  ccache_file_path = "{0}{1}{2}_{3}_cc_{4}".format(curl_krb_cache_path, os.sep, cache_file_prefix, user, ccache_file_name)
  kerberos_env = {'KRB5CCNAME': ccache_file_path}

  # concurrent kinit's can cause the following error:
  # Internal credentials cache error while storing credentials while getting initial credentials
  kinit_lock = global_lock.get_lock(global_lock.LOCK_TYPE_KERBEROS)
  kinit_lock.acquire()
  try:
    # If there are no tickets in the cache or they are expired, perform a kinit, else use what
    # is in the cache
    if krb_exec_search_paths:
      klist_path_local = get_klist_path(krb_exec_search_paths)
    else:
      klist_path_local = get_klist_path()

    # take a look at the last time kinit was run for the specified cache and force a new
    # kinit if it's time; this helps to avoid problems approaching ticket boundary when
    # executing a klist and then a curl
    last_kinit_time = _KINIT_CACHE_TIMES.get(ccache_file_name, 0)
    current_time = long(time.time())
    # NB: time.time() is in seconds while kinit_timer_ms is in milliseconds, so this
    # comparison appears to make the forced-kinit interval far longer than intended
    if current_time - kinit_timer_ms > last_kinit_time:
      is_kinit_required = True

    # if the time has not expired, double-check that the cache still has a valid ticket
    if not is_kinit_required:
      klist_command = "{0} -s {1}".format(klist_path_local, ccache_file_path)
      is_kinit_required = (shell.call(klist_command, user=user)[0] != 0)

    # if a kinit is required, then perform it
    if is_kinit_required:
      if krb_exec_search_paths:
        kinit_path_local = get_kinit_path(krb_exec_search_paths)
      else:
        kinit_path_local = get_kinit_path()

      logger.debug("Enabling Kerberos authentication for %s via GSSAPI using ccache at %s",
        caller_label, ccache_file_path)

      # kinit; there's no need to set a ticket timeout as this will use the default invalidation
      # configured in the krb5.conf - regenerating keytabs will not prevent an existing cache
      # from working correctly
      shell.checked_call("{0} -c {1} -kt {2} {3} > /dev/null".format(kinit_path_local,
        ccache_file_path, keytab, principal), user=user)

      # record kinit time
      _KINIT_CACHE_TIMES[ccache_file_name] = current_time
    else:
      # no kinit needed, use the cache
      logger.debug("Kerberos authentication for %s via GSSAPI already enabled using ccache at %s.",
        caller_label, ccache_file_path)
  finally:
    kinit_lock.release()

  # check if cookies dir exists, if not then create it
  cookies_dir = os.path.join(tmp_dir, "cookies")

  if not os.path.exists(cookies_dir):
    os.makedirs(cookies_dir)

  cookie_file_name = str(uuid.uuid4())
  cookie_file = os.path.join(cookies_dir, cookie_file_name)

  start_time = time.time()
  error_msg = None

  # set up timeouts for the request; ensure we use integers since that is what curl needs
  connection_timeout = int(connection_timeout)
  maximum_timeout = connection_timeout + 2

  try:
    if return_only_http_code:
      _, curl_stdout, curl_stderr = get_user_call_output(['curl', '--location-trusted', '-k', '--negotiate', '-u', ':', '-b', cookie_file, '-c', cookie_file, '-w',
                             '%{http_code}', url, '--connect-timeout', str(connection_timeout), '--max-time', str(maximum_timeout), '-o', '/dev/null'],
                             user=user, env=kerberos_env)
    else:
      curl_command = ['curl', '--location-trusted', '-k', '--negotiate', '-u', ':', '-b', cookie_file, '-c', cookie_file,
                      url, '--connect-timeout', str(connection_timeout), '--max-time', str(maximum_timeout)]
      # returns response body
      if len(method) > 0 and len(body) == 0 and len(header) == 0:
        curl_command.extend(['-X', method])

      elif len(method) > 0 and len(body) == 0 and len(header) > 0:
        curl_command.extend(['-H', header, '-X', method])

      elif len(method) > 0 and len(body) > 0 and len(header) == 0:
        curl_command.extend(['-X', method, '-d', body])

      elif len(method) > 0 and len(body) > 0 and len(header) > 0:
        curl_command.extend(['-H', header, '-X', method, '-d', body])

      _, curl_stdout, curl_stderr = get_user_call_output(curl_command, user=user, env=kerberos_env)

  except Fail:
    if logger.isEnabledFor(logging.DEBUG):
      logger.exception("Unable to make a curl request for {0}.".format(caller_label))
    raise
  finally:
    if os.path.isfile(cookie_file):
      os.remove(cookie_file)

  # an empty string evaluates to False
  if curl_stderr:
    error_msg = curl_stderr

  time_millis = time.time() - start_time  # note: despite the name, this is elapsed seconds

  # an empty string evaluates to False
  if curl_stdout:
    if return_only_http_code:
      return (int(curl_stdout), error_msg, time_millis)
    else:
      return (curl_stdout, error_msg, time_millis)

  logger.debug("The curl response for %s is empty; standard error = %s",
    caller_label, str(error_msg))

  return ("", error_msg, time_millis)
Example #45
0
    def _llap_start(self, env, cleanup=False):
        import params
        env.set_params(params)

        if params.hive_server_interactive_ha:
            """
        Check llap app state
        """
            Logger.info(
                "HSI HA is enabled. Checking if LLAP is already running ...")
            status = self.check_llap_app_status(
                params.llap_app_name, 2, params.hive_server_interactive_ha)
            if status:
                Logger.info("LLAP app '{0}' is already running.".format(
                    params.llap_app_name))
                return True
            else:
                Logger.info(
                    "LLAP app '{0}' is not running. llap will be started.".
                    format(params.llap_app_name))
            pass

        # Clean up LLAP package folders left over from earlier runs.
        self._cleanup_past_llap_package_dirs()

        Logger.info("Starting LLAP")
        LLAP_PACKAGE_CREATION_PATH = Script.get_tmp_dir()

        unique_name = "llap-yarn-service_%s" % datetime.utcnow().strftime(
            '%Y-%m-%d_%H-%M-%S')

        cmd = format(
            "{stack_root}/current/hive-server2/bin/hive --service llap --size {params.llap_daemon_container_size}m --startImmediately --name {params.llap_app_name} "
            "--cache {params.hive_llap_io_mem_size}m --xmx {params.llap_heap_size}m --loglevel {params.llap_log_level} "
            "--output {LLAP_PACKAGE_CREATION_PATH}/{unique_name}")

        # Append params that are supported from Hive llap GA version.
        # TODO: All the code related to Slider Anti-affinity will be removed and
        # replaced with YARN rich placement once YARN-6599 (umbrella YARN-6592)
        # is committed.
        # Figure out the Slider Anti-affinity to be used.
        # YARN does not support anti-affinity, so Slider implements AA by means of exclusion lists, i.e., it
        # starts containers one by one and excludes the nodes it gets (adding a delay of ~2 sec./machine). When the LLAP
        # container memory size configuration is more than half of the YARN node memory, AA is implicit and should be avoided.
        slider_placement = 4
        if long(params.llap_daemon_container_size) > (
                0.5 * long(params.yarn_nm_mem)):
            slider_placement = 0
            Logger.info(
                "Setting slider_placement : 0, as llap_daemon_container_size : {0} > 0.5 * "
                "YARN NodeManager Memory({1})".format(
                    params.llap_daemon_container_size, params.yarn_nm_mem))
        else:
            Logger.info(
                "Setting slider_placement: 4, as llap_daemon_container_size : {0} <= 0.5 * "
                "YARN NodeManager Memory({1})".format(
                    params.llap_daemon_container_size, params.yarn_nm_mem))
        cmd += format(
            " --service-placement {slider_placement} --skiphadoopversion --auxhbase=false --skiphbasecp --instances {params.num_llap_daemon_running_nodes}"
        )

        # Setup the logger for the ga version only
        cmd += format(" --logger {params.llap_logger}")

        if params.security_enabled:
            llap_keytab_splits = params.hive_llap_keytab_file.split("/")
            Logger.debug("llap_keytab_splits : {0}".format(llap_keytab_splits))
            cmd += format(
                " --service-keytab-dir .yarn/keytabs/{params.hive_user}/ --service-keytab "
                "{llap_keytab_splits[4]} --service-principal {params.hive_llap_principal}"
            )

        # Add the aux jars if they are specified. If empty, we don't need to add this param.
        if params.hive_aux_jars:
            cmd += format(" --auxjars {params.hive_aux_jars}")

        # Append args.
        llap_java_args = InlineTemplate(
            params.llap_app_java_opts).get_content()
        cmd += format(" --args \" {llap_java_args}\"")
        # Append metaspace size to args.
        if params.java_version > 7 and params.llap_daemon_container_size > 4096:
            if params.llap_daemon_container_size <= 32768:
                metaspaceSize = "256m"
            else:
                metaspaceSize = "1024m"
            cmd = cmd[:-1] + " -XX:MetaspaceSize=" + metaspaceSize + "\""

        try:
            Logger.info(format("LLAP start command: {cmd}"))
            code, output, error = shell.checked_call(
                cmd,
                user=params.hive_user,
                quiet=True,
                stderr=subprocess.PIPE,
                logoutput=True,
                env={'HIVE_CONF_DIR': params.hive_server_interactive_conf_dir})

            if code != 0 or output is None:
                raise Fail(
                    "Command failed with either non-zero return code or no output."
                )

            # We need to check the LLAP app status to confirm it launched properly and is
            # in a running state before going ahead with the Hive Interactive Server start.
            status = self.check_llap_app_status(
                params.llap_app_name,
                params.num_retries_for_checking_llap_status)
            if status:
                Logger.info("LLAP app '{0}' deployed successfully.".format(
                    params.llap_app_name))
                return True
            else:
                Logger.error("LLAP app '{0}' deployment unsuccessful.".format(
                    params.llap_app_name))
                return False
        except:
            if params.hive_server_interactive_ha:
                Logger.error(
                    "Exception occurred. Checking if LLAP was started by another HSI instance ..."
                )
                status = self.check_llap_app_status(
                    params.llap_app_name, 2, params.hive_server_interactive_ha)
                if status:
                    Logger.info("LLAP app '{0}' is running.".format(
                        params.llap_app_name))
                    return True
                else:
                    Logger.info("LLAP app '{0}' is not running.".format(
                        params.llap_app_name))

                raise  # throw the original exception
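A worked instance of the placement check above, with illustrative numbers:

# Illustrative: 16 GB LLAP daemon containers on 24 GB NodeManagers.
llap_daemon_container_size = 16384  # MB
yarn_nm_mem = 24576                 # MB
# 16384 > 0.5 * 24576 (= 12288.0), so anti-affinity would be implicit:
slider_placement = 0 if llap_daemon_container_size > 0.5 * yarn_nm_mem else 4
print slider_placement  # -> 0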
Example #46
0
 def action_remove(self):
     if self.user:
         command = ['userdel', self.resource.username]
         shell.checked_call(command)
         Logger.info("Removed user %s" % self.resource)
Example #47
0
 def chmod(path, mode):
     shell.checked_call(["chmod", oct(mode), path], sudo=True)
Example #48
0
def flume(action=None):
    import params

    if action == 'config':
        # remove previously defined meta's
        for n in find_expected_agent_names(params.flume_conf_dir):
            File(
                os.path.join(params.flume_conf_dir, n, 'ambari-meta.json'),
                action="delete",
            )

        Directory(
            params.flume_run_dir,
            group=params.user_group,
            owner=params.flume_user,
        )

        Directory(
            params.flume_conf_dir,
            create_parents=True,
            owner=params.flume_user,
        )
        Directory(
            params.flume_log_dir,
            group=params.user_group,
            owner=params.flume_user,
            create_parents=True,
            cd_access="a",
            mode=0755,
        )

        flume_agents = {}
        if params.flume_conf_content is not None:
            flume_agents = build_flume_topology(params.flume_conf_content)

        for agent in flume_agents.keys():
            flume_agent_conf_dir = os.path.join(params.flume_conf_dir, agent)
            flume_agent_conf_file = os.path.join(flume_agent_conf_dir,
                                                 'flume.conf')
            flume_agent_meta_file = os.path.join(flume_agent_conf_dir,
                                                 'ambari-meta.json')
            flume_agent_log4j_file = os.path.join(flume_agent_conf_dir,
                                                  'log4j.properties')
            flume_agent_env_file = os.path.join(flume_agent_conf_dir,
                                                'flume-env.sh')

            Directory(
                flume_agent_conf_dir,
                owner=params.flume_user,
            )

            PropertiesFile(flume_agent_conf_file,
                           properties=flume_agents[agent],
                           owner=params.flume_user,
                           mode=0644)

            File(flume_agent_log4j_file,
                 content=InlineTemplate(params.flume_log4j_content,
                                        agent_name=agent),
                 owner=params.flume_user,
                 mode=0644)

            File(flume_agent_meta_file,
                 content=json.dumps(ambari_meta(agent, flume_agents[agent])),
                 owner=params.flume_user,
                 mode=0644)

            File(flume_agent_env_file,
                 owner=params.flume_user,
                 content=InlineTemplate(params.flume_env_sh_template))

            if params.has_metric_collector:
                File(os.path.join(flume_agent_conf_dir,
                                  "flume-metrics2.properties"),
                     owner=params.flume_user,
                     content=Template("flume-metrics2.properties.j2"))

    elif action == 'start':
        # desired state for service should be STARTED
        if len(params.flume_command_targets) == 0:
            _set_desired_state('STARTED')

        # It is important to run this command as a background process.

        flume_base = as_user(format(
            "{flume_bin} agent --name {{0}} --conf {{1}} --conf-file {{2}} {{3}} > {flume_log_dir}/{{4}}.out 2>&1"
        ),
                             params.flume_user,
                             env={'JAVA_HOME': params.java_home}) + " &"

        for agent in cmd_target_names():
            flume_agent_conf_dir = params.flume_conf_dir + os.sep + agent
            flume_agent_conf_file = flume_agent_conf_dir + os.sep + "flume.conf"
            flume_agent_pid_file = params.flume_run_dir + os.sep + agent + ".pid"

            if not os.path.isfile(flume_agent_conf_file):
                continue

            if not is_flume_process_live(flume_agent_pid_file):
                # TODO someday make the ganglia ports configurable
                extra_args = ''
                if params.ganglia_server_host is not None:
                    extra_args = '-Dflume.monitoring.type=ganglia -Dflume.monitoring.hosts={0}:{1}'
                    extra_args = extra_args.format(params.ganglia_server_host,
                                                   '8655')
                if params.has_metric_collector:
                    extra_args = '-Dflume.monitoring.type=org.apache.hadoop.metrics2.sink.flume.FlumeTimelineMetricsSink ' \
                                 '-Dflume.monitoring.node={0}:{1}'
                    # TODO check if this is used.
                    extra_args = extra_args.format(
                        params.metric_collector_host,
                        params.metric_collector_port)

                flume_cmd = flume_base.format(agent, flume_agent_conf_dir,
                                              flume_agent_conf_file,
                                              extra_args, agent)

                Execute(flume_cmd,
                        wait_for_finish=False,
                        environment={'JAVA_HOME': params.java_home})
                # sometimes startup spawns a couple of threads - so only the first line may count
                pid_cmd = as_sudo(('pgrep', '-o', '-u', params.flume_user, '-f', format('^{java_home}.*{agent}.*'))) + \
                          " | " + as_sudo(('tee', flume_agent_pid_file)) + " && test ${PIPESTATUS[0]} -eq 0"

                try:
                    Execute(pid_cmd, logoutput=True, tries=20, try_sleep=10)
                except:
                    show_logs(params.flume_log_dir, params.flume_user)
                    raise

        pass
    elif action == 'stop':
        # desired state for service should be INSTALLED
        if len(params.flume_command_targets) == 0:
            _set_desired_state('INSTALLED')

        pid_files = glob.glob(params.flume_run_dir + os.sep + "*.pid")

        if 0 == len(pid_files):
            return

        agent_names = cmd_target_names()

        for agent in agent_names:
            pid_file = format("{flume_run_dir}/{agent}.pid")

            if is_flume_process_live(pid_file):
                pid = shell.checked_call(("cat", pid_file),
                                         sudo=True)[1].strip()
                Execute(("kill", "-15", pid),
                        sudo=True)  # kill command has to be a tuple
                if not await_flume_process_termination(pid_file, try_count=30):
                    Execute(("kill", "-9", pid), sudo=True)

            if not await_flume_process_termination(pid_file, try_count=10):
                show_logs(params.flume_log_dir, params.flume_user)
                raise Fail("Can't stop flume agent: {0}".format(agent))

            File(pid_file, action='delete')
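await_flume_process_termination is used above but not shown in this excerpt; a minimal sketch of what such a helper might look like (an assumption, not the original implementation):

import time

def await_flume_process_termination(pid_file, try_count=10, sleep_seconds=1):
    # Poll until the process behind pid_file is gone, or give up after try_count checks.
    for _ in range(try_count):
        if not is_flume_process_live(pid_file):
            return True
        time.sleep(sleep_seconds)
    return False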
Example #49
0
 def makedir(path, mode):
     shell.checked_call(["mkdir", path], sudo=True)
     chmod(path, mode)
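Note this runs plain mkdir, so the parent directory must already exist. A hypothetical variant that also creates parents would pass -p:

def makedirs(path, mode):
    # Like makedir(), but also creates any missing parent directories.
    shell.checked_call(["mkdir", "-p", path], sudo=True)
    chmod(path, mode)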
Example #50
0
    def _llap_start(self, env, cleanup=False):
        import params
        env.set_params(params)
        Logger.info("Starting LLAP")
        LLAP_PACKAGE_CREATION_PATH = Script.get_tmp_dir()
        LLAP_APP_NAME = 'llap0'

        unique_name = "llap-slider%s" % datetime.utcnow().strftime(
            '%Y-%m-%d_%H-%M-%S')

        cmd = format(
            "{stack_root}/current/hive-server2-hive2/bin/hive --service llap --instances {params.num_llap_nodes}"
            " --slider-am-container-mb {params.slider_am_container_mb} --size {params.llap_daemon_container_size}m "
            " --cache {params.hive_llap_io_mem_size}m --xmx {params.llap_heap_size}m --loglevel {params.llap_log_level}"
            " --output {LLAP_PACKAGE_CREATION_PATH}/{unique_name}")
        if params.security_enabled:
            llap_keytab_splits = params.hive_llap_keytab_file.split("/")
            Logger.debug("llap_keytab_splits : {0}".format(llap_keytab_splits))
            cmd += format(
                " --slider-keytab-dir .slider/keytabs/{params.hive_user}/ --slider-keytab "
                "{llap_keytab_splits[4]} --slider-principal {params.hive_llap_principal}"
            )

        # Add the aux jars if they are specified. If empty, we don't need to add this param.
        if params.hive_aux_jars:
            cmd += format(" --auxjars {params.hive_aux_jars}")

        # Append args.
        llap_java_args = InlineTemplate(
            params.llap_app_java_opts).get_content()
        cmd += format(" --args \" {llap_java_args}\"")

        run_file_path = None
        try:
            Logger.info(format("Command: {cmd}"))
            code, output, error = shell.checked_call(cmd,
                                                     user=params.hive_user,
                                                     stderr=subprocess.PIPE,
                                                     logoutput=True)

            if code != 0 or output is None:
                raise Fail(
                    "Command failed with either non-zero return code or no output."
                )

            # E.g., output:
            # Prepared llap-slider-05Apr2016/run.sh for running LLAP on Slider
            exp = r"Prepared (.*?run.sh) for running LLAP"
            run_file_path = None
            out_splits = output.split("\n")
            for line in out_splits:
                line = line.strip()
                m = re.match(exp, line, re.I)
                if m and len(m.groups()) == 1:
                    run_file_name = m.group(1)
                    run_file_path = os.path.join(params.hive_user_home_dir,
                                                 run_file_name)
                    break
            if not run_file_path:
                raise Fail("Did not find run.sh file in output: " +
                           str(output))

            Logger.info(format("Run file path: {run_file_path}"))
            Execute(run_file_path, user=params.hive_user, logoutput=True)
            Logger.info("Submitted LLAP app name : {0}".format(LLAP_APP_NAME))

            # We need to check the LLAP app status to confirm it launched properly and is
            # in a running state before going ahead with the Hive Interactive Server start.
            status = self.check_llap_app_status(
                LLAP_APP_NAME, params.num_retries_for_checking_llap_status)
            if status:
                Logger.info("LLAP app '{0}' deployed successfully.".format(
                    LLAP_APP_NAME))
                return True
            else:
                Logger.error("LLAP app '{0}' deployment unsuccessful.".format(
                    LLAP_APP_NAME))
                return False
        except:
            # Attempt to clean up the packaged application, or potentially rename it with a .bak
            if run_file_path is not None and cleanup:
                try:
                    parent_dir = os.path.dirname(run_file_path)
                    if os.path.isdir(parent_dir):
                        shutil.rmtree(parent_dir)
                except Exception, e:
                    Logger.error(
                        "Could not cleanup LLAP app package. Error: " + str(e))

            # throw the original exception
            raise
Example #51
0
 def unlink(path):
     shell.checked_call(["rm", "-f", path], sudo=True)
Example #52
0
    def _llap_start(self, env, cleanup=False):
        import params
        env.set_params(params)

        if params.hive_server_interactive_ha:
            """
        Check llap app state
        """
            Logger.info(
                "HSI HA is enabled. Checking if LLAP is already running ...")
            if params.stack_supports_hive_interactive_ga:
                status = self.check_llap_app_status_in_llap_ga(
                    params.llap_app_name, 2, params.hive_server_interactive_ha)
            else:
                status = self.check_llap_app_status_in_llap_tp(
                    params.llap_app_name, 2, params.hive_server_interactive_ha)

            if status:
                Logger.info("LLAP app '{0}' is already running.".format(
                    params.llap_app_name))
                return True
            else:
                Logger.info(
                    "LLAP app '{0}' is not running. llap will be started.".
                    format(params.llap_app_name))
            pass

        # Clean up LLAP package folders left over from earlier runs.
        self._cleanup_past_llap_package_dirs()

        Logger.info("Starting LLAP")
        LLAP_PACKAGE_CREATION_PATH = Script.get_tmp_dir()

        unique_name = "llap-slider%s" % datetime.utcnow().strftime(
            '%Y-%m-%d_%H-%M-%S')

        cmd = format(
            "{stack_root}/current/hive-server2-hive2/bin/hive --service llap --slider-am-container-mb {params.slider_am_container_mb} "
            "--size {params.llap_daemon_container_size}m --cache {params.hive_llap_io_mem_size}m --xmx {params.llap_heap_size}m "
            "--loglevel {params.llap_log_level} {params.llap_extra_slider_opts} --output {LLAP_PACKAGE_CREATION_PATH}/{unique_name}"
        )

        # Append params that are supported from Hive llap GA version.
        if params.stack_supports_hive_interactive_ga:
            # Figure out the Slider Anti-affinity to be used.
            # YARN does not support anti-affinity, so Slider implements AA by means of exclusion lists, i.e., it
            # starts containers one by one and excludes the nodes it gets (adding a delay of ~2 sec./machine). When the LLAP
            # container memory size configuration is more than half of the YARN node memory, AA is implicit and should be avoided.
            slider_placement = 4
            if long(params.llap_daemon_container_size) > (
                    0.5 * long(params.yarn_nm_mem)):
                slider_placement = 0
                Logger.info(
                    "Setting slider_placement : 0, as llap_daemon_container_size : {0} > 0.5 * "
                    "YARN NodeManager Memory({1})".format(
                        params.llap_daemon_container_size, params.yarn_nm_mem))
            else:
                Logger.info(
                    "Setting slider_placement: 4, as llap_daemon_container_size : {0} <= 0.5 * "
                    "YARN NodeManager Memory({1})".format(
                        params.llap_daemon_container_size, params.yarn_nm_mem))
            cmd += format(
                " --slider-placement {slider_placement} --skiphadoopversion --skiphbasecp --instances {params.num_llap_daemon_running_nodes}"
            )

            # Setup the logger for the ga version only
            cmd += format(" --logger {params.llap_logger}")
        else:
            cmd += format(" --instances {params.num_llap_nodes}")
        if params.security_enabled:
            llap_keytab_splits = params.hive_llap_keytab_file.split("/")
            Logger.debug("llap_keytab_splits : {0}".format(llap_keytab_splits))
            cmd += format(
                " --slider-keytab-dir .slider/keytabs/{params.hive_user}/ --slider-keytab "
                "{llap_keytab_splits[4]} --slider-principal {params.hive_llap_principal}"
            )

        # Add the aux jars if they are specified. If empty, we don't need to add this param.
        if params.hive_aux_jars:
            cmd += format(" --auxjars {params.hive_aux_jars}")

        # Append args.
        llap_java_args = InlineTemplate(
            params.llap_app_java_opts).get_content()
        cmd += format(" --args \" {llap_java_args}\"")
        # Append metaspace size to args.
        if params.java_version > 7 and params.llap_daemon_container_size > 4096:
            if params.llap_daemon_container_size <= 32768:
                metaspaceSize = "256m"
            else:
                metaspaceSize = "1024m"
            cmd = cmd[:-1] + " -XX:MetaspaceSize=" + metaspaceSize + "\""

        run_file_path = None
        try:
            Logger.info(format("LLAP start command: {cmd}"))
            code, output, error = shell.checked_call(cmd,
                                                     user=params.hive_user,
                                                     quiet=True,
                                                     stderr=subprocess.PIPE,
                                                     logoutput=True)

            if code != 0 or output is None:
                raise Fail(
                    "Command failed with either non-zero return code or no output."
                )

            # E.g., output:
            # Prepared llap-slider-05Apr2016/run.sh for running LLAP on Slider
            exp = r"Prepared (.*?run.sh) for running LLAP"
            run_file_path = None
            out_splits = output.split("\n")
            for line in out_splits:
                line = line.strip()
                m = re.match(exp, line, re.I)
                if m and len(m.groups()) == 1:
                    run_file_name = m.group(1)
                    run_file_path = os.path.join(params.hive_user_home_dir,
                                                 run_file_name)
                    break
            if not run_file_path:
                raise Fail("Did not find run.sh file in output: " +
                           str(output))

            Logger.info(format("Run file path: {run_file_path}"))
            Execute(run_file_path, user=params.hive_user, logoutput=True)
            Logger.info("Submitted LLAP app name : {0}".format(
                params.llap_app_name))

            # We need to check the LLAP app status to confirm it launched properly and is
            # in a running state before going ahead with the Hive Interactive Server start.
            if params.stack_supports_hive_interactive_ga:
                status = self.check_llap_app_status_in_llap_ga(
                    params.llap_app_name,
                    params.num_retries_for_checking_llap_status)
            else:
                status = self.check_llap_app_status_in_llap_tp(
                    params.llap_app_name,
                    params.num_retries_for_checking_llap_status)
            if status:
                Logger.info("LLAP app '{0}' deployed successfully.".format(
                    params.llap_app_name))
                return True
            else:
                Logger.error("LLAP app '{0}' deployment unsuccessful.".format(
                    params.llap_app_name))
                return False
        except:
            # Attempt to clean up the packaged application, or potentially rename it with a .bak
            if run_file_path is not None and cleanup:
                parent_dir = os.path.dirname(run_file_path)
                Directory(
                    parent_dir,
                    action="delete",
                    ignore_failures=True,
                )

            # throw the original exception
            raise
Example #53
0
 def action_remove(self):
     if self.group:
         command = ['groupdel', self.resource.group_name]
         shell.checked_call(command)
         Logger.info("Removed group %s" % self.resource)
Example #54
0
def copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type=None, upgrade_direction=None):
  """
  If the Atlas Hive hook directory is present, Atlas is installed, and this is the first Oozie Server,
  then copy the entire contents of that directory to the Oozie Sharelib in DFS, e.g.,
  /usr/$stack/$current_version/atlas/hook/hive/ -> hdfs:///user/oozie/share/lib/lib_$timestamp/hive

  :param upgrade_type: If in the middle of a stack upgrade, the type as UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param upgrade_direction: If in the middle of a stack upgrade, the direction as Direction.UPGRADE or Direction.DOWNGRADE.
  """
  import params

  # Calculate the effective version since this code can also be called during EU/RU in the upgrade direction.
  effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version)
  if not check_stack_feature(StackFeature.ATLAS_HOOK_SUPPORT, effective_version):
    return
    
  # Important that oozie_server_hostnames is sorted by name so that this only runs on a single Oozie server.
  if not (len(params.oozie_server_hostnames) > 0 and params.hostname == params.oozie_server_hostnames[0]):
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since this is not the first Oozie Server "
                 "sorted by hostname.")
    return

  if not has_atlas_in_cluster():
    Logger.debug("Will not attempt to copy Atlas Hve hook to DFS since Atlas is not installed on the cluster.")
    return

  if upgrade_type is not None and upgrade_direction == Direction.DOWNGRADE:
    Logger.debug("Will not attempt to copy Atlas Hve hook to DFS since in the middle of Rolling/Express upgrade "
                 "and performing a Downgrade.")
    return

  current_version = get_current_version()
  atlas_hive_hook_dir = format("{stack_root}/{current_version}/atlas/hook/hive/")
  if not os.path.exists(atlas_hive_hook_dir):
    Logger.error(format("ERROR. Atlas is installed in cluster but this Oozie server doesn't "
                        "contain directory {atlas_hive_hook_dir}"))
    return

  atlas_hive_hook_impl_dir = os.path.join(atlas_hive_hook_dir, "atlas-hive-plugin-impl")

  num_files = len([name for name in os.listdir(atlas_hive_hook_impl_dir) if os.path.exists(os.path.join(atlas_hive_hook_impl_dir, name))])
  Logger.info("Found %d files/directories inside Atlas Hive hook impl directory %s"% (num_files, atlas_hive_hook_impl_dir))

  # This can return over 100 files, so take the first 5 lines after "Available ShareLib"
  # Use -oozie http(s)://localhost:{oozie_server_admin_port}/oozie since oozie-env does not export OOZIE_URL
  command = format(r'source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -shareliblist hive | grep "\[Available ShareLib\]" -A 5')
  code, out = checked_call(command, user=params.oozie_user, tries=10, try_sleep=5, logoutput=True)

  hive_sharelib_dir = __parse_sharelib_from_output(out)

  if hive_sharelib_dir is None:
    raise Fail("Could not parse Hive sharelib from output.")

  Logger.info(format("Parsed Hive sharelib = {hive_sharelib_dir} and will attempt to copy/replace {num_files} files to it from {atlas_hive_hook_impl_dir}"))

  params.HdfsResource(hive_sharelib_dir,
                      type="directory",
                      action="create_on_execute",
                      source=atlas_hive_hook_impl_dir,
                      user=params.hdfs_user,
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      recursive_chown=True,
                      recursive_chmod=True,
                      replace_existing_files=True
                      )

  Logger.info("Copying Atlas Hive hook properties file to Oozie Sharelib in DFS.")
  atlas_hook_filepath_source = os.path.join(params.hive_conf_dir, params.atlas_hook_filename)
  atlas_hook_file_path_dest_in_dfs = os.path.join(hive_sharelib_dir, params.atlas_hook_filename)
  params.HdfsResource(atlas_hook_file_path_dest_in_dfs,
                      type="file",
                      source=atlas_hook_filepath_source,
                      action="create_on_execute",
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      replace_existing_files=True
                      )
  params.HdfsResource(None, action="execute")

  # Update the sharelib after making any changes
  # Use -oozie http(s)://localhost:{oozie_server_admin_port}/oozie since oozie-env does not export OOZIE_URL
  Execute(format("source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -sharelibupdate"),
          user=params.oozie_user,
          tries=5,
          try_sleep=5,
          logoutput=True,
  )
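__parse_sharelib_from_output is referenced above but not shown here. Given the grep for "[Available ShareLib]" in the command, a plausible sketch (an assumption, not the original code):

def __parse_sharelib_from_output(out):
    # Scan the lines after "[Available ShareLib]" for the hive sharelib DFS path.
    if out:
        marker_seen = False
        for line in out.splitlines():
            line = line.strip()
            if "[Available ShareLib]" in line:
                marker_seen = True
            elif marker_seen and "hdfs://" in line:
                # lines look roughly like "hive=hdfs://.../share/lib/lib_<ts>/hive"
                return line[line.index("hdfs://"):]
    return None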
Example #55
0
 def remove_package(self, name):
   shell.checked_call(REMOVE_CMD % (name))    
Example #56
0
 def install_package(self, name):
   shell.checked_call(INSTALL_CMD % (name))
Example #57
0
 def checked_call(self, cmd, **kwargs):
     return shell.checked_call(cmd, **kwargs)
Example #58
0
    def install_package(self, name, use_repos=[], skip_repos=[]):
        if use_repos or not self._check_existence(name):
            cmd = INSTALL_CMD[self.get_logoutput()]
            copied_sources_files = []
            is_tmp_dir_created = False
            if use_repos:
                is_tmp_dir_created = True
                apt_sources_list_tmp_dir = tempfile.mkdtemp(
                    suffix="-ambari-apt-sources-d")
                Logger.info("Temporary sources directory was created: %s" %
                            apt_sources_list_tmp_dir)
                if 'base' not in use_repos:
                    cmd = cmd + ['-o', 'Dir::Etc::SourceList=%s' % EMPTY_FILE]
                for repo in use_repos:
                    if repo != 'base':
                        new_sources_file = os.path.join(
                            apt_sources_list_tmp_dir, repo + '.list')
                        Logger.info(
                            "Temporary sources file will be copied: %s" %
                            new_sources_file)
                        sudo.copy(
                            os.path.join(APT_SOURCES_LIST_DIR, repo + '.list'),
                            new_sources_file)
                        copied_sources_files.append(new_sources_file)
                cmd = cmd + [
                    '-o',
                    'Dir::Etc::SourceParts=%s' % apt_sources_list_tmp_dir
                ]

            cmd = cmd + [name]
            Logger.info("Installing package %s ('%s')" %
                        (name, string_cmd_from_args_list(cmd)))
            code, out = shell.call(cmd,
                                   sudo=True,
                                   env=INSTALL_CMD_ENV,
                                   logoutput=self.get_logoutput())

            # apt-get update may not have been run recently
            if code:
                Logger.info("Execution of '%s' returned %d. %s" %
                            (cmd, code, out))
                Logger.info("Failed to install package %s. Executing `%s`" %
                            (name, string_cmd_from_args_list(REPO_UPDATE_CMD)))
                code, out = shell.call(REPO_UPDATE_CMD,
                                       sudo=True,
                                       logoutput=self.get_logoutput())

                if code:
                    Logger.info("Execution of '%s' returned %d. %s" %
                                (REPO_UPDATE_CMD, code, out))

                Logger.info("Retrying to install package %s" % (name))
                shell.checked_call(cmd,
                                   sudo=True,
                                   logoutput=self.get_logoutput())

            if is_tmp_dir_created:
                for temp_sources_file in copied_sources_files:
                    Logger.info("Removing temporary sources file: %s" %
                                temp_sources_file)
                    os.remove(temp_sources_file)
                Logger.info("Removing temporary sources directory: %s" %
                            apt_sources_list_tmp_dir)
                os.rmdir(apt_sources_list_tmp_dir)
        else:
            Logger.info("Skipping installation of existing package %s" %
                        (name))
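A usage sketch for install_package above (the provider instance, repo, and package names are illustrative): passing use_repos copies just those .list files into a temporary Dir::Etc::SourceParts directory, and omitting 'base' points Dir::Etc::SourceList at an empty file so only the named repos are consulted:

# Illustrative: install only from /etc/apt/sources.list.d/HDP.list.
provider.install_package("hadoop-client", use_repos=["HDP"])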
Example #59
0
 def update(self, repo_file_path):
     Logger.info(
         "Flushing package manager cache since repo file content is about to change"
     )
     checked_call(self.update_cmd, sudo=True)
Example #60
0
def chown(path, owner, group):
    if owner:
        shell.checked_call(["chown", owner, path], sudo=True)
    if group:
        shell.checked_call(["chgrp", group, path], sudo=True)