def should_install_falcon_atlas_hook():
  config = Script.get_config()
  stack_version_unformatted = config['hostLevelParams']['stack_version']
  stack_version_formatted = format_stack_version(stack_version_unformatted)
  if check_stack_feature(StackFeature.FALCON_ATLAS_SUPPORT_2_3, stack_version_formatted) \
      or check_stack_feature(StackFeature.FALCON_ATLAS_SUPPORT, stack_version_formatted):
    return _has_applicable_local_component(config, ['FALCON_SERVER'])
  return False

def install_windows_msi(url_base, save_dir, save_files, hadoop_user, hadoop_password, stack_version):
  global _working_dir
  _working_dir = save_dir
  save_dir = os.path.abspath(save_dir)
  msi_save_dir = save_dir
  # system-wide lock to prevent simultaneous installations (e.g. when a previous task failed on a timeout)
  install_lock = SystemWideLock("Global\\hdp_msi_lock")
  try:
    # try to acquire lock
    if not install_lock.lock():
      Logger.info("Some other task currently installing hdp.msi, waiting for 10 min for finish")
      if not install_lock.lock(600000):
        raise Fail("Timeout on acquiring lock")
    if _validate_msi_install():
      Logger.info("hdp.msi already installed")
      return

    stack_version_formatted = format_stack_version(stack_version)
    hdp_22_specific_props = ''
    if stack_version_formatted != "" and compare_versions(stack_version_formatted, '2.2') >= 0:
      hdp_22_specific_props = hdp_22.format(data_dir=data_dir)

    # MSIs cannot be larger than 2GB. HDPWIN 2.3 had to be split to accommodate this limitation
    msi_file = ''
    for save_file in save_files:
      if save_file.lower().endswith(".msi"):
        msi_file = save_file
      file_url = urlparse.urljoin(url_base, save_file)
      try:
        download_file(file_url, os.path.join(msi_save_dir, save_file))
      except:
        raise Fail("Failed to download {url}".format(url=file_url))

    File(os.path.join(msi_save_dir, "properties.txt"), content=cluster_properties.format(log_dir=log_dir,
                                                                                         data_dir=data_dir,
                                                                                         local_host=local_host,
                                                                                         db_flavor=db_flavor,
                                                                                         hdp_22_specific_props=hdp_22_specific_props))

    # install msi
    msi_path = os_utils.quote_path(os.path.join(save_dir, msi_file))
    log_path = os_utils.quote_path(os.path.join(save_dir, msi_file[:-3] + "log"))
    layout_path = os_utils.quote_path(os.path.join(save_dir, "properties.txt"))
    hadoop_password_arg = os_utils.quote_path(hadoop_password)

    Execute(
      INSTALL_MSI_CMD.format(msi_path=msi_path, log_path=log_path, layout_path=layout_path,
                             hadoop_user=hadoop_user, hadoop_password_arg=hadoop_password_arg))
    reload_windows_env()
    # create additional services manually due to hdp.msi limitations
    _ensure_services_created(hadoop_user, hadoop_password)
    _create_symlinks(stack_version)
    # finalizing install
    _write_marker()
    _validate_msi_install()
  finally:
    install_lock.unlock()
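# A minimal sketch of the acquire-or-wait pattern used in install_windows_msi above.
# threading.Lock stands in for the Windows-global SystemWideLock (an Ambari helper not
# shown here); the polling timeout is illustrative only.
import threading
import time

_install_lock = threading.Lock()

def run_exclusively(task, wait_seconds=600, poll_seconds=5):
  # Try a non-blocking acquire first; if another task holds the lock,
  # poll for up to wait_seconds before giving up.
  deadline = time.time() + wait_seconds
  while not _install_lock.acquire(False):
    if time.time() >= deadline:
      raise RuntimeError("Timeout on acquiring lock")
    time.sleep(poll_seconds)
  try:
    return task()
  finally:
    _install_lock.release()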
Example #3
def get_hadoop_dir(target, force_latest_on_upgrade=False):
  """
  Return the hadoop shared directory in the following override order
  1. Use default for 2.1 and lower
  2. If 2.2 and higher, use <stack-root>/current/hadoop-client/{target}
  3. If 2.2 and higher AND for an upgrade, use <stack-root>/<version>/hadoop/{target}.
  However, if the upgrade has not yet invoked <stack-selector-tool>, return the current
  version of the component.
  :param target: the target directory
  :param force_latest_on_upgrade: if True, return the "current" directory without
  the stack version built into the path, such as <stack-root>/current/hadoop-client
  """
  stack_root = Script.get_stack_root()
  stack_version = Script.get_stack_version()

  if target not in HADOOP_DIR_DEFAULTS:
    raise Fail("Target {0} not defined".format(target))

  hadoop_dir = HADOOP_DIR_DEFAULTS[target]

  formatted_stack_version = format_stack_version(stack_version)
  if formatted_stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, formatted_stack_version):
    # home uses a different template
    if target == "home":
      hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client")
    else:
      hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client", target)

    # if we are not forcing "current" for HDP 2.2, then attempt to determine
    # if the exact version needs to be returned in the directory
    if not force_latest_on_upgrade:
      stack_info = _get_upgrade_stack()

      if stack_info is not None:
        stack_version = stack_info[1]

        # determine if <stack-selector-tool> has been run and if not, then use the current
        # hdp version until this component is upgraded
        current_stack_version = get_role_component_current_stack_version()
        if current_stack_version is not None and stack_version != current_stack_version:
          stack_version = current_stack_version

        if target == "home":
          # home uses a different template
          hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop")
        else:
          hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop", target)

  return hadoop_dir
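# Illustration only: the override order documented in get_hadoop_dir above, reproduced
# with local copies of the path templates and made-up values. The real templates and
# stack root come from the Ambari libraries and may differ.
HADOOP_DIR_TEMPLATE_SKETCH = "{0}/{1}/{2}/{3}"    # <stack-root>/<version>/<component>/<target>
HADOOP_HOME_DIR_TEMPLATE_SKETCH = "{0}/{1}/{2}"   # <stack-root>/<version>/<component>

def sketch_hadoop_dir(target, stack_root="/usr/hdp", upgrade_version=None):
  # Default: point at the "current" symlink for hadoop-client
  if target == "home":
    path = HADOOP_HOME_DIR_TEMPLATE_SKETCH.format(stack_root, "current", "hadoop-client")
  else:
    path = HADOOP_DIR_TEMPLATE_SKETCH.format(stack_root, "current", "hadoop-client", target)
  # During an upgrade, switch to the explicitly versioned directory
  if upgrade_version is not None:
    if target == "home":
      path = HADOOP_HOME_DIR_TEMPLATE_SKETCH.format(stack_root, upgrade_version, "hadoop")
    else:
      path = HADOOP_DIR_TEMPLATE_SKETCH.format(stack_root, upgrade_version, "hadoop", target)
  return path

print(sketch_hadoop_dir("bin"))                                  # /usr/hdp/current/hadoop-client/bin
print(sketch_hadoop_dir("bin", upgrade_version="2.3.4.0-3485"))  # /usr/hdp/2.3.4.0-3485/hadoop/bin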
Example #4
  def get_stack_version():
    """
    Gets the normalized version of the stack in the form #.#.#.# if it is
    present in the configurations sent.
    :return: a normalized stack version or None
    """
    config = Script.get_config()
    if 'hostLevelParams' not in config or 'stack_version' not in config['hostLevelParams']:
      return None

    stack_version_unformatted = str(config['hostLevelParams']['stack_version'])

    if stack_version_unformatted is None or stack_version_unformatted == '':
      return None

    return format_stack_version(stack_version_unformatted)
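# Rough sketch of the normalization that format_stack_version is expected to perform
# (per the docstring above): keep the leading numeric #.#.#.# portion and drop any build
# suffix. The real implementation lives in resource_management and may also pad short
# versions, so treat this only as an approximation.
import re

def sketch_format_stack_version(raw):
  if not raw:
    return None
  match = re.match(r"(\d+(?:\.\d+){1,3})", str(raw))
  return match.group(1) if match else None

print(sketch_format_stack_version("2.3.4.0-3485"))  # 2.3.4.0
print(sketch_format_stack_version(""))              # None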
Example #5
 def should_expose_component_version(self, command_name):
   """
   Analyzes config and given command to determine if stack version should be written
   to structured out. Currently only HDP stack versions >= 2.2 are supported.
   :param command_name: command name
   :return: True or False
   """
   from resource_management.libraries.functions.default import default
   stack_version_unformatted = str(default("/hostLevelParams/stack_version", ""))
   stack_version_formatted = format_stack_version(stack_version_unformatted)
   if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
     if command_name.lower() == "status":
       request_version = default("/commandParams/request_version", None)
       if request_version is not None:
         return True
     else:
       # Populate version only on base commands
       return command_name.lower() == "start" or command_name.lower() == "install" or command_name.lower() == "restart"
   return False
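# Tiny illustration of the decision table in should_expose_component_version above,
# mirrored as a standalone function with made-up inputs; it does not consult any
# Ambari configuration.
def sketch_should_expose(command_name, is_rolling_upgrade_stack=True, has_request_version=False):
  if not is_rolling_upgrade_stack:
    return False
  if command_name.lower() == "status":
    return has_request_version
  return command_name.lower() in ("start", "install", "restart")

for cmd in ("START", "status", "configure"):
  print("%s -> %s" % (cmd, sketch_should_expose(cmd)))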
def _create_symlinks(stack_version):
  # folders
  Execute("cmd /c mklink /d %HADOOP_NODE%\\hadoop %HADOOP_HOME%")
  Execute("cmd /c mklink /d %HADOOP_NODE%\\hive %HIVE_HOME%")
  stack_version_formatted = format_stack_version(stack_version)
  if stack_version_formatted != "" and compare_versions(stack_version_formatted, '2.2') >= 0:
    Execute("cmd /c mklink /d %HADOOP_NODE%\\knox %KNOX_HOME%")
  # file pairs (symlink_path, path_template_to_target_file); use * in place of the file version
  links_pairs = [
    ("%HADOOP_HOME%\\share\\hadoop\\tools\\lib\\hadoop-streaming.jar",
     "%HADOOP_HOME%\\share\\hadoop\\tools\\lib\\hadoop-streaming-*.jar"),
    ("%HIVE_HOME%\\hcatalog\\share\\webhcat\\svr\\lib\\hive-webhcat.jar",
     "%HIVE_HOME%\\hcatalog\\share\\webhcat\\svr\\lib\\hive-webhcat-*.jar"),
    ("%HIVE_HOME%\\lib\\zookeeper.jar", "%HIVE_HOME%\\lib\\zookeeper-*.jar")
  ]
  for link_pair in links_pairs:
    link, target = link_pair
    target = glob.glob(os.path.expandvars(target))[0].replace("\\\\", "\\")
    Execute('cmd /c mklink "{0}" "{1}"'.format(link, target))
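# A small POSIX-flavored sketch of the "file pair" idea above: resolve a versioned jar
# via glob and point a fixed, unversioned symlink at it. Paths are made up; the real
# code above runs "cmd /c mklink" on Windows instead.
import glob
import os

def link_unversioned(link_path, versioned_pattern):
  matches = glob.glob(os.path.expandvars(versioned_pattern))
  if not matches:
    raise RuntimeError("No file matches {0}".format(versioned_pattern))
  target = matches[0]
  if os.path.lexists(link_path):
    os.remove(link_path)
  os.symlink(target, link_path)

# e.g. link_unversioned("/opt/hadoop/lib/hadoop-streaming.jar",
#                       "/opt/hadoop/lib/hadoop-streaming-*.jar")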
Example #7
def get_hadoop_dir_for_stack_version(target, stack_version):
  """
  Return the hadoop shared directory for the provided stack version. This is necessary
  when folder paths of downgrade-source stack-version are needed after <stack-selector-tool>.
  :param target: the target directory
  :param stack_version: stack version to get hadoop dir for
  """

  stack_root = Script.get_stack_root()
  if target not in HADOOP_DIR_DEFAULTS:
    raise Fail("Target {0} not defined".format(target))

  hadoop_dir = HADOOP_DIR_DEFAULTS[target]

  formatted_stack_version = format_stack_version(stack_version)
  if formatted_stack_version and check_stack_feature(StackFeature.ROLLING_UPGRADE, formatted_stack_version):
    # home uses a different template
    if target == "home":
      hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop")
    else:
      hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, stack_version, "hadoop", target)

  return hadoop_dir
# for use with <stack-root>/current/<component>
SERVER_ROLE_DIRECTORY_MAP = {
  'SPARK2_JOBHISTORYSERVER' : 'spark2-historyserver',
  'SPARK2_CLIENT' : 'spark2-client',
  'SPARK2_THRIFTSERVER' : 'spark2-thriftserver'
}

component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP, "SPARK2_CLIENT")

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()

stack_name = status_params.stack_name
stack_root = Script.get_stack_root()
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)
host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)

spark_conf = '/etc/spark2/conf'
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")

if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
  hadoop_home = stack_select.get_hadoop_dir("home")
  spark_conf = format("{stack_root}/current/{component_directory}/conf")
  spark_log_dir = config['configurations']['spark2-env']['spark_log_dir']
  spark_pid_dir = status_params.spark_pid_dir
  spark_home = format("{stack_root}/current/{component_directory}")
Example #9
def copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type=None, upgrade_direction=None):
  """
  If the Atlas Hive Hook directory is present, Atlas is installed, and this is the first Oozie Server,
  then copy the entire contents of that directory to the Oozie Sharelib in DFS, e.g.,
  /usr/$stack/$current_version/atlas/hook/hive/ -> hdfs:///user/oozie/share/lib/lib_$timestamp/hive

  :param upgrade_type: If in the middle of a stack upgrade, the type as UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param upgrade_direction: If in the middle of a stack upgrade, the direction as Direction.UPGRADE or Direction.DOWNGRADE.
  """
  import params

  # Calculate the effective version since this code can also be called during EU/RU in the upgrade direction.
  effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version)
  if not check_stack_feature(StackFeature.ATLAS_HOOK_SUPPORT, effective_version):
    return
    
  # Important that oozie_server_hostnames is sorted by name so that this only runs on a single Oozie server.
  if not (len(params.oozie_server_hostnames) > 0 and params.hostname == params.oozie_server_hostnames[0]):
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since this is not the first Oozie Server "
                 "sorted by hostname.")
    return

  if not has_atlas_in_cluster():
    Logger.debug("Will not attempt to copy Atlas Hve hook to DFS since Atlas is not installed on the cluster.")
    return

  if upgrade_type is not None and upgrade_direction == Direction.DOWNGRADE:
    Logger.debug("Will not attempt to copy Atlas Hve hook to DFS since in the middle of Rolling/Express upgrade "
                 "and performing a Downgrade.")
    return

  current_version = get_current_version()
  atlas_hive_hook_dir = format("{stack_root}/{current_version}/atlas/hook/hive/")
  if not os.path.exists(atlas_hive_hook_dir):
    Logger.error(format("ERROR. Atlas is installed in cluster but this Oozie server doesn't "
                        "contain directory {atlas_hive_hook_dir}"))
    return

  atlas_hive_hook_impl_dir = os.path.join(atlas_hive_hook_dir, "atlas-hive-plugin-impl")

  num_files = len([name for name in os.listdir(atlas_hive_hook_impl_dir) if os.path.exists(os.path.join(atlas_hive_hook_impl_dir, name))])
  Logger.info("Found %d files/directories inside Atlas Hive hook impl directory %s"% (num_files, atlas_hive_hook_impl_dir))

  # This can return over 100 files, so take the first 5 lines after "Available ShareLib"
  # Use -oozie http(s):localhost:{oozie_server_admin_port}/oozie as oozie-env does not export OOZIE_URL
  command = format(r'source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -shareliblist hive | grep "\[Available ShareLib\]" -A 5')
  # Capture stdout so it can be parsed below; Execute() does not expose the command output.
  # (Assumes resource_management.core.shell is imported as "shell" in this module.)
  return_code, out = shell.checked_call(command,
                                        user=params.oozie_user,
                                        tries=10,
                                        try_sleep=5,
                                        logoutput=True,
  )

  hive_sharelib_dir = __parse_sharelib_from_output(out)

  if hive_sharelib_dir is None:
    raise Fail("Could not parse Hive sharelib from output.")

  Logger.info(format("Parsed Hive sharelib = {hive_sharelib_dir} and will attempt to copy/replace {num_files} files to it from {atlas_hive_hook_impl_dir}"))

  params.HdfsResource(hive_sharelib_dir,
                      type="directory",
                      action="create_on_execute",
                      source=atlas_hive_hook_impl_dir,
                      user=params.hdfs_user,
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      recursive_chown=True,
                      recursive_chmod=True,
                      replace_existing_files=True
                      )

  Logger.info("Copying Atlas Hive hook properties file to Oozie Sharelib in DFS.")
  atlas_hook_filepath_source = os.path.join(params.hive_conf_dir, params.atlas_hook_filename)
  atlas_hook_file_path_dest_in_dfs = os.path.join(hive_sharelib_dir, params.atlas_hook_filename)
  params.HdfsResource(atlas_hook_file_path_dest_in_dfs,
                      type="file",
                      source=atlas_hook_filepath_source,
                      action="create_on_execute",
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      replace_existing_files=True
                      )
  params.HdfsResource(None, action="execute")

  # Update the sharelib after making any changes
  # Use -oozie http(s):localhost:{oozie_server_admin_port}/oozie as oozie-env does not export OOZIE_URL
  Execute(format("source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -sharelibupdate"),
          user=params.oozie_user,
          tries=5,
          try_sleep=5,
          logoutput=True,
  )
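# The helper __parse_sharelib_from_output is referenced above but not shown. A plausible
# sketch, assuming the "oozie admin ... -shareliblist hive" output contains a DFS URI for
# the hive sharelib on one of the lines after "[Available ShareLib]"; the real helper may
# parse the output differently.
def __parse_sharelib_from_output_sketch(out):
  if not out:
    return None
  for line in out.splitlines():
    line = line.strip()
    # e.g. "hdfs://nn:8020/user/oozie/share/lib/lib_20170101/hive"
    if "/share/lib/" in line and "/hive" in line:
      return line.split()[-1]
  return None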
Example #10
limitations under the License.

"""

from ambari_commons.constants import AMBARI_SUDO_BINARY
from resource_management.libraries.functions.version import format_stack_version, compare_versions
from resource_management.core.system import System
from resource_management.libraries.script.script import Script
from resource_management.libraries.functions import default, format

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()
sudo = AMBARI_SUDO_BINARY

stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
iop_stack_version = format_stack_version(stack_version_unformatted)

#users and groups
hbase_user = config['configurations']['hbase-env']['hbase_user']
smoke_user = config['configurations']['cluster-env']['smokeuser']
gmetad_user = config['configurations']['ganglia-env']["gmetad_user"]
gmond_user = config['configurations']['ganglia-env']["gmond_user"]
tez_user = None  #config['configurations']['tez-env']["tez_user"]

user_group = config['configurations']['cluster-env']['user_group']
proxyuser_group = default("/configurations/hadoop-env/proxyuser_group",
                          "users")

hdfs_log_dir_prefix = config['configurations']['hadoop-env'][
    'hdfs_log_dir_prefix']
Example #11
def spark_service(name, upgrade_type=None, action=None):
    import params

    if action == 'start':

        effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
        if effective_version:
            effective_version = format_stack_version(effective_version)

        if name == 'jobhistoryserver' and effective_version and check_stack_feature(
                StackFeature.SPARK_16PLUS, effective_version):
            # copy spark-hdp-assembly.jar to hdfs
            copy_to_hdfs("spark",
                         params.user_group,
                         params.hdfs_user,
                         host_sys_prepped=params.host_sys_prepped)
            # create spark history directory
            params.HdfsResource(params.spark_history_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.spark_user,
                                group=params.user_group,
                                mode=0777,
                                recursive_chmod=True)
            params.HdfsResource(None, action="execute")

        if params.security_enabled:
            spark_kinit_cmd = format(
                "{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; "
            )
            Execute(spark_kinit_cmd, user=params.spark_user)

        # Spark 1.3.1.2.3 and higher (shipped with HDP 2.3) has no dependency on Tez,
        # so the Tez tarball only needs to be copied for earlier stack versions.
        if params.stack_version_formatted and check_stack_feature(
                StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
            resource_created = copy_to_hdfs(
                "tez",
                params.user_group,
                params.hdfs_user,
                host_sys_prepped=params.host_sys_prepped)
            if resource_created:
                params.HdfsResource(None, action="execute")

        if name == 'jobhistoryserver':
            historyserver_no_op_test = format(
                'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1'
            )
            try:
                Execute(format('{spark_history_server_start}'),
                        user=params.spark_user,
                        environment={'JAVA_HOME': params.java_home},
                        not_if=historyserver_no_op_test)
            except:
                show_logs(params.spark_log_dir, user=params.spark_user)
                raise

        elif name == 'sparkthriftserver':
            if params.security_enabled:
                hive_principal = params.hive_kerberos_principal.replace(
                    '_HOST',
                    socket.getfqdn().lower())
                hive_kinit_cmd = format(
                    "{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; "
                )
                Execute(hive_kinit_cmd, user=params.hive_user)

            thriftserver_no_op_test = format(
                'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1'
            )
            try:
                Execute(format(
                    '{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'
                ),
                        user=params.hive_user,
                        environment={'JAVA_HOME': params.java_home},
                        not_if=thriftserver_no_op_test)
            except:
                show_logs(params.spark_log_dir, user=params.hive_user)
                raise
    elif action == 'stop':
        if name == 'jobhistoryserver':
            try:
                Execute(format('{spark_history_server_stop}'),
                        user=params.spark_user,
                        environment={'JAVA_HOME': params.java_home})
            except:
                show_logs(params.spark_log_dir, user=params.spark_user)
                raise
            File(params.spark_history_server_pid_file, action="delete")

        elif name == 'sparkthriftserver':
            try:
                Execute(format('{spark_thrift_server_stop}'),
                        user=params.hive_user,
                        environment={'JAVA_HOME': params.java_home})
            except:
                show_logs(params.spark_log_dir, user=params.hive_user)
                raise
            File(params.spark_thrift_server_pid_file, action="delete")
Example #12
    'HCAT': 'hive-client',
    'HIVE_SERVER_INTERACTIVE': 'hive-server2-hive2'
}

# Either HIVE_METASTORE, HIVE_SERVER, WEBHCAT_SERVER, HIVE_CLIENT, HCAT, HIVE_SERVER_INTERACTIVE
role = default("/role", None)
component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP,
                                                     "HIVE_CLIENT")
component_directory_interactive = Script.get_component_from_role(
    SERVER_ROLE_DIRECTORY_MAP, "HIVE_SERVER_INTERACTIVE")

config = Script.get_config()

stack_root = Script.get_stack_root()
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted_major = format_stack_version(stack_version_unformatted)

if OSCheck.is_windows_family():
    hive_metastore_win_service_name = "metastore"
    hive_client_win_service_name = "hwi"
    hive_server_win_service_name = "hiveserver2"
    webhcat_server_win_service_name = "templeton"
else:
    hive_pid_dir = config['configurations']['hive-env']['hive_pid_dir']
    hive_pid = 'hive-server.pid'
    hive_interactive_pid = 'hive-interactive.pid'
    hive_metastore_pid = 'hive.pid'

    hcat_pid_dir = config['configurations']['hive-env'][
        'hcat_pid_dir']  #hcat_pid_dir
    webhcat_pid_file = format('{hcat_pid_dir}/webhcat.pid')
Example #13
def setup_spark(env, type, upgrade_type = None, action = None):
  import params

  # ensure that matching LZO libraries are installed for Spark
  lzo_utils.install_lzo_if_needed()

  Directory([params.spark_pid_dir, params.spark_log_dir],
            owner=params.spark_user,
            group=params.user_group,
            mode=0775,
            create_parents = True
  )
  if type == 'server' and action == 'config':
    params.HdfsResource(params.spark_hdfs_user_dir,
                       type="directory",
                       action="create_on_execute",
                       owner=params.spark_user,
                       mode=0775
    )

    if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse(params.default_fs).scheme:
      # Create Spark Warehouse Dir
      params.HdfsResource(params.spark_warehouse_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          mode=0777
      )

    params.HdfsResource(None, action="execute")

    generate_logfeeder_input_config('spark2', Template("input.config-spark2.json.j2", extra_imports=[default]))

  spark2_defaults = dict(params.config['configurations']['spark2-defaults'])

  if params.security_enabled:
    spark2_defaults.pop("history.server.spnego.kerberos.principal")
    spark2_defaults.pop("history.server.spnego.keytab.file")
    spark2_defaults['spark.history.kerberos.principal'] = spark2_defaults['spark.history.kerberos.principal'].replace('_HOST', socket.getfqdn().lower())

  PropertiesFile(format("{spark_conf}/spark-defaults.conf"),
    properties = spark2_defaults,
    key_value_delimiter = " ",
    owner=params.spark_user,
    group=params.spark_group,
    mode=0644
  )

  # create spark-env.sh in etc/conf dir
  File(os.path.join(params.spark_conf, 'spark-env.sh'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_env_sh),
       mode=0644,
  )

  #create log4j.properties in etc/conf dir
  File(os.path.join(params.spark_conf, 'log4j.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=params.spark_log4j_properties,
       mode=0644,
  )

  #create metrics.properties in etc/conf dir
  File(os.path.join(params.spark_conf, 'metrics.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_metrics_properties),
       mode=0644
  )

  if params.is_hive_installed:
    XmlConfig("hive-site.xml",
          conf_dir=params.spark_conf,
          configurations=params.spark_hive_properties,
          owner=params.spark_user,
          group=params.spark_group,
          mode=0644)

  if params.has_spark_thriftserver:
    spark2_thrift_sparkconf = dict(params.config['configurations']['spark2-thrift-sparkconf'])

    if params.security_enabled and 'spark.yarn.principal' in spark2_thrift_sparkconf:
      spark2_thrift_sparkconf['spark.yarn.principal'] = spark2_thrift_sparkconf['spark.yarn.principal'].replace('_HOST', socket.getfqdn().lower())

    PropertiesFile(params.spark_thrift_server_conf_file,
      properties = spark2_thrift_sparkconf,
      owner = params.hive_user,
      group = params.user_group,
      key_value_delimiter = " ",
      mode=0644
    )

  effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
  if effective_version:
    effective_version = format_stack_version(effective_version)

  if params.spark_thrift_fairscheduler_content and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
    # create spark-thrift-fairscheduler.xml
    File(os.path.join(params.spark_conf,"spark-thrift-fairscheduler.xml"),
      owner=params.spark_user,
      group=params.spark_group,
      mode=0755,
      content=InlineTemplate(params.spark_thrift_fairscheduler_content)
    )
Example #14
    def upgrade_schema(self, env):
        """
    Executes the schema upgrade binary. This is its own function because it can
    be called as a standalone task from the upgrade pack, but it is safe to run for each
    metastore instance. The schema upgrade on an already upgraded metastore is a NOOP.

    The metastore schema upgrade requires a database driver library for most
    databases. During an upgrade, it's possible that the library is not present,
    so this will also attempt to copy/download the appropriate driver.

    This function will also ensure that configurations are written out to disk before running
    since the new configs will most likely not yet exist on an upgrade.

    Should not be invoked for a DOWNGRADE; Metastore only supports schema upgrades.
    """
        Logger.info("Upgrading Hive Metastore Schema")
        import params
        env.set_params(params)

        # ensure that configurations are written out before trying to upgrade the schema
        # since the schematool needs configs and doesn't know how to use the hive conf override
        self.configure(env)

        if params.security_enabled:
            kinit_command = format(
                "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal}; "
            )
            Execute(kinit_command, user=params.smokeuser)

        # ensure that the JDBC driver is present for the schema tool; if it's not
        # present, then download it first
        if params.hive_jdbc_driver in params.hive_jdbc_drivers_list:
            target_directory = format("{stack_root}/{version}/hive/lib")

            # download it if it does not exist
            if not os.path.exists(params.source_jdbc_file):
                jdbc_connector(params.target_hive)

            target_directory_and_filename = os.path.join(
                target_directory, os.path.basename(params.source_jdbc_file))

            if params.sqla_db_used:
                target_native_libs_directory = format(
                    "{target_directory}/native/lib64")

                Execute(
                    format(
                        "yes | {sudo} cp {jars_in_hive_lib} {target_directory}"
                    ))

                Directory(target_native_libs_directory, create_parents=True)

                Execute(
                    format(
                        "yes | {sudo} cp {libs_in_hive_lib} {target_native_libs_directory}"
                    ))

                Execute(
                    format(
                        "{sudo} chown -R {hive_user}:{user_group} {hive_lib}/*"
                    ))
            else:
                # copy the JDBC driver from the older metastore location to the new location only
                # if it does not already exist
                if not os.path.exists(target_directory_and_filename):
                    Execute(('cp', params.source_jdbc_file, target_directory),
                            path=["/bin", "/usr/bin/"],
                            sudo=True)

            File(target_directory_and_filename, mode=0644)

        # build the schema tool command
        binary = format("{hive_schematool_ver_bin}/schematool")

        # the conf.server directory changed locations between stack versions
        # since the configurations have not been written out yet during an upgrade
        # we need to choose the original legacy location
        schematool_hive_server_conf_dir = params.hive_server_conf_dir
        if params.current_version is not None:
            current_version = format_stack_version(params.current_version)
            if not (check_stack_feature(StackFeature.CONFIG_VERSIONING,
                                        current_version)):
                schematool_hive_server_conf_dir = LEGACY_HIVE_SERVER_CONF

        env_dict = {'HIVE_CONF_DIR': schematool_hive_server_conf_dir}

        command = format(
            "{binary} -dbType {hive_metastore_db_type} -upgradeSchema")
        Execute(command,
                user=params.hive_user,
                tries=1,
                environment=env_dict,
                logoutput=True)
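# For illustration only: the schema tool invocation that upgrade_schema above ends up
# running, assembled here with plain str.format and made-up values (the real code uses
# Ambari's format() with values taken from params).
binary = "/usr/hdp/2.6.0.3-8/hive/bin/schematool"   # hypothetical schematool path
hive_metastore_db_type = "mysql"                    # hypothetical database type
command = "{0} -dbType {1} -upgradeSchema".format(binary, hive_metastore_db_type)
print(command)
# -> /usr/hdp/2.6.0.3-8/hive/bin/schematool -dbType mysql -upgradeSchema
# It runs with HIVE_CONF_DIR pointing at the legacy conf.server directory when the
# target stack does not yet support CONFIG_VERSIONING.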
Example #15
from resource_management.libraries.functions.get_port_from_url import get_port_from_url
from resource_management.libraries import functions

# server configurations
config = Script.get_config()
tmp_dir = Script.get_tmp_dir()
sudo = AMBARI_SUDO_BINARY

stack_name = default("/hostLevelParams/stack_name", None)

# node hostname
hostname = config["hostname"]

# This is expected to be of the form #.#.#.#
stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
stack_version = format_stack_version(stack_version_unformatted)
stack_is_21 = False

# this is not available on INSTALL action because hdp-select is not available
iop_stack_version = functions.get_stack_version('hive-server2')

# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade.
# It cannot be used during the initial Cluster Install because the version is not yet known.
version = default("/commandParams/version", None)

# current host stack version
current_version = default("/hostLevelParams/current_version", None)

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)
Example #16
    'RANGER_ADMIN': 'ranger-admin',
    'RANGER_USERSYNC': 'ranger-usersync'
}

component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP,
                                                     "RANGER_ADMIN")

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()

stack_name = default("/hostLevelParams/stack_name", None)
version = default("/commandParams/version", None)
host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False)

stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
stack_version_formatted = format_stack_version(stack_version_unformatted)

upgrade_marker_file = format("{tmp_dir}/rangeradmin_ru.inprogress")

xml_configurations_supported = config['configurations']['ranger-env'][
    'xml_configurations_supported']

create_db_dbuser = config['configurations']['ranger-env']['create_db_dbuser']

stack_is_hdp22_or_further = Script.is_stack_greater_or_equal("2.2")
stack_is_hdp23_or_further = Script.is_stack_greater_or_equal("2.3")

downgrade_from_version = default("/commandParams/downgrade_from_version", None)
upgrade_direction = default("/commandParams/upgrade_direction", None)

ranger_conf = '/etc/ranger/admin/conf'
Example #17
    def actionexecute(self, env):
        num_errors = 0

        # Parse parameters
        config = Script.get_config()

        repo_rhel_suse = config['configurations']['cluster-env'][
            'repo_suse_rhel_template']
        repo_ubuntu = config['configurations']['cluster-env'][
            'repo_ubuntu_template']
        template = repo_rhel_suse if OSCheck.is_redhat_family(
        ) or OSCheck.is_suse_family() else repo_ubuntu

        # Handle a SIGTERM and SIGINT gracefully
        signal.signal(signal.SIGTERM, self.abort_handler)
        signal.signal(signal.SIGINT, self.abort_handler)

        # Select dict that contains parameters
        try:
            self.repository_version = config['roleParams'][
                'repository_version']
            base_urls = json.loads(config['roleParams']['base_urls'])
            package_list = json.loads(config['roleParams']['package_list'])
            stack_id = config['roleParams']['stack_id']
        except KeyError:
            # Last try
            self.repository_version = config['commandParams'][
                'repository_version']
            base_urls = json.loads(config['commandParams']['base_urls'])
            package_list = json.loads(config['commandParams']['package_list'])
            stack_id = config['commandParams']['stack_id']

        # current stack information
        self.current_stack_version_formatted = None
        if 'stack_version' in config['hostLevelParams']:
            current_stack_version_unformatted = str(
                config['hostLevelParams']['stack_version'])
            self.current_stack_version_formatted = format_stack_version(
                current_stack_version_unformatted)

        self.stack_name = Script.get_stack_name()
        if self.stack_name is None:
            raise Fail("Cannot determine the stack name")
        self.stack_root_folder = Script.get_stack_root()
        if self.stack_root_folder is None:
            raise Fail("Cannot determine the stack's root directory")
        if self.repository_version is None:
            raise Fail("Cannot determine the repository version to install")

        self.repository_version = self.repository_version.strip()

        # Install/update repositories
        installed_repositories = []
        self.current_repositories = []
        self.current_repo_files = set()

        # Enable base system repositories
        # We don't need that for RHEL family, because we leave all repos enabled
        # except disabled HDP* ones
        if OSCheck.is_suse_family():
            self.current_repositories.append('base')
        elif OSCheck.is_ubuntu_family():
            self.current_repo_files.add('base')

        Logger.info("Will install packages for repository version {0}".format(
            self.repository_version))

        if 0 == len(base_urls):
            Logger.info(
                "Repository list is empty. Ambari may not be managing the repositories for {0}."
                .format(self.repository_version))

        try:
            append_to_file = False
            for url_info in base_urls:
                repo_name, repo_file = self.install_repository(
                    url_info, append_to_file, template)
                self.current_repositories.append(repo_name)
                self.current_repo_files.add(repo_file)
                append_to_file = True

            installed_repositories = list_ambari_managed_repos(self.stack_name)
        except Exception, err:
            Logger.logger.exception(
                "Cannot distribute repositories. Error: {0}".format(str(err)))
            num_errors += 1
Example #18
def zookeeper(type=None, upgrade_type=None):
    import params

    if type == 'server':
        # This path may be missing after an Ambari upgrade; it must be recreated before any configs are applied.
        # Note: join the path components individually; a leading "/" on the second argument would make
        # os.path.join discard params.stack_root.
        if upgrade_type is None and not os.path.exists(os.path.join(params.stack_root, "current", "zookeeper-server")) and params.current_version \
          and check_stack_feature(StackFeature.ROLLING_UPGRADE, format_stack_version(params.version)):
            conf_select.select(params.stack_name, "zookeeper",
                               params.current_version)
            stack_select.select("zookeeper-server", params.version)

    Directory(params.config_dir,
              owner=params.zk_user,
              create_parents=True,
              group=params.user_group)

    File(os.path.join(params.config_dir, "zookeeper-env.sh"),
         content=InlineTemplate(params.zk_env_sh_template),
         owner=params.zk_user,
         group=params.user_group)

    configFile("zoo.cfg", template_name="zoo.cfg.j2")
    configFile("configuration.xsl", template_name="configuration.xsl.j2")

    Directory(
        params.zk_pid_dir,
        owner=params.zk_user,
        create_parents=True,
        group=params.user_group,
        mode=0755,
    )

    Directory(
        params.zk_log_dir,
        owner=params.zk_user,
        create_parents=True,
        group=params.user_group,
        mode=0755,
    )

    Directory(
        params.zk_data_dir,
        owner=params.zk_user,
        create_parents=True,
        cd_access="a",
        group=params.user_group,
        mode=0755,
    )

    if type == 'server':
        myid = str(sorted(params.zookeeper_hosts).index(params.hostname) + 1)

        File(os.path.join(params.zk_data_dir, "myid"), mode=0644, content=myid)

    if (params.log4j_props != None):
        File(os.path.join(params.config_dir, "log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.zk_user,
             content=params.log4j_props)
    elif (os.path.exists(os.path.join(params.config_dir, "log4j.properties"))):
        File(os.path.join(params.config_dir, "log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.zk_user)

    if params.security_enabled:
        if type == "server":
            configFile("zookeeper_jaas.conf",
                       template_name="zookeeper_jaas.conf.j2")
            configFile("zookeeper_client_jaas.conf",
                       template_name="zookeeper_client_jaas.conf.j2")
        else:
            configFile("zookeeper_client_jaas.conf",
                       template_name="zookeeper_client_jaas.conf.j2")

    File(os.path.join(params.config_dir, "zoo_sample.cfg"),
         owner=params.zk_user,
         group=params.user_group)
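# Quick illustration of how the "myid" value written above is derived: the position of
# this host in the alphabetically sorted list of ZooKeeper hosts, 1-based. Hostnames are
# made up for the example.
zookeeper_hosts = ["zk3.example.com", "zk1.example.com", "zk2.example.com"]
hostname = "zk2.example.com"
myid = str(sorted(zookeeper_hosts).index(hostname) + 1)
print(myid)  # -> "2"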
Example #19
def pre_upgrade_deregister():
    """
  Runs the "hive --service hiveserver2 --deregister <version>" command to
  de-provision the server in preparation for an upgrade. This will contact
  ZooKeeper to remove the server so that clients that attempt to connect
  will be directed to other servers automatically. Once all
  clients have drained, the server will shut down automatically; this process
  could take a very long time.
  This function will obtain the Kerberos ticket if security is enabled.
  :return:
  """
    import params

    Logger.info(
        'HiveServer2 executing "deregister" command in preparation for upgrade...'
    )

    if params.security_enabled:
        kinit_command = format(
            "{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal}; "
        )
        Execute(kinit_command, user=params.smokeuser)

    # calculate the current hive server version
    current_hiveserver_version = _get_current_hiveserver_version()
    if current_hiveserver_version is None:
        raise Fail(
            'Unable to determine the current HiveServer2 version to deregister.'
        )

    # fallback when upgrading because /usr/iop/current/hive-server2/conf/conf.server may not exist
    hive_server_conf_dir = params.hive_server_conf_dir
    if not os.path.exists(hive_server_conf_dir):
        hive_server_conf_dir = "/etc/hive/conf.server"

    # deregister
    hive_execute_path = params.execute_path
    # If upgrading, the upgrade-target hive binary should be used to call the --deregister command.
    # If downgrading, the downgrade-source hive binary should be used to call the --deregister command.
    if "upgrade" == params.upgrade_direction:
        # hive_bin
        upgrade_target_version = format_stack_version(params.version)
        if upgrade_target_version and compare_versions(upgrade_target_version,
                                                       "4.1.0.0") >= 0:
            upgrade_target_hive_bin = format('/usr/iop/{version}/hive/bin')
            if (os.pathsep + params.hive_bin) in hive_execute_path:
                hive_execute_path = hive_execute_path.replace(
                    os.pathsep + params.hive_bin,
                    os.pathsep + upgrade_target_hive_bin)
        # hadoop_bin_dir
        upgrade_target_hadoop_bin = stack_select.get_hadoop_dir(
            "bin", upgrade_stack_only=True)
        upgrade_source_hadoop_bin = params.hadoop_bin_dir
        if upgrade_target_hadoop_bin and len(
                upgrade_target_hadoop_bin) > 0 and (
                    os.pathsep +
                    upgrade_source_hadoop_bin) in hive_execute_path:
            hive_execute_path = hive_execute_path.replace(
                os.pathsep + upgrade_source_hadoop_bin,
                os.pathsep + upgrade_target_hadoop_bin)

    command = format(
        'hive --config {hive_server_conf_dir} --service hiveserver2 --deregister '
        + current_hiveserver_version)
    Execute(command, user=params.hive_user, path=hive_execute_path, tries=1)
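# A standalone sketch of the PATH rewrite performed in pre_upgrade_deregister above when
# upgrading: the downgrade-source hive/hadoop bin directories in the execute path are
# swapped for the upgrade-target ones. All paths here are made up.
import os

def swap_path_segment(execute_path, old_dir, new_dir):
  segment = os.pathsep + old_dir
  if segment in execute_path:
    return execute_path.replace(segment, os.pathsep + new_dir)
  return execute_path

path = "/usr/bin" + os.pathsep + "/usr/iop/4.0.0.0/hive/bin"
print(swap_path_segment(path, "/usr/iop/4.0.0.0/hive/bin", "/usr/iop/4.1.0.0/hive/bin"))
# -> /usr/bin:/usr/iop/4.1.0.0/hive/bin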
Example #20
def get_stack_feature_version(config):
    """
  Uses the specified ConfigDictionary to determine which version to use for stack
  feature checks.

  Normally, the commandParams/version is the correct value to use as it represents the 4-digit
  exact stack version/build being upgraded or downgraded to. However, there are cases where the
  commands being sent are to stop running services which are on a different stack version from the
  version being upgraded/downgraded to. As a result, the configurations sent for these specific
  stop commands do not match commandParams/version.
  :param config:  a ConfigDictionary instance to extract the hostLevelParams
                  and commandParams from.
  :return: the version to use when checking stack features.
  """
    from resource_management.libraries.functions.default import default

    if "hostLevelParams" not in config or "commandParams" not in config:
        raise Fail(
            "Unable to determine the correct version since hostLevelParams and commandParams were not present in the configuration dictionary"
        )

    # should always be there
    stack_version = config['hostLevelParams']['stack_version']

    # something like 2.4.0.0-1234; represents the version for the command
    # (or None if this is a cluster install and it hasn't been calculated yet)
    version = default("/commandParams/version", None)

    # something like 2.4.0.0-1234
    # (or None if this is a cluster install and it hasn't been calculated yet)
    current_cluster_version = default("/hostLevelParams/current_version", None)

    # UPGRADE or DOWNGRADE (or None)
    upgrade_direction = default("/commandParams/upgrade_direction", None)

    # start out with the value that's right 99% of the time
    version_for_stack_feature_checks = version if version is not None else stack_version

    # if this is not an upgrade, then we take the simple path
    if upgrade_direction is None:
        Logger.info(
            "Stack Feature Version Info: stack_version={0}, version={1}, current_cluster_version={2} -> {3}"
            .format(stack_version, version, current_cluster_version,
                    version_for_stack_feature_checks))

        return version_for_stack_feature_checks

    # STOP commands are the troublemakers, as they are intended to stop a service that is not on the
    # version of the stack being upgraded/downgraded to
    is_stop_command = _is_stop_command(config)
    if not is_stop_command:
        Logger.info(
            "Stack Feature Version Info: stack_version={0}, version={1}, current_cluster_version={2}, upgrade_direction={3} -> {4}"
            .format(stack_version, version, current_cluster_version,
                    upgrade_direction, version_for_stack_feature_checks))

        return version_for_stack_feature_checks

    original_stack = default("/commandParams/original_stack", None)
    target_stack = default("/commandParams/target_stack", None)

    # something like 2.5.0.0-5678 (or None)
    downgrade_from_version = default("/commandParams/downgrade_from_version",
                                     None)

    # guaranteed to have a STOP command now during an UPGRADE/DOWNGRADE, check direction
    if upgrade_direction.lower() == Direction.DOWNGRADE.lower():
        if downgrade_from_version is None:
            Logger.warning(
                "Unable to determine the version being downgraded when stopping services, using {0}"
                .format(version_for_stack_feature_checks))
        else:
            version_for_stack_feature_checks = downgrade_from_version
    else:
        # UPGRADE
        if current_cluster_version is not None:
            version_for_stack_feature_checks = current_cluster_version
        elif original_stack is not None:
            version_for_stack_feature_checks = format_stack_version(
                original_stack)
        else:
            version_for_stack_feature_checks = version if version is not None else stack_version

    Logger.info(
        "Stack Feature Version Info: stack_version={0}, version={1}, current_cluster_version={2}, upgrade_direction={3}, original_stack={4}, target_stack={5}, downgrade_from_version={6}, stop_command={7} -> {8}"
        .format(stack_version, version, current_cluster_version,
                upgrade_direction, original_stack, target_stack,
                downgrade_from_version, is_stop_command,
                version_for_stack_feature_checks))

    return version_for_stack_feature_checks
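# A condensed mirror of the decision order in get_stack_feature_version above, operating
# on a plain dict so it can be run standalone; it is illustrative only and omits logging
# and the original_stack fallback details.
def sketch_feature_version(cmd, is_stop_command=False):
  version = cmd.get("commandParams", {}).get("version")
  stack_version = cmd["hostLevelParams"]["stack_version"]
  chosen = version if version is not None else stack_version
  direction = cmd.get("commandParams", {}).get("upgrade_direction")
  if direction is None or not is_stop_command:
    return chosen
  if direction.lower() == "downgrade":
    return cmd.get("commandParams", {}).get("downgrade_from_version") or chosen
  # upgrade: prefer the version the host is currently on
  return cmd.get("hostLevelParams", {}).get("current_version") or chosen

cmd = {"hostLevelParams": {"stack_version": "2.5", "current_version": "2.5.0.0-1234"},
       "commandParams": {"version": "2.6.0.0-5678", "upgrade_direction": "UPGRADE"}}
print(sketch_feature_version(cmd, is_stop_command=True))  # -> 2.5.0.0-1234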
def setup_spark(env, type, upgrade_type=None, action=None):
    import params

    Directory([params.spark_pid_dir, params.spark_log_dir],
              owner=params.spark_user,
              group=params.user_group,
              mode=0775,
              create_parents=True)
    if type == 'server' and action == 'config':
        params.HdfsResource(params.spark_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.spark_user,
                            mode=0775)
        params.HdfsResource(None, action="execute")

    PropertiesFile(
        format("{spark_conf}/spark-defaults.conf"),
        properties=params.config['configurations']['spark2-defaults'],
        key_value_delimiter=" ",
        owner=params.spark_user,
        group=params.spark_group,
    )

    # create spark-env.sh in etc/conf dir
    File(
        os.path.join(params.spark_conf, 'spark-env.sh'),
        owner=params.spark_user,
        group=params.spark_group,
        content=InlineTemplate(params.spark_env_sh),
        mode=0644,
    )

    #create log4j.properties in etc/conf dir
    File(
        os.path.join(params.spark_conf, 'log4j.properties'),
        owner=params.spark_user,
        group=params.spark_group,
        content=params.spark_log4j_properties,
        mode=0644,
    )

    #create metrics.properties in etc/conf dir
    File(os.path.join(params.spark_conf, 'metrics.properties'),
         owner=params.spark_user,
         group=params.spark_group,
         content=InlineTemplate(params.spark_metrics_properties))

    Directory(
        params.spark_logs_dir,
        owner=params.spark_user,
        group=params.spark_group,
        mode=0755,
    )

    if params.is_hive_installed:
        XmlConfig("hive-site.xml",
                  conf_dir=params.spark_conf,
                  configurations=params.spark_hive_properties,
                  owner=params.spark_user,
                  group=params.spark_group,
                  mode=0644)

    if params.has_spark_thriftserver:
        PropertiesFile(
            params.spark_thrift_server_conf_file,
            properties=params.config['configurations']
            ['spark2-thrift-sparkconf'],
            owner=params.hive_user,
            group=params.user_group,
            key_value_delimiter=" ",
        )

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
        effective_version = format_stack_version(effective_version)

    if params.spark_thrift_fairscheduler_content and effective_version and check_stack_feature(
            StackFeature.SPARK_16PLUS, effective_version):
        # create spark-thrift-fairscheduler.xml
        File(os.path.join(params.spark_conf, "spark-thrift-fairscheduler.xml"),
             owner=params.spark_user,
             group=params.spark_group,
             mode=0755,
             content=InlineTemplate(params.spark_thrift_fairscheduler_content))
Example #22
def spark_service(name, upgrade_type=None, action=None):
  import params

  if action == 'start':

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if name == 'jobhistoryserver' and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
      # create & copy spark2-hdp-yarn-archive.tar.gz to hdfs
      if not params.sysprep_skip_copy_tarballs_hdfs:
          source_dir=params.spark_home+"/jars"
          tmp_archive_file=get_tarball_paths("spark2")[1]
          make_tarfile(tmp_archive_file, source_dir)
          copy_to_hdfs("spark2", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs, replace_existing_files=True)

      # create & copy spark2-hdp-hive-archive.tar.gz to hdfs
      if not params.sysprep_skip_copy_tarballs_hdfs:
        source_dir=params.spark_home+"/standalone-metastore"
        tmp_archive_file=get_tarball_paths("spark2hive")[1]
        make_tarfile(tmp_archive_file, source_dir)
        copy_to_hdfs("spark2hive", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs, replace_existing_files=True)

      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True
                          )
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3 and higher (shipped with HDP 2.3) has no dependency on Tez,
    # so the Tez tarball only needs to be copied for earlier stack versions.
    if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':

      create_catalog_cmd = format("{hive_schematool_bin}/schematool -dbType {hive_metastore_db_type} "
                                    "-createCatalog {default_metastore_catalog} "
                                    "-catalogDescription 'Default catalog, for Spark' -ifNotExists "
                                    "-catalogLocation {default_fs}{spark_warehouse_dir}")

      Execute(create_catalog_cmd,
                user = params.hive_user)

      historyserver_no_op_test = as_sudo(["test", "-f", params.spark_history_server_pid_file]) + " && " + as_sudo(["pgrep", "-F", params.spark_history_server_pid_file])
      try:
        Execute(params.spark_history_server_start,
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=historyserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

    elif name == 'sparkthriftserver':
      if params.security_enabled:
        hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_kerberos_principal}; ")
        Execute(hive_kinit_cmd, user=params.spark_user)

      thriftserver_no_op_test= as_sudo(["test", "-f", params.spark_thrift_server_pid_file]) + " && " + as_sudo(["pgrep", "-F", params.spark_thrift_server_pid_file])
      try:
        Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=thriftserver_no_op_test
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
  elif action == 'stop':
    if name == 'jobhistoryserver':
      try:
        Execute(format('{spark_history_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_history_server_pid_file,
        action="delete"
      )

    elif name == 'sparkthriftserver':
      try:
        Execute(format('{spark_thrift_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_thrift_server_pid_file,
        action="delete"
      )
Example #23
    'SPARK_JOBHISTORYSERVER': 'spark-historyserver',
    'SPARK_CLIENT': 'spark-client',
    'SPARK_THRIFTSERVER': 'spark-thriftserver'
}
upgrade_direction = default("/commandParams/upgrade_direction", None)

component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP,
                                                     "SPARK_CLIENT")

config = Script.get_config()
tmp_dir = Script.get_tmp_dir()

# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade
version = default("/commandParams/version", None)
stack_name = default("/hostLevelParams/stack_name", None)
iop_full_version = format_stack_version(version)

hadoop_home = "/usr/iop/current/hadoop-client"
spark_conf = format("/usr/iop/current/{component_directory}/conf")
spark_log_dir = config['configurations']['spark-env']['spark_log_dir']
spark_pid_dir = status_params.spark_pid_dir
spark_role_root = "spark-client"

command_role = default("/role", "")

if command_role == "SPARK_CLIENT":
    spark_role_root = "spark-client"
elif command_role == "SPARK_JOBHISTORYSERVER":
    spark_role_root = "spark-historyserver"
elif command_role == "SPARK_THRIFTSERVER":
    spark_role_root = "spark-thriftserver"
Example #24
def kafka(upgrade_type=None):
    import params
    ensure_base_directories()

    kafka_server_config = mutable_config_dict(
        params.config['configurations']['kafka-broker'])
    # This still has an issue of hostnames being alphabetically out-of-order for broker.id in HDP-2.2.
    # Starting in HDP 2.3, Kafka handles the generation of broker.id so Ambari doesn't have to.

    effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(
        params.version)
    Logger.info(format("Effective stack version: {effective_version}"))

    kafka_server_config['host.name'] = params.hostname

    if params.has_metric_collector:
        kafka_server_config[
            'kafka.timeline.metrics.hosts'] = params.ams_collector_hosts
        kafka_server_config[
            'kafka.timeline.metrics.port'] = params.metric_collector_port
        kafka_server_config[
            'kafka.timeline.metrics.protocol'] = params.metric_collector_protocol
        kafka_server_config[
            'kafka.timeline.metrics.truststore.path'] = params.metric_truststore_path
        kafka_server_config[
            'kafka.timeline.metrics.truststore.type'] = params.metric_truststore_type
        kafka_server_config[
            'kafka.timeline.metrics.truststore.password'] = params.metric_truststore_password

    kafka_data_dir = kafka_server_config['log.dirs']
    kafka_data_dirs = filter(None, kafka_data_dir.split(","))
    Directory(
        kafka_data_dirs,
        mode=0755,
        cd_access='a',
        owner=params.kafka_user,
        group=params.user_group,
        create_parents=True,
        recursive_ownership=True,
    )

    PropertiesFile(
        "server.properties",
        dir=params.conf_dir,
        properties=kafka_server_config,
        owner=params.kafka_user,
        group=params.user_group,
    )

    File(format("{conf_dir}/kafka-env.sh"),
         mode=0755,
         owner=params.kafka_user,
         content=InlineTemplate(params.kafka_env_sh_template))

    File(format("{kafka_bin_dir}/kafka-run-class.sh"),
         mode=0755,
         owner=params.kafka_user,
         content=InlineTemplate(params.kafka_run_class_content_template))

    if (params.log4j_props != None):
        File(format("{conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.kafka_user,
             content=InlineTemplate(params.log4j_props))

    if params.security_enabled and params.kafka_kerberos_enabled:
        if params.kafka_jaas_conf_template:
            File(format("{conf_dir}/kafka_jaas.conf"),
                 owner=params.kafka_user,
                 content=InlineTemplate(params.kafka_jaas_conf_template))
        else:
            TemplateConfig(format("{conf_dir}/kafka_jaas.conf"),
                           owner=params.kafka_user)

        if params.kafka_client_jaas_conf_template:
            File(format("{conf_dir}/kafka_client_jaas.conf"),
                 owner=params.kafka_user,
                 content=InlineTemplate(
                     params.kafka_client_jaas_conf_template))
        else:
            TemplateConfig(format("{conf_dir}/kafka_client_jaas.conf"),
                           owner=params.kafka_user)

    # On some operating systems this folder may not exist, so create it before pushing files there
    Directory(params.limits_conf_dir,
              create_parents=True,
              owner='root',
              group='root')

    File(os.path.join(params.limits_conf_dir, 'kafka.conf'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("kafka.conf.j2"))

    File(os.path.join(params.conf_dir, 'tools-log4j.properties'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("tools-log4j.properties.j2"))

    setup_symlink(params.kafka_managed_pid_dir, params.kafka_pid_dir)
    setup_symlink(params.kafka_managed_log_dir, params.kafka_log_dir)
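# Illustrative sketch (hypothetical config value): 'log.dirs' is a comma-separated list and
# filter(None, ...) above drops empty entries (e.g. a trailing comma) before Directory() runs.
log_dirs_value = "/kafka-logs1,/kafka-logs2,"   # hypothetical kafka-broker setting
kafka_data_dirs = [d for d in log_dirs_value.split(",") if d]
print(kafka_data_dirs)   # ['/kafka-logs1', '/kafka-logs2']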
Example #25
0
    except:
        return None


# server configurations
config = Script.get_config()
exec_tmp_dir = Script.get_tmp_dir()
sudo = AMBARI_SUDO_BINARY

stack_name = default("/hostLevelParams/stack_name", None)
retryAble = default("/commandParams/command_retry_enabled", False)

version = default("/commandParams/version", None)

stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
stack_version = format_stack_version(stack_version_unformatted)
stack_root = status_params.stack_root

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)

component_directory = status_params.component_directory

#hadoop params
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
daemon_script = format(
    '/usr/iop/current/{component_directory}/bin/hbase-daemon.sh')
region_mover = format(
Example #26
0
def setup_spark(env, type, upgrade_type=None, action=None, config_dir=None):
  """
  :param env: Python environment
  :param type: Spark component type
  :param upgrade_type: If in a stack upgrade, either UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param action: Action to perform, such as generate configs
  :param config_dir: Optional config directory to write configs to.
  """

  import params

  if config_dir is None:
    config_dir = params.spark_conf

  Directory([params.spark_pid_dir, params.spark_log_dir],
            owner=params.spark_user,
            group=params.user_group,
            mode=0775,
            create_parents = True
  )
  if type == 'server' and action == 'config':
    params.HdfsResource(params.spark_hdfs_user_dir,
                       type="directory",
                       action="create_on_execute",
                       owner=params.spark_user,
                       mode=0775
    )
    params.HdfsResource(None, action="execute")

  PropertiesFile(os.path.join(config_dir, "spark-defaults.conf"),
    properties = params.config['configurations']['spark-defaults'],
    key_value_delimiter = " ",
    owner=params.spark_user,
    group=params.spark_group,
  )

  # create spark-env.sh in etc/conf dir
  File(os.path.join(config_dir, 'spark-env.sh'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_env_sh),
       mode=0644,
  )

  #create log4j.properties in etc/conf dir
  File(os.path.join(config_dir, 'log4j.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=params.spark_log4j_properties,
       mode=0644,
  )

  #create metrics.properties in etc/conf dir
  File(os.path.join(config_dir, 'metrics.properties'),
       owner=params.spark_user,
       group=params.spark_group,
       content=InlineTemplate(params.spark_metrics_properties)
  )

  File(os.path.join(params.spark_conf, 'java-opts'),
      owner=params.spark_user,
      group=params.spark_group,
      content=InlineTemplate(params.spark_javaopts_properties)
  )

  Directory(params.spark_logs_dir,
       owner=params.spark_user,
       group=params.spark_group,
       mode=0755,   
  )

  if params.is_hive_installed:
    XmlConfig("hive-site.xml",
          conf_dir=config_dir,
          configurations=params.spark_hive_properties,
          owner=params.spark_user,
          group=params.spark_group,
          mode=0644)

  if params.has_spark_thriftserver:
    PropertiesFile(params.spark_thrift_server_conf_file,
      properties = params.config['configurations']['spark-thrift-sparkconf'],
      owner = params.hive_user,
      group = params.user_group,
      key_value_delimiter = " ",
    )

  effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
  if effective_version:
    effective_version = format_stack_version(effective_version)

  if params.spark_thrift_fairscheduler_content and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
    # create spark-thrift-fairscheduler.xml
    File(os.path.join(config_dir,"spark-thrift-fairscheduler.xml"),
      owner=params.spark_user,
      group=params.spark_group,
      mode=0755,
      content=InlineTemplate(params.spark_thrift_fairscheduler_content)
    )
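# Illustrative sketch (hypothetical helper and versions, not in the source): the effective version
# used for the SPARK_16PLUS feature check above is the upgrade target version during an upgrade,
# otherwise the currently formatted stack version.
def pick_effective_version(upgrade_type, version, stack_version_formatted):
    return version if upgrade_type is not None else stack_version_formatted

print(pick_effective_version(None, "2.5.0.0", "2.4.2.0"))        # '2.4.2.0'
print(pick_effective_version("ROLLING", "2.5.0.0", "2.4.2.0"))   # '2.5.0.0'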
Example #27
0
def kafka(upgrade_type=None):
    import params
    ensure_base_directories()

    kafka_server_config = mutable_config_dict(
        params.config['configurations']['kafka-broker'])
    # This still has an issue of hostnames being alphabetically out-of-order for broker.id in HDP-2.2.
    # Starting in HDP 2.3, Kafka handles the generation of broker.id so Ambari doesn't have to.

    effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(
        params.version)
    Logger.info(format("Effective stack version: {effective_version}"))

    # In HDP-2.2 (Apache Kafka 0.8.1.1) we used to generate broker.id values based on hosts and add
    # them to Kafka's server.properties. In later versions brokers can generate their own ids from a
    # ZooKeeper sequence, but we need to preserve broker.id when the user upgrades from HDP-2.2 to
    # any higher version. Once preserved, it is written to <log.dirs>/meta.properties and used from
    # there on. Similarly, the port needs to be preserved during the upgrade.

    if upgrade_type is not None and params.upgrade_direction == Direction.UPGRADE and \
      check_stack_feature(StackFeature.CREATE_KAFKA_BROKER_ID, params.current_version) and \
      check_stack_feature(StackFeature.KAFKA_LISTENERS, params.version):
        if len(params.kafka_hosts
               ) > 0 and params.hostname in params.kafka_hosts:
            brokerid = str(sorted(params.kafka_hosts).index(params.hostname))
            kafka_server_config['broker.id'] = brokerid
            Logger.info(format("Calculating broker.id as {brokerid}"))
        if 'port' in kafka_server_config:
            port = kafka_server_config['port']
            Logger.info(format("Port config from previous verson: {port}"))
            listeners = kafka_server_config['listeners']
            kafka_server_config['listeners'] = listeners.replace("6667", port)
            Logger.info(
                format("Kafka listeners after the port update: {listeners}"))
            del kafka_server_config['port']


    if effective_version is not None and effective_version != "" and \
      check_stack_feature(StackFeature.CREATE_KAFKA_BROKER_ID, effective_version):
        if len(params.kafka_hosts
               ) > 0 and params.hostname in params.kafka_hosts:
            brokerid = str(sorted(params.kafka_hosts).index(params.hostname))
            kafka_server_config['broker.id'] = brokerid
            Logger.info(format("Calculating broker.id as {brokerid}"))

    # listeners and advertised.listeners are only added in 2.3.0.0 onwards.
    if effective_version is not None and effective_version != "" and \
       check_stack_feature(StackFeature.KAFKA_LISTENERS, effective_version):

        listeners = kafka_server_config['listeners'].replace(
            "localhost", params.hostname)
        Logger.info(format("Kafka listeners: {listeners}"))
        kafka_server_config['listeners'] = listeners

        if params.security_enabled and params.kafka_kerberos_enabled:
            Logger.info("Kafka kerberos security is enabled.")
            kafka_server_config['advertised.listeners'] = listeners
            Logger.info(format("Kafka advertised listeners: {listeners}"))
        elif 'advertised.listeners' in kafka_server_config:
            advertised_listeners = kafka_server_config[
                'advertised.listeners'].replace("localhost", params.hostname)
            kafka_server_config['advertised.listeners'] = advertised_listeners
            Logger.info(
                format("Kafka advertised listeners: {advertised_listeners}"))
    else:
        kafka_server_config['host.name'] = params.hostname

    if params.has_metric_collector:
        kafka_server_config[
            'kafka.timeline.metrics.hosts'] = params.ams_collector_hosts
        kafka_server_config[
            'kafka.timeline.metrics.port'] = params.metric_collector_port
        kafka_server_config[
            'kafka.timeline.metrics.protocol'] = params.metric_collector_protocol
        kafka_server_config[
            'kafka.timeline.metrics.truststore.path'] = params.metric_truststore_path
        kafka_server_config[
            'kafka.timeline.metrics.truststore.type'] = params.metric_truststore_type
        kafka_server_config[
            'kafka.timeline.metrics.truststore.password'] = params.metric_truststore_password

    kafka_data_dir = kafka_server_config['log.dirs']
    kafka_data_dirs = filter(None, kafka_data_dir.split(","))

    rack = "/default-rack"
    i = 0
    if len(params.all_racks) > 0:
        for host in params.all_hosts:
            if host == params.hostname:
                rack = params.all_racks[i]
                break
            i = i + 1

    kafka_server_config['broker.rack'] = rack

    Directory(
        kafka_data_dirs,
        mode=0755,
        cd_access='a',
        owner=params.kafka_user,
        group=params.user_group,
        create_parents=True,
        recursive_ownership=True,
    )

    PropertiesFile(
        "server.properties",
        dir=params.conf_dir,
        properties=kafka_server_config,
        owner=params.kafka_user,
        group=params.user_group,
    )

    File(format("{conf_dir}/kafka-env.sh"),
         owner=params.kafka_user,
         content=InlineTemplate(params.kafka_env_sh_template))

    if (params.log4j_props != None):
        File(format("{conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.kafka_user,
             content=InlineTemplate(params.log4j_props))

    if params.security_enabled and params.kafka_kerberos_enabled:
        if params.kafka_jaas_conf_template:
            File(format("{conf_dir}/kafka_jaas.conf"),
                 owner=params.kafka_user,
                 content=InlineTemplate(params.kafka_jaas_conf_template))
        else:
            TemplateConfig(format("{conf_dir}/kafka_jaas.conf"),
                           owner=params.kafka_user)

        if params.kafka_client_jaas_conf_template:
            File(format("{conf_dir}/kafka_client_jaas.conf"),
                 owner=params.kafka_user,
                 content=InlineTemplate(
                     params.kafka_client_jaas_conf_template))
        else:
            TemplateConfig(format("{conf_dir}/kafka_client_jaas.conf"),
                           owner=params.kafka_user)

    # On some operating systems this folder may not exist, so create it before pushing files there
    Directory(params.limits_conf_dir,
              create_parents=True,
              owner='root',
              group='root')

    File(os.path.join(params.limits_conf_dir, 'kafka.conf'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("kafka.conf.j2"))

    File(os.path.join(params.conf_dir, 'tools-log4j.properties'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("tools-log4j.properties.j2"))

    setup_symlink(params.kafka_managed_pid_dir, params.kafka_pid_dir)
    setup_symlink(params.kafka_managed_log_dir, params.kafka_log_dir)
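# Illustrative sketch (hypothetical host list): during the upgrade path above, broker.id is derived
# from the host's position in the sorted broker list, and a legacy 'port' property is folded into
# 'listeners' by replacing the default 6667 port.
kafka_hosts = ["c6403.ambari.apache.org", "c6401.ambari.apache.org", "c6402.ambari.apache.org"]
hostname = "c6402.ambari.apache.org"
print(str(sorted(kafka_hosts).index(hostname)))   # '1'  -> broker.id

listeners = "PLAINTEXT://localhost:6667"
port = "9092"
print(listeners.replace("6667", port))            # 'PLAINTEXT://localhost:9092'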
Example #28
0
def falcon(type, action=None, upgrade_type=None):
    import params

    if action == 'config':
        Directory(
            params.falcon_pid_dir,
            owner=params.falcon_user,
            create_parents=True,
            mode=0755,
            cd_access="a",
        )

        Directory(
            params.falcon_log_dir,
            owner=params.falcon_user,
            create_parents=True,
            mode=0755,
            cd_access="a",
        )

        Directory(params.falcon_webapp_dir,
                  owner=params.falcon_user,
                  create_parents=True)

        Directory(params.falcon_home,
                  owner=params.falcon_user,
                  create_parents=True)

        Directory(params.etc_prefix_dir, mode=0755, create_parents=True)

        Directory(params.falcon_conf_dir,
                  owner=params.falcon_user,
                  create_parents=True)

        File(
            params.falcon_conf_dir + '/falcon-env.sh',
            content=InlineTemplate(params.falcon_env_sh_template),
            owner=params.falcon_user,
            group=params.user_group,
        )

        PropertiesFile(params.falcon_conf_dir + '/client.properties',
                       properties=params.falcon_client_properties,
                       mode=0644,
                       owner=params.falcon_user)

        PropertiesFile(params.falcon_conf_dir + '/runtime.properties',
                       properties=params.falcon_runtime_properties,
                       mode=0644,
                       owner=params.falcon_user)

        PropertiesFile(params.falcon_conf_dir + '/startup.properties',
                       properties=params.falcon_startup_properties,
                       mode=0644,
                       owner=params.falcon_user)

        File(params.falcon_conf_dir + '/log4j.properties',
             content=InlineTemplate(params.falcon_log4j),
             group=params.user_group,
             mode=0644,
             owner=params.falcon_user)

        if params.falcon_graph_storage_directory:
            Directory(params.falcon_graph_storage_directory,
                      owner=params.falcon_user,
                      group=params.user_group,
                      mode=0775,
                      create_parents=True,
                      cd_access="a")

        if params.falcon_graph_serialize_path:
            Directory(params.falcon_graph_serialize_path,
                      owner=params.falcon_user,
                      group=params.user_group,
                      mode=0775,
                      create_parents=True,
                      cd_access="a")

        # Generate atlas-application.properties.xml file
        if params.falcon_atlas_support and params.enable_atlas_hook:
            # If Atlas is added later than Falcon, this package will be absent.
            install_atlas_hook_packages(
                params.atlas_plugin_package,
                params.atlas_ubuntu_plugin_package, params.host_sys_prepped,
                params.agent_stack_retry_on_unavailability,
                params.agent_stack_retry_count)

            atlas_hook_filepath = os.path.join(params.falcon_conf_dir,
                                               params.atlas_hook_filename)
            setup_atlas_hook(SERVICE.FALCON,
                             params.falcon_atlas_application_properties,
                             atlas_hook_filepath, params.falcon_user,
                             params.user_group)

            # Falcon 0.10 uses FALCON_EXTRA_CLASS_PATH.
            # Setup symlinks for older versions.
            if params.current_version_formatted and check_stack_feature(
                    StackFeature.FALCON_ATLAS_SUPPORT_2_3,
                    params.current_version_formatted):
                setup_atlas_jar_symlinks("falcon", params.falcon_webinf_lib)

    if type == 'server':
        if action == 'config':
            if params.store_uri[0:4] == "hdfs":
                params.HdfsResource(params.store_uri,
                                    type="directory",
                                    action="create_on_execute",
                                    owner=params.falcon_user,
                                    mode=0755)
            elif params.store_uri[0:4] == "file":
                Directory(params.store_uri[7:],
                          owner=params.falcon_user,
                          create_parents=True)

            # TODO change to proper mode
            params.HdfsResource(params.falcon_apps_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.falcon_user,
                                mode=0777)

            # In HDP 2.4 and earlier, the data-mirroring directory was copied to HDFS.
            if params.supports_data_mirroring:
                params.HdfsResource(params.dfs_data_mirroring_dir,
                                    type="directory",
                                    action="create_on_execute",
                                    owner=params.falcon_user,
                                    group=params.proxyuser_group,
                                    recursive_chown=True,
                                    recursive_chmod=True,
                                    mode=0770,
                                    source=params.local_data_mirroring_dir)

            # Falcon Extensions were supported in HDP 2.5 and higher.
            effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(
                params.version)
            supports_falcon_extensions = effective_version and check_stack_feature(
                StackFeature.FALCON_EXTENSIONS, effective_version)

            if supports_falcon_extensions:
                params.HdfsResource(params.falcon_extensions_dest_dir,
                                    type="directory",
                                    action="create_on_execute",
                                    owner=params.falcon_user,
                                    group=params.proxyuser_group,
                                    recursive_chown=True,
                                    recursive_chmod=True,
                                    mode=0755,
                                    source=params.falcon_extensions_source_dir)
                # Create the extensions HiveDR store
                params.HdfsResource(os.path.join(
                    params.falcon_extensions_dest_dir, "mirroring"),
                                    type="directory",
                                    action="create_on_execute",
                                    owner=params.falcon_user,
                                    group=params.proxyuser_group,
                                    mode=0770)

            # At least one HDFS Dir should be created, so execute the change now.
            params.HdfsResource(None, action="execute")

            Directory(params.falcon_local_dir,
                      owner=params.falcon_user,
                      create_parents=True,
                      cd_access="a")

            if params.falcon_embeddedmq_enabled == True:
                Directory(os.path.abspath(
                    os.path.join(params.falcon_embeddedmq_data, "..")),
                          owner=params.falcon_user,
                          create_parents=True)

                Directory(params.falcon_embeddedmq_data,
                          owner=params.falcon_user,
                          create_parents=True)

        # although Falcon's falcon-config.sh will use 'which hadoop' to figure
        # this out, in an upgraded cluster, it's possible that 'which hadoop'
        # still points to older binaries; it's safer to just pass in the
        # hadoop home directory to use
        environment_dictionary = {"HADOOP_HOME": params.hadoop_home_dir}

        pid = get_user_call_output.get_user_call_output(
            format("cat {server_pid_file}"),
            user=params.falcon_user,
            is_checked_call=False)[1]
        process_exists = format("ls {server_pid_file} && ps -p {pid}")

        if action == 'start':
            try:
                Execute(
                    format('{falcon_home}/bin/falcon-config.sh server falcon'),
                    user=params.falcon_user,
                    path=params.hadoop_bin_dir,
                    environment=environment_dictionary,
                    not_if=process_exists,
                )
            except:
                show_logs(params.falcon_log_dir, params.falcon_user)
                raise

            if not os.path.exists(params.target_jar_file):
                try:
                    File(params.target_jar_file,
                         content=DownloadSource(params.bdb_resource_name),
                         mode=0755)
                except:
                    exc_msg = traceback.format_exc()
                    exception_message = format(
                        "Caught Exception while downloading {bdb_resource_name}:\n{exc_msg}"
                    )
                    Logger.error(exception_message)

                if not os.path.isfile(params.target_jar_file):
                    error_message = """
If you are using BDB as the Falcon graph DB store, please run
ambari-server setup --jdbc-db=bdb --jdbc-driver=<path to je5.0.73.jar>
on the Ambari server host; otherwise Falcon startup will fail.
Alternatively, configure Falcon to use HBase as the backend as described
in the Falcon documentation.
"""
                    Logger.error(error_message)
            try:
                Execute(
                    format(
                        '{falcon_home}/bin/falcon-start -port {falcon_port}'),
                    user=params.falcon_user,
                    path=params.hadoop_bin_dir,
                    environment=environment_dictionary,
                    not_if=process_exists,
                )
            except:
                show_logs(params.falcon_log_dir, params.falcon_user)
                raise

        if action == 'stop':
            try:
                Execute(format('{falcon_home}/bin/falcon-stop'),
                        user=params.falcon_user,
                        path=params.hadoop_bin_dir,
                        environment=environment_dictionary)
            except:
                show_logs(params.falcon_log_dir, params.falcon_user)
                raise

            File(params.server_pid_file, action='delete')
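# Illustrative sketch (hypothetical URI): the 'config' branch above dispatches on the scheme of the
# Falcon store URI; "hdfs" locations become HdfsResource directories, while "file" URIs have the
# "file://" prefix stripped and are created locally.
store_uri = "file:///hadoop/falcon/store"   # hypothetical value
if store_uri[0:4] == "hdfs":
    print("create HDFS directory: " + store_uri)
elif store_uri[0:4] == "file":
    print("create local directory: " + store_uri[7:])   # '/hadoop/falcon/store'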
Example #29
0
  def pre_rolling_restart(self, env):
    import params
    env.set_params(params)

    if params.version and compare_versions(format_stack_version(params.version), '4.0.0.0') >= 0:
      stack_select.select_packages(params.version)
Example #30
0
def kafka(upgrade_type=None):
    import params
    ensure_base_directories()

    kafka_server_config = mutable_config_dict(
        params.config['configurations']['kafka-broker'])
    # This still has an issue of hostnames being alphabetically out-of-order for broker.id in HDP-2.2.
    # Starting in HDP 2.3, Kafka handles the generation of broker.id so Ambari doesn't have to.

    effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(
        params.version)
    Logger.info(format("Effective stack version: {effective_version}"))

    # listeners and advertised.listeners are only added in 2.3.0.0 onwards.
    if effective_version is not None and effective_version != "" and \
       check_stack_feature(StackFeature.KAFKA_LISTENERS, effective_version):

        listeners = kafka_server_config['listeners'].replace(
            "localhost", params.hostname)
        Logger.info(format("Kafka listeners: {listeners}"))
        kafka_server_config['listeners'] = listeners

        if params.kerberos_security_enabled and params.kafka_kerberos_enabled:
            Logger.info("Kafka kerberos security is enabled.")

            if "SASL" not in listeners:
                listeners = kafka_server_config['listeners']
                listeners = re.sub(r"(^|\b)PLAINTEXT://", "SASL_PLAINTEXT://",
                                   listeners)
                listeners = re.sub(r"(^|\b)PLAINTEXTSASL://",
                                   "SASL_PLAINTEXT://", listeners)
                listeners = re.sub(r"(^|\b)SSL://", "SASL_SSL://", listeners)
                kafka_server_config['listeners'] = listeners

            kafka_server_config['advertised.listeners'] = listeners
            Logger.info(format("Kafka advertised listeners: {listeners}"))
        elif 'advertised.listeners' in kafka_server_config:
            advertised_listeners = kafka_server_config[
                'advertised.listeners'].replace("localhost", params.hostname)
            kafka_server_config['advertised.listeners'] = advertised_listeners
            Logger.info(
                format("Kafka advertised listeners: {advertised_listeners}"))
    else:
        kafka_server_config['host.name'] = params.hostname

    if params.has_metric_collector:
        kafka_server_config[
            'kafka.timeline.metrics.hosts'] = params.ams_collector_hosts
        kafka_server_config[
            'kafka.timeline.metrics.port'] = params.metric_collector_port
        kafka_server_config[
            'kafka.timeline.metrics.protocol'] = params.metric_collector_protocol
        kafka_server_config[
            'kafka.timeline.metrics.truststore.path'] = params.metric_truststore_path
        kafka_server_config[
            'kafka.timeline.metrics.truststore.type'] = params.metric_truststore_type
        kafka_server_config[
            'kafka.timeline.metrics.truststore.password'] = params.metric_truststore_password

    kafka_data_dir = kafka_server_config['log.dirs']
    kafka_data_dirs = filter(None, kafka_data_dir.split(","))

    rack = "/default-rack"
    i = 0
    if len(params.all_racks) > 0:
        for host in params.all_hosts:
            if host == params.hostname:
                rack = params.all_racks[i]
                break
            i = i + 1

    Directory(
        kafka_data_dirs,
        mode=0755,
        cd_access='a',
        owner=params.kafka_user,
        group=params.user_group,
        create_parents=True,
        recursive_ownership=True,
    )

    PropertiesFile(
        "server.properties",
        mode=0640,
        dir=params.conf_dir,
        properties=kafka_server_config,
        owner=params.kafka_user,
        group=params.user_group,
    )

    File(format("{conf_dir}/kafka-env.sh"),
         owner=params.kafka_user,
         content=InlineTemplate(params.kafka_env_sh_template))

    if (params.log4j_props != None):
        File(format("{conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.kafka_user,
             content=InlineTemplate(params.log4j_props))

    if (params.kerberos_security_enabled and
            params.kafka_kerberos_enabled) or params.kafka_other_sasl_enabled:
        if params.kafka_jaas_conf_template:
            File(format("{conf_dir}/kafka_jaas.conf"),
                 owner=params.kafka_user,
                 content=InlineTemplate(params.kafka_jaas_conf_template))
        else:
            TemplateConfig(format("{conf_dir}/kafka_jaas.conf"),
                           owner=params.kafka_user)

        if params.kafka_client_jaas_conf_template:
            File(format("{conf_dir}/kafka_client_jaas.conf"),
                 owner=params.kafka_user,
                 content=InlineTemplate(
                     params.kafka_client_jaas_conf_template))
        else:
            TemplateConfig(format("{conf_dir}/kafka_client_jaas.conf"),
                           owner=params.kafka_user)

    # On some operating systems this folder may not exist, so create it before pushing files there
    Directory(params.limits_conf_dir,
              create_parents=True,
              owner='root',
              group='root')

    File(os.path.join(params.limits_conf_dir, 'kafka.conf'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("kafka.conf.j2"))

    File(os.path.join(params.conf_dir, 'tools-log4j.properties'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("tools-log4j.properties.j2"))

    generate_logfeeder_input_config(
        'kafka', Template("input.config-kafka.json.j2",
                          extra_imports=[default]))

    setup_symlink(params.kafka_managed_pid_dir, params.kafka_pid_dir)
    setup_symlink(params.kafka_managed_log_dir, params.kafka_log_dir)
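# Illustrative sketch of the protocol rewrite applied above when Kerberos is enabled (hypothetical
# listener string): PLAINTEXT, PLAINTEXTSASL and SSL endpoints are mapped to their SASL equivalents
# only if no SASL protocol is present yet.
import re

listeners = "PLAINTEXT://host1.example.com:6667,SSL://host1.example.com:6668"
if "SASL" not in listeners:
    listeners = re.sub(r"(^|\b)PLAINTEXT://", "SASL_PLAINTEXT://", listeners)
    listeners = re.sub(r"(^|\b)PLAINTEXTSASL://", "SASL_PLAINTEXT://", listeners)
    listeners = re.sub(r"(^|\b)SSL://", "SASL_SSL://", listeners)
print(listeners)  # SASL_PLAINTEXT://host1.example.com:6667,SASL_SSL://host1.example.com:6668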
Example #31
0
def kafka(upgrade_type=None):
    import params
    ensure_base_directories()

    kafka_server_config = mutable_config_dict(params.config['configurations']['kafka-broker'])
    # This still has an issue of hostnames being alphabetically out-of-order for broker.id in HDP-2.2.
    # Starting in HDP 2.3, Kafka handles the generation of broker.id so Ambari doesn't have to.

    effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version)
    Logger.info(format("Effective stack version: {effective_version}"))

    if effective_version is not None and effective_version != "" and compare_versions(effective_version, '2.2.0.0') >= 0 and compare_versions(effective_version, '2.3.0.0') < 0:
      if len(params.kafka_hosts) > 0 and params.hostname in params.kafka_hosts:
        brokerid = str(sorted(params.kafka_hosts).index(params.hostname))
        kafka_server_config['broker.id'] = brokerid
        Logger.info(format("Calculating broker.id as {brokerid}"))

    # listeners and advertised.listeners are only added in 2.3.0.0 onwards.
    if effective_version is not None and effective_version != "" and compare_versions(effective_version, '2.3.0.0') >= 0:
      listeners = kafka_server_config['listeners'].replace("localhost", params.hostname)
      Logger.info(format("Kafka listeners: {listeners}"))

      if params.security_enabled and params.kafka_kerberos_enabled:
        Logger.info("Kafka kerberos security is enabled.")
        if "SASL" not in listeners:
          listeners = listeners.replace("PLAINTEXT", "PLAINTEXTSASL")

        kafka_server_config['listeners'] = listeners
        kafka_server_config['advertised.listeners'] = listeners
        Logger.info(format("Kafka advertised listeners: {listeners}"))
      else:
        kafka_server_config['listeners'] = listeners

        if 'advertised.listeners' in kafka_server_config:
          advertised_listeners = kafka_server_config['advertised.listeners'].replace("localhost", params.hostname)
          kafka_server_config['advertised.listeners'] = advertised_listeners
          Logger.info(format("Kafka advertised listeners: {advertised_listeners}"))
    else:
      kafka_server_config['host.name'] = params.hostname

    if params.has_metric_collector:
      kafka_server_config['kafka.timeline.metrics.host'] = params.metric_collector_host
      kafka_server_config['kafka.timeline.metrics.port'] = params.metric_collector_port
      kafka_server_config['kafka.timeline.metrics.protocol'] = params.metric_collector_protocol
      kafka_server_config['kafka.timeline.metrics.truststore.path'] = params.metric_truststore_path
      kafka_server_config['kafka.timeline.metrics.truststore.type'] = params.metric_truststore_type
      kafka_server_config['kafka.timeline.metrics.truststore.password'] = params.metric_truststore_password

    kafka_data_dir = kafka_server_config['log.dirs']
    kafka_data_dirs = filter(None, kafka_data_dir.split(","))
    Directory(kafka_data_dirs,
              mode=0755,
              cd_access='a',
              owner=params.kafka_user,
              group=params.user_group,
              create_parents = True,
              recursive_ownership = True,
    )

    PropertiesFile("server.properties",
                      dir=params.conf_dir,
                      properties=kafka_server_config,
                      owner=params.kafka_user,
                      group=params.user_group,
    )

    File(format("{conf_dir}/kafka-env.sh"),
          owner=params.kafka_user,
          content=InlineTemplate(params.kafka_env_sh_template)
     )

    if (params.log4j_props != None):
        File(format("{conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.kafka_user,
             content=params.log4j_props
         )

    if params.security_enabled and params.kafka_kerberos_enabled:
        TemplateConfig(format("{conf_dir}/kafka_jaas.conf"),
                         owner=params.kafka_user)

        TemplateConfig(format("{conf_dir}/kafka_client_jaas.conf"),
                       owner=params.kafka_user)

    # On some operating systems this folder may not exist, so create it before pushing files there
    Directory(params.limits_conf_dir,
              create_parents = True,
              owner='root',
              group='root'
    )

    File(os.path.join(params.limits_conf_dir, 'kafka.conf'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("kafka.conf.j2")
    )

    File(os.path.join(params.conf_dir, 'tools-log4j.properties'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("tools-log4j.properties.j2")
         )

    setup_symlink(params.kafka_managed_pid_dir, params.kafka_pid_dir)
    setup_symlink(params.kafka_managed_log_dir, params.kafka_log_dir)
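# Illustrative sketch (hypothetical helper, not the Ambari compare_versions implementation): the
# guards above only back-fill broker.id for stack versions in the [2.2.0.0, 2.3.0.0) range and only
# rewrite listeners from 2.3.0.0 onwards.
def version_tuple(v):
    return tuple(int(part) for part in v.split("."))

effective_version = "2.2.4.2"   # hypothetical
print(version_tuple("2.2.0.0") <= version_tuple(effective_version) < version_tuple("2.3.0.0"))  # True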
Example #32
0
def prestart(env, component):
  import params

  if params.version and compare_versions(format_stack_version(params.version), '4.1.0.0') >= 0:
    conf_select.select(params.stack_name, "kafka", params.version)
    stack_select.select(component, params.version)
Example #33
0
def install_windows_msi(url_base, save_dir, save_files, hadoop_user,
                        hadoop_password, stack_version):
    global _working_dir
    _working_dir = save_dir
    save_dir = os.path.abspath(save_dir)
    msi_save_dir = save_dir
    # system wide lock to prevent simultaneous installations(when first task failed on timeout)
    install_lock = SystemWideLock("Global\\hdp_msi_lock")
    try:
        # try to acquire lock
        if not install_lock.lock():
            Logger.info(
                "Some other task currently installing hdp.msi, waiting for 10 min for finish"
            )
            if not install_lock.lock(600000):
                raise Fail("Timeout on acquiring lock")
        if _validate_msi_install():
            Logger.info("hdp.msi already installed")
            return

        stack_version_formatted = format_stack_version(stack_version)
        hdp_22_specific_props = ''
        if stack_version_formatted != "" and compare_versions(
                stack_version_formatted, '2.2') >= 0:
            hdp_22_specific_props = hdp_22.format(data_dir=data_dir)

        # MSIs cannot be larger than 2GB. HDPWIN 2.3 had to be split in order to accommodate this limitation
        msi_file = ''
        for save_file in save_files:
            if save_file.lower().endswith(".msi"):
                msi_file = save_file
            file_url = urlparse.urljoin(url_base, save_file)
            try:
                download_file(file_url, os.path.join(msi_save_dir, save_file))
            except:
                raise Fail("Failed to download {url}".format(url=file_url))

        File(os.path.join(msi_save_dir, "properties.txt"),
             content=cluster_properties.format(
                 log_dir=log_dir,
                 data_dir=data_dir,
                 local_host=local_host,
                 db_flavor=db_flavor,
                 hdp_22_specific_props=hdp_22_specific_props))

        # install msi
        msi_path = os_utils.quote_path(os.path.join(save_dir, msi_file))
        log_path = os_utils.quote_path(
            os.path.join(save_dir, msi_file[:-3] + "log"))
        layout_path = os_utils.quote_path(
            os.path.join(save_dir, "properties.txt"))
        hadoop_password_arg = os_utils.quote_path(hadoop_password)

        Execute(
            INSTALL_MSI_CMD.format(msi_path=msi_path,
                                   log_path=log_path,
                                   layout_path=layout_path,
                                   hadoop_user=hadoop_user,
                                   hadoop_password_arg=hadoop_password_arg))
        reload_windows_env()
        # create additional services manually due to hdp.msi limitations
        _ensure_services_created(hadoop_user, hadoop_password)
        _create_symlinks(stack_version)
        # finalizing install
        _write_marker()
        _validate_msi_install()
    finally:
        install_lock.unlock()
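# Illustrative sketch (hypothetical file name): the MSI log path above is derived by slicing off the
# trailing "msi" from the file name and appending "log".
msi_file = "hdp-2.3.0.0.winpkg.msi"   # hypothetical
print(msi_file[:-3] + "log")          # 'hdp-2.3.0.0.winpkg.log'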
Example #34
0
sudo = AMBARI_SUDO_BINARY

# Global flag enabling or disabling the sysprep feature
host_sys_prepped = default("/ambariLevelParams/host_sys_prepped", False)

# Whether to skip copying fast-hdfs-resource.jar to /var/lib/ambari-agent/lib/
# This is required if tarballs are going to be copied to HDFS, so set to False
sysprep_skip_copy_fast_jar_hdfs = host_sys_prepped and default(
    "/configurations/cluster-env/sysprep_skip_copy_fast_jar_hdfs", False)

# Whether to skip setting up the unlimited key JCE policy
sysprep_skip_setup_jce = host_sys_prepped and default(
    "/configurations/cluster-env/sysprep_skip_setup_jce", False)

stack_version_unformatted = config['clusterLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)
major_stack_version = get_major_version(stack_version_formatted)

dfs_type = default("/clusterLevelParams/dfs_type", "")
hadoop_conf_dir = "/etc/hadoop"
component_list = default("/localComponents", [])

hdfs_tmp_dir = default("/configurations/hadoop-env/hdfs_tmp_dir", "/tmp")

hadoop_metrics2_properties_content = None
if 'hadoop-metrics2.properties' in config['configurations']:
    hadoop_metrics2_properties_content = config['configurations'][
        'hadoop-metrics2.properties']['content']

hadoop_home = stack_root + '/hadoop'
hadoop_libexec_dir = hadoop_home + "/libexec"
Example #35
0
def prestart(env):
  import params

  if params.version and compare_versions(format_stack_version(params.version), '4.0.0.0') >= 0:
    stack_select.select_packages(params.version)
Example #36
0
def spark_service(name, upgrade_type=None, action=None):
  import params

  if action == 'start':

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
      # create & copy spark2-hdp-yarn-archive.tar.gz to hdfs
      source_dir=params.spark_home+"/jars"
      tmp_archive_file="/tmp/spark2/spark2-hdp-yarn-archive.tar.gz"
      make_tarfile(tmp_archive_file, source_dir)
      copy_to_hdfs("spark2", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True
                          )
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3 and higher, which is included in HDP 2.3, does not depend on Tez, so the Tez
    # tarball does not need to be copied for it; for older stack versions, copy it.
    if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':
      historyserver_no_op_test = format(
      'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_history_server_start}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=historyserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

    elif name == 'sparkthriftserver':
      if params.security_enabled:
        hive_principal = params.hive_kerberos_principal.replace('_HOST', socket.getfqdn().lower())
        hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ")
        Execute(hive_kinit_cmd, user=params.hive_user)

      thriftserver_no_op_test = format(
      'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                user=params.hive_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=thriftserver_no_op_test
        )
      except:
        show_logs(params.spark_log_dir, user=params.hive_user)
        raise
  elif action == 'stop':
    if name == 'jobhistoryserver':
      try:
        Execute(format('{spark_history_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_history_server_pid_file,
        action="delete"
      )

    elif name == 'sparkthriftserver':
      try:
        Execute(format('{spark_thrift_server_stop}'),
                user=params.hive_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.hive_user)
        raise
      File(params.spark_thrift_server_pid_file,
        action="delete"
      )
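# Illustrative sketch (hypothetical principal): before starting the thrift server above, the _HOST
# placeholder in the Hive principal is replaced with the local FQDN prior to kinit.
import socket

hive_kerberos_principal = "hive/_HOST@EXAMPLE.COM"   # hypothetical
print(hive_kerberos_principal.replace('_HOST', socket.getfqdn().lower()))
# e.g. 'hive/c6401.ambari.apache.org@EXAMPLE.COM'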
Example #37
0
agent_stack_retry_on_unavailability = config['hostLevelParams'][
    'agent_stack_retry_on_unavailability']
agent_stack_retry_count = expect("/hostLevelParams/agent_stack_retry_count",
                                 int)

# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade
version = default("/commandParams/version", None)

stack_version_unformatted = status_params.stack_version_unformatted
stack_version_formatted = status_params.stack_version_formatted
upgrade_direction = default("/commandParams/upgrade_direction", None)

# current host stack version
current_version = default("/hostLevelParams/current_version", None)
current_version_formatted = format_stack_version(current_version)

etc_prefix_dir = "/etc/falcon"

# hadoop params
hadoop_home_dir = stack_select.get_hadoop_dir("home")
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")

if stack_version_formatted and check_stack_feature(
        StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    # if this is a server action, then use the server binaries; smoke tests
    # use the client binaries
    server_role_dir_mapping = {
        'FALCON_SERVER': 'falcon-server',
        'FALCON_SERVICE_CHECK': 'falcon-client'
    }
Example #38
0
from resource_management.libraries.functions import get_kinit_path
from resource_management.libraries.functions.get_not_managed_resources import get_not_managed_resources
from resource_management.libraries.functions.setup_ranger_plugin_xml import get_audit_configs, generate_ranger_service_config

# server configurations
config = Script.get_config()
tmp_dir = Script.get_tmp_dir()
stack_root = Script.get_stack_root()
stack_name = default("/clusterLevelParams/stack_name", None)
retryAble = default("/commandParams/command_retry_enabled", False)

# Version being upgraded/downgraded to
version = default("/commandParams/version", None)

stack_version_unformatted = config['clusterLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)
upgrade_direction = default("/commandParams/upgrade_direction", None)

# get the correct version to use for checking stack features
version_for_stack_feature_checks = get_stack_feature_version(config)

stack_supports_ranger_kerberos = check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)
stack_supports_core_site_for_ranger_plugin = check_stack_feature(
    StackFeature.CORE_SITE_FOR_RANGER_PLUGINS_SUPPORT,
    version_for_stack_feature_checks)

# When downgrading the 'version' is pointing to the downgrade-target version
# downgrade_from_version provides the source-version the downgrade is happening from
Example #39
0
    'alluxio.archive.file']

# alluxio master address
if 'clusterHostInfo' in config:
    alluxio_master = config['clusterHostInfo']['alluxio_master_hosts']

# alluxio underfs address
underfs_addr = config['configurations']['alluxio-env'][
    'alluxio.underfs.address']

# alluxio worker memory allotment
worker_mem = config['configurations']['alluxio-env']['alluxio.worker.memory']

# Find current stack and version to push agent files to
stack_name = default("/hostLevelParams/stack_name", None)
stack_version = format_stack_version(default("/commandParams/version", "2.6"))

# Set install dir
usr_base = "/usr/hdp/"
base_dir = usr_base + stack_version[:3] + "/alluxio/"

# Alluxio archive on agent nodes
alluxio_package_dir = "/var/lib/ambari-agent/cache/stacks/" + stack_name + "/" + stack_version[:
                                                                                               3] + "/services/ALLUXIO/package/"

# alluxio log dir
log_dir = config['configurations']['alluxio-env']['alluxio.log.dir']

# alluxio pid dir
pid_dir = config['configurations']['alluxio-env']['alluxio.pid.dir']
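# Illustrative sketch (hypothetical version): the install and package directories above use only the
# "major.minor" prefix of the formatted stack version, taken with a [:3] slice.
stack_version = "2.6.0.0"   # hypothetical result of format_stack_version(...)
print("/usr/hdp/" + stack_version[:3] + "/alluxio/")   # '/usr/hdp/2.6/alluxio/'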