Example #1
def get_spark_version(service_name, component_name, yarn_version):
  """
  Attempts to calculate the correct version placeholder value for spark or spark2 based on
  what is installed in the cluster. If Spark is not installed, then this value will need to be
  that of YARN so it can still find the correct shuffle class.

  On cluster installs, we have not yet calculated any versions and all known values could be None.
  This doesn't affect daemons, but it does affect client-only hosts, which will never receive
  a start command after install. Therefore, this function will attempt to use stack-select as a
  last resort to get a valid value.
  :param service_name:  the service name (SPARK, SPARK2, etc)
  :param component_name:  the component name (SPARK_CLIENT, etc)
  :param yarn_version:  the default version of Yarn to use if no spark is installed
  :return:  a value for the version placeholder in shuffle classpath properties
  """
  # start off by seeing if we need to populate a default value for YARN
  if yarn_version is None:
    yarn_version = component_version.get_component_repository_version(service_name = "YARN",
      component_name = "YARN_CLIENT")

  # now try to get the version of spark/spark2, defaulting to the version of YARN
  shuffle_classpath_version = component_version.get_component_repository_version(service_name = service_name,
    component_name = component_name, default_value = yarn_version)

  # even with the default of using YARN's version, on an install this might be None since we haven't
  # calculated the version of YARN yet - use stack_select as a last ditch effort
  if shuffle_classpath_version is None:
    try:
      shuffle_classpath_version = stack_select.get_role_component_current_stack_version()
    except Exception:
      Logger.exception("Unable to query for the correct shuffle classpath")

  return shuffle_classpath_version
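
A short usage sketch of the fallback chain above; the call site and log message are illustrative assumptions, not taken from the Ambari source:

# Hypothetical call: Spark2 is not installed, so the YARN client's repository
# version (or, failing that, stack-select) is what comes back.
version = get_spark_version("SPARK2", "SPARK2_CLIENT", yarn_version=None)
if version is not None:
  Logger.info("Using version {0} for the spark2 shuffle classpath".format(version))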
Example #2
  def test_get_component_version_by_service_name(self):
    """
    Tests that the component version map can be parsed using only the service name
    :return:
    """
    command_json = TestComponentVersionMapping._get_component_version_mappings()
    Script.config = command_json

    version = component_version.get_component_repository_version(service_name="HDFS")
    self.assertEqual(version, "2.5.0.0-1234")

    version = component_version.get_component_repository_version(service_name="ZOOKEEPER")
    self.assertEqual(version, "2.6.0.0-9999")
Example #3
def get_hadoop_dir(target):
  """
  Return the hadoop shared directory which should be used for the command's component. The
  directory including the component's version is tried first, but if that doesn't exist,
  this will fall back to using "current".

  :param target: the hadoop directory key to look up (e.g. "home", "libexec")
  """
  stack_root = Script.get_stack_root()
  stack_version = Script.get_stack_version()

  if target not in HADOOP_DIR_DEFAULTS:
    raise Fail("Target {0} not defined".format(target))

  hadoop_dir = HADOOP_DIR_DEFAULTS[target]

  formatted_stack_version = format_stack_version(stack_version)

  if stack_features.check_stack_feature(StackFeature.ROLLING_UPGRADE, formatted_stack_version):
    # read the desired version from the component map and use that for building the hadoop home
    version = component_version.get_component_repository_version()
    if version is None:
      version = default("/commandParams/version", None)

    # home uses a different template
    if target == "home":
      hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, version, "hadoop")
      if version is None or not sudo.path_isdir(hadoop_dir):
        hadoop_dir = HADOOP_HOME_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client")
    else:
      hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, version, "hadoop", target)
      if version is None or not sudo.path_isdir(hadoop_dir):
        hadoop_dir = HADOOP_DIR_TEMPLATE.format(stack_root, "current", "hadoop-client", target)

  return hadoop_dir
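
For reference, here are illustrative definitions consistent with the .format() calls above; the real constants live elsewhere in the module, so these values are assumptions:

# Assumed template constants (arity matches the calls in get_hadoop_dir):
HADOOP_DIR_DEFAULTS = {
  "home": "/usr/lib/hadoop",
  "libexec": "/usr/lib/hadoop/libexec",
}
HADOOP_HOME_DIR_TEMPLATE = "{0}/{1}/{2}"    # stack_root / version / component
HADOOP_DIR_TEMPLATE = "{0}/{1}/{2}/{3}"     # stack_root / version / component / target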
Example #4
def get_hadoop_conf_dir():
    """
    Return the hadoop shared conf directory which should be used for the command's component. The
    directory including the component's version is tried first, but if that doesn't exist,
    this will fall back to using "current".
    """
    stack_root = Script.get_stack_root()
    stack_version = Script.get_stack_version()

    hadoop_conf_dir = os.path.join(os.path.sep, "etc", "hadoop", "conf")
    if check_stack_feature(StackFeature.CONFIG_VERSIONING, stack_version):
        # read the desired version from the component map and use that for building the hadoop home
        version = component_version.get_component_repository_version()
        if version is None:
            version = default("/commandParams/version", None)

        hadoop_conf_dir = os.path.join(stack_root, str(version), "hadoop",
                                       "conf")
        if version is None or not sudo.path_isdir(hadoop_conf_dir):
            hadoop_conf_dir = os.path.join(stack_root, "current",
                                           "hadoop-client", "conf")

        Logger.info("Using hadoop conf dir: {0}".format(hadoop_conf_dir))

    return hadoop_conf_dir
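
As a concrete walk-through, assuming an HDP-style layout (the root and version values below are illustrative, not stated in the snippet):

import os

stack_root, version = "/usr/hdp", "2.6.0.0-9999"  # assumed values
versioned = os.path.join(stack_root, version, "hadoop", "conf")
fallback = os.path.join(stack_root, "current", "hadoop-client", "conf")
print(versioned)  # /usr/hdp/2.6.0.0-9999/hadoop/conf  (tried first)
print(fallback)   # /usr/hdp/current/hadoop-client/conf (used if the above is missing)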
Example #5
def _prepare_tez_tarball():
    """
  Prepares the Tez tarball by adding the Hadoop native libraries found in the mapreduce tarball.
  It's very important to use the version of mapreduce which matches tez here.
  Additionally, this will also copy native LZO to the tez tarball if LZO is enabled and the
  GPL license has been accepted.
  :return:  the full path of the newly created tez tarball to use
  """
    import tempfile

    Logger.info("Preparing the Tez tarball...")

    # get the mapreduce tarball which matches the version of tez
    # tez installs the mapreduce tar, so it should always be present
    _, mapreduce_source_file, _, _ = get_tarball_paths("mapreduce")
    _, tez_source_file, _, _ = get_tarball_paths("tez")

    temp_dir = Script.get_tmp_dir()

    # create the temp staging directories ensuring that non-root agents using tarfile can work with them
    mapreduce_temp_dir = tempfile.mkdtemp(prefix="mapreduce-tarball-",
                                          dir=temp_dir)
    tez_temp_dir = tempfile.mkdtemp(prefix="tez-tarball-", dir=temp_dir)
    sudo.chmod(mapreduce_temp_dir, 0o777)
    sudo.chmod(tez_temp_dir, 0o777)

    Logger.info("Extracting {0} to {1}".format(mapreduce_source_file,
                                               mapreduce_temp_dir))
    tar_archive.extract_archive(mapreduce_source_file, mapreduce_temp_dir)

    Logger.info("Extracting {0} to {1}".format(tez_source_file, tez_temp_dir))
    tar_archive.untar_archive(tez_source_file, tez_temp_dir)

    hadoop_lib_native_dir = os.path.join(mapreduce_temp_dir, "hadoop", "lib",
                                         "native")
    tez_lib_dir = os.path.join(tez_temp_dir, "lib")

    if not os.path.exists(hadoop_lib_native_dir):
        raise Fail(
            "Unable to seed the Tez tarball with native libraries since the source Hadoop native lib directory {0} does not exist"
            .format(hadoop_lib_native_dir))

    if not os.path.exists(tez_lib_dir):
        raise Fail(
            "Unable to seed the Tez tarball with native libraries since the target Tez lib directory {0} does not exist"
            .format(tez_lib_dir))

    # copy native libraries from hadoop to tez
    Execute(("cp", "-a", hadoop_lib_native_dir, tez_lib_dir), sudo=True)

    # if enabled, LZO GPL libraries must be copied as well
    if lzo_utils.should_install_lzo():
        stack_root = Script.get_stack_root()
        service_version = component_version.get_component_repository_version(
            service_name="TEZ")

        # some installations might not have Tez, but MapReduce2 should be a fallback to get the LZO libraries from
        if service_version is None:
            Logger.warning(
                "Tez does not appear to be installed, using the MapReduce version to get the LZO libraries"
            )
            service_version = component_version.get_component_repository_version(
                service_name="MAPREDUCE2")

        hadoop_lib_native_lzo_dir = os.path.join(stack_root, service_version,
                                                 "hadoop", "lib", "native")

        if not sudo.path_isdir(hadoop_lib_native_lzo_dir):
            Logger.warning(
                "Unable to located native LZO libraries at {0}, falling back to hadoop home"
                .format(hadoop_lib_native_lzo_dir))
            hadoop_lib_native_lzo_dir = os.path.join(stack_root, "current",
                                                     "hadoop-client", "lib",
                                                     "native")

        if not sudo.path_isdir(hadoop_lib_native_lzo_dir):
            raise Fail(
                "Unable to seed the Tez tarball with native libraries since LZO is enabled but the native LZO libraries could not be found at {0}"
                .format(hadoop_lib_native_lzo_dir))

        Execute(("cp", "-a", hadoop_lib_native_lzo_dir, tez_lib_dir),
                sudo=True)

    # ensure that the tez/lib directory is readable by non-root (which it typically is not)
    Directory(tez_lib_dir, mode=0o755, cd_access='a', recursive_ownership=True)

    # create the staging directory so that non-root agents can write to it
    tez_native_tarball_staging_dir = os.path.join(
        temp_dir, "tez-native-tarball-staging")
    if not os.path.exists(tez_native_tarball_staging_dir):
        Directory(tez_native_tarball_staging_dir,
                  mode=0o777,
                  cd_access='a',
                  create_parents=True,
                  recursive_ownership=True)

    tez_tarball_with_native_lib = os.path.join(tez_native_tarball_staging_dir,
                                               "tez-native.tar.gz")
    Logger.info("Creating a new Tez tarball at {0}".format(
        tez_tarball_with_native_lib))

    # tar up Tez, making sure to specify nothing for the arcname so that it does not include an absolute path
    with closing(tarfile.open(tez_tarball_with_native_lib,
                              "w:gz")) as new_tez_tarball:
        new_tez_tarball.add(tez_temp_dir, arcname=os.path.sep)

    # ensure that the tarball can be read and uploaded
    sudo.chmod(tez_tarball_with_native_lib, 0o744)

    # cleanup
    sudo.rmtree(mapreduce_temp_dir)
    sudo.rmtree(tez_temp_dir)

    return tez_tarball_with_native_lib
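
The extract/augment/repack pattern above, reduced to a self-contained sketch; the helper name and the file-copy step are illustrative, not part of the Ambari API:

import os
import shutil
import tarfile
import tempfile
from contextlib import closing

def repack_with_extras(source_tarball, extras_dir, output_tarball):
    # extract the original tarball into a scratch directory
    staging = tempfile.mkdtemp(prefix="repack-")
    with closing(tarfile.open(source_tarball, "r:*")) as tar:
        tar.extractall(staging)
    # seed the staging area with the extra files (plain files only, for brevity)
    for name in os.listdir(extras_dir):
        shutil.copy2(os.path.join(extras_dir, name), staging)
    # re-create the tarball; arcname=os.path.sep keeps absolute paths out of it
    with closing(tarfile.open(output_tarball, "w:gz")) as tar:
        tar.add(staging, arcname=os.path.sep)
    shutil.rmtree(staging)
    return output_tarball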
Example #6
    mapred_role_root = "hadoop-mapreduce-client"
    command_role = default("/role", "")
    if command_role in MAPR_SERVER_ROLE_DIRECTORY_MAP:
        mapred_role_root = MAPR_SERVER_ROLE_DIRECTORY_MAP[command_role]

    # YARN directory root
    yarn_role_root = "hadoop-yarn-client"
    if command_role in YARN_SERVER_ROLE_DIRECTORY_MAP:
        yarn_role_root = YARN_SERVER_ROLE_DIRECTORY_MAP[command_role]

    # defaults set to current based on role
    hadoop_mapr_home = format("{stack_root}/current/{mapred_role_root}")
    hadoop_yarn_home = format("{stack_root}/current/{yarn_role_root}")

    # try to render the specific version
    version = component_version.get_component_repository_version()
    if version is None:
        version = default("/commandParams/version", None)

    if version is not None:
        hadoop_mapr_versioned_home = format(
            "{stack_root}/{version}/hadoop-mapreduce")
        hadoop_yarn_versioned_home = format(
            "{stack_root}/{version}/hadoop-yarn")

        if sudo.path_isdir(hadoop_mapr_versioned_home):
            hadoop_mapr_home = hadoop_mapr_versioned_home

        if sudo.path_isdir(hadoop_yarn_versioned_home):
            hadoop_yarn_home = hadoop_yarn_versioned_home
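
The "prefer the versioned directory, fall back to current" logic above, as a standalone sketch (the function and argument names are illustrative):

import os

def pick_component_home(stack_root, version, versioned_name, current_name):
    # prefer the exact-version directory when it exists on disk
    if version is not None:
        versioned = os.path.join(stack_root, version, versioned_name)
        if os.path.isdir(versioned):
            return versioned
    # otherwise fall back to the "current" symlink for the role
    return os.path.join(stack_root, "current", current_name)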
Example #7
major_stack_version = get_major_version(stack_version_formatted_major)

stack_supports_ru = check_stack_feature(StackFeature.ROLLING_UPGRADE,
                                        version_for_stack_feature_checks)
stack_supports_timeline_state_store = check_stack_feature(
    StackFeature.TIMELINE_STATE_STORE, version_for_stack_feature_checks)

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade.
# It cannot be used during the initial Cluster Install because the version is not yet known.
version = default("/commandParams/version", None)

# these are used to render the classpath for picking up Spark classes
# in the event that spark is not installed, we must default to the version of YARN installed
# since it will still load classes from its own spark version
spark_version = component_version.get_component_repository_version(
    service_name="SPARK", component_name="SPARK_CLIENT", default_value=version)
spark2_version = component_version.get_component_repository_version(
    service_name="SPARK2",
    component_name="SPARK2_CLIENT",
    default_value=version)

stack_supports_ranger_kerberos = check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, version_for_stack_feature_checks)
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)

hostname = config['hostname']

# hadoop default parameters
hadoop_home = status_params.hadoop_home
hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec")
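
As a hedged illustration of where spark_version and spark2_version end up, the lines below substitute one of them into a versioned classpath entry; the path template is an assumption about the yarn-site rendering, not something shown in this example:

# Hypothetical shuffle classpath rendering (the template is an assumption):
if spark_version is not None:
    spark_shuffle_classpath = "{0}/{1}/spark/aux/*".format(
        Script.get_stack_root(), spark_version)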