Example #1
def hive(name=None):
  import params
    
  Directory(params.hive_etc_dir_prefix,
            mode=0755
  )

  # We should change configurations for the client as well as for the server.
  # The reason is that stale-configs are service-level, not component-level.
  Logger.info("Directories to fill with configs: %s" % str(params.hive_conf_dirs_list))
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  XmlConfig("hive-site.xml",
            conf_dir=params.hive_config_dir,
            configurations=params.hive_site_config,
            configuration_attributes=params.config['configuration_attributes']['hive-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=0644)

  # Generate atlas-application.properties.xml file
  if has_atlas_in_cluster():
    atlas_hook_filepath = os.path.join(params.hive_config_dir, params.atlas_hook_filename)
    setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.hive_user, params.user_group)
  
  File(format("{hive_config_dir}/hive-env.sh"),
       owner=params.hive_user,
       group=params.user_group,
       content=InlineTemplate(params.hive_env_sh_template)
  )

  # On some operating systems this folder may not exist, so create it before placing files in it
  Directory(params.limits_conf_dir,
            create_parents = True,
            owner='root',
            group='root'
            )

  File(os.path.join(params.limits_conf_dir, 'hive.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hive.conf.j2")
       )

  File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
       content = DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")),
       mode = 0644,
  )

  if name != "client":
    setup_non_client()
  if name == 'hiveserver2':
    setup_hiveserver2()
  if name == 'metastore':
    setup_metastore() # schematool work
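
For context, this is roughly how a function like hive(name=...) gets invoked: an Ambari component script's configure method sets up params and calls it with the component name. The class below is a hedged, illustrative sketch of that wiring (class name and imports are assumptions, not code from this example).

# Illustrative sketch only: a component script that would call the hive()
# function above. The class name and structure here are assumptions.
from resource_management import *
from hive import hive

class HiveServer(Script):
  def configure(self, env):
    import params
    env.set_params(params)
    # name selects the server-specific branches (setup_hiveserver2, etc.)
    hive(name='hiveserver2')

if __name__ == "__main__":
  HiveServer().execute()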
Example #2
def hcat():
    import params

    Directory(
        params.hive_conf_dir,
        create_parents=True,
        owner=params.hcat_user,
        group=params.user_group,
    )

    Directory(
        params.hcat_conf_dir,
        create_parents=True,
        owner=params.hcat_user,
        group=params.user_group,
    )

    Directory(params.hcat_pid_dir,
              owner=params.webhcat_user,
              create_parents=True)

    XmlConfig(
        "hive-site.xml",
        conf_dir=params.hive_client_conf_dir,
        configurations=params.config['configurations']['hive-site'],
        configuration_attributes=params.config['configuration_attributes']
        ['hive-site'],
        owner=params.hive_user,
        group=params.user_group,
        mode=0644)

    File(format("{hcat_conf_dir}/hcat-env.sh"),
         owner=params.hcat_user,
         group=params.user_group,
         content=InlineTemplate(params.hcat_env_sh_template))

    # Generate atlas-application.properties.xml file
    if has_atlas_in_cluster():
        atlas_hook_filepath = os.path.join(params.hive_config_dir,
                                           params.atlas_hook_filename)
        setup_atlas_hook(SERVICE.HIVE,
                         params.hive_atlas_application_properties,
                         atlas_hook_filepath, params.hive_user,
                         params.user_group)
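
As a rough illustration of what an XmlConfig resource like the ones above ends up producing, the sketch below renders a flat configurations dict into Hadoop-style <configuration> XML using only the standard library; it is a conceptual stand-in, not the Ambari implementation, and the property value is made up.

# Conceptual sketch (standard library only): a flat properties dict rendered
# as Hadoop-style configuration XML, which is the format hive-site.xml uses.
import xml.etree.ElementTree as ET

def render_hadoop_xml(configurations):
    root = ET.Element("configuration")
    for name, value in sorted(configurations.items()):
        prop = ET.SubElement(root, "property")
        ET.SubElement(prop, "name").text = name
        ET.SubElement(prop, "value").text = str(value)
    return ET.tostring(root).decode("utf-8")

print(render_hadoop_xml({"hive.metastore.uris": "thrift://metastore-host:9083"}))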
Example #3
def sqoop(type=None):
    import params
    Link(params.sqoop_lib + "/mysql-connector-java.jar",
         to='/usr/share/java/mysql-connector-java.jar')

    jdbc_connector()

    Directory(params.sqoop_conf_dir,
              owner=params.sqoop_user,
              group=params.user_group,
              create_parents=True)

    configs = {}
    configs.update(params.config['configurations']['sqoop-site'])

    XmlConfig(
        "sqoop-site.xml",
        conf_dir=params.sqoop_conf_dir,
        configurations=configs,
        configuration_attributes=params.config['configuration_attributes']
        ['sqoop-site'],
        owner=params.sqoop_user,
        group=params.user_group)

    # Generate atlas-application.properties.xml file and symlink the hook jars
    if has_atlas_in_cluster():
        atlas_hook_filepath = os.path.join(params.sqoop_conf_dir,
                                           params.atlas_hook_filename)
        setup_atlas_hook(SERVICE.SQOOP,
                         params.sqoop_atlas_application_properties,
                         atlas_hook_filepath, params.sqoop_user,
                         params.user_group)
        setup_atlas_jar_symlinks("sqoop", params.sqoop_lib)

    File(format("{sqoop_conf_dir}/sqoop-env.sh"),
         owner=params.sqoop_user,
         group=params.user_group,
         content=InlineTemplate(params.sqoop_env_sh_template))
    update_config_permissions(
        ["sqoop-env-template.sh", "sqoop-site-template.xml", "sqoop-site.xml"])
    pass
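
The Link resource at the top of this example declares a symlink to the shared MySQL JDBC driver. Below is a minimal standard-library sketch of that idea; the helper and the example paths are illustrative, not Ambari code.

# Rough stand-in for the Link resource above: ensure link_path is a symlink
# pointing at target, replacing anything stale that is in the way.
import os

def ensure_symlink(link_path, target):
    if os.path.islink(link_path) and os.readlink(link_path) == target:
        return  # already correct
    if os.path.lexists(link_path):
        os.remove(link_path)  # remove a stale link or regular file
    os.symlink(target, link_path)

# Example (paths illustrative):
# ensure_symlink("/usr/lib/sqoop/lib/mysql-connector-java.jar",
#                "/usr/share/java/mysql-connector-java.jar")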
Example #4
hdfs_principal_name = config['configurations']['hadoop-env'][
    'hdfs_principal_name']

hdfs_site = config['configurations']['hdfs-site']
default_fs = config['configurations']['core-site']['fs.defaultFS']

dfs_type = default("/commandParams/dfs_type", "")

########################################################
############# Atlas related params #####################
########################################################
#region Atlas Hooks needed by Hive on Oozie
hive_atlas_application_properties = default(
    '/configurations/hive-atlas-application.properties', {})

if has_atlas_in_cluster():
    atlas_hook_filename = default(
        '/configurations/atlas-env/metadata_conf_file',
        'atlas-application.properties')
#endregion

import functools
# Create partial functions with common arguments for every HdfsResource call.
# To create/delete an HDFS directory/file or copy from local, call params.HdfsResource in code.
HdfsResource = functools.partial(
    HdfsResource,
    user=hdfs_user,
    hdfs_resource_ignore_file=
    "/var/lib/ambari-agent/data/.hdfs_resource_ignore",
    security_enabled=security_enabled,
    keytab=hdfs_user_keytab,
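
The excerpt above is cut off mid-call, but the pattern it demonstrates is complete: functools.partial binds the arguments every HdfsResource call shares, so call sites only pass what varies. Here is a self-contained sketch of the same idea with a stand-in resource function; nothing below is Ambari code and the values are illustrative.

# Stand-in resource function and partial binding; values are illustrative.
import functools

def hdfs_resource(path, user=None, keytab=None, action=None, **kwargs):
    print("HdfsResource(%r) as user=%r action=%r" % (path, user, action))

HdfsResource = functools.partial(hdfs_resource,
                                 user="hdfs",
                                 keytab="/etc/security/keytabs/hdfs.headless.keytab")

HdfsResource("/user/oozie", action="create_on_execute")
HdfsResource(None, action="execute")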
Example #5
def copy_atlas_hive_hook_to_dfs_share_lib(upgrade_type=None, upgrade_direction=None):
  """
  If the Atlas Hive Hook directory is present, Atlas is installed, and this is the first Oozie Server,
  then copy the entire contents of that directory to the Oozie Sharelib in DFS, e.g.,
  /usr/$stack/$current_version/atlas/hook/hive/ -> hdfs:///user/oozie/share/lib/lib_$timestamp/hive

  :param upgrade_type: If in the middle of a stack upgrade, the type as UPGRADE_TYPE_ROLLING or UPGRADE_TYPE_NON_ROLLING
  :param upgrade_direction: If in the middle of a stack upgrade, the direction as Direction.UPGRADE or Direction.DOWNGRADE.
  """
  import params

  # Calculate the effective version since this code can also be called during EU/RU in the upgrade direction.
  effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(params.version)
  if not check_stack_feature(StackFeature.ATLAS_HOOK_SUPPORT, effective_version):
    return
    
  # It is important that oozie_server_hostnames is sorted by name so that this only runs on a single Oozie server.
  if not (len(params.oozie_server_hostnames) > 0 and params.hostname == params.oozie_server_hostnames[0]):
    Logger.debug("Will not attempt to copy Atlas Hive hook to DFS since this is not the first Oozie Server "
                 "sorted by hostname.")
    return

  if not has_atlas_in_cluster():
    Logger.debug("Will not attempt to copy Atlas Hve hook to DFS since Atlas is not installed on the cluster.")
    return

  if upgrade_type is not None and upgrade_direction == Direction.DOWNGRADE:
    Logger.debug("Will not attempt to copy Atlas Hve hook to DFS since in the middle of Rolling/Express upgrade "
                 "and performing a Downgrade.")
    return

  current_version = get_current_version()
  atlas_hive_hook_dir = format("{stack_root}/{current_version}/atlas/hook/hive/")
  if not os.path.exists(atlas_hive_hook_dir):
    Logger.error(format("ERROR. Atlas is installed in cluster but this Oozie server doesn't "
                        "contain directory {atlas_hive_hook_dir}"))
    return

  atlas_hive_hook_impl_dir = os.path.join(atlas_hive_hook_dir, "atlas-hive-plugin-impl")

  num_files = len([name for name in os.listdir(atlas_hive_hook_impl_dir) if os.path.exists(os.path.join(atlas_hive_hook_impl_dir, name))])
  Logger.info("Found %d files/directories inside Atlas Hive hook impl directory %s"% (num_files, atlas_hive_hook_impl_dir))

  # This can return over 100 files, so take the first 5 lines after "Available ShareLib"
  # Use -oozie http(s)://localhost:{oozie_server_admin_port}/oozie since oozie-env does not export OOZIE_URL
  command = format(r'source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -shareliblist hive | grep "\[Available ShareLib\]" -A 5')
  code, out = checked_call(command, user=params.oozie_user, tries=10, try_sleep=5, logoutput=True)

  hive_sharelib_dir = __parse_sharelib_from_output(out)

  if hive_sharelib_dir is None:
    raise Fail("Could not parse Hive sharelib from output.")

  Logger.info(format("Parsed Hive sharelib = {hive_sharelib_dir} and will attempt to copy/replace {num_files} files to it from {atlas_hive_hook_impl_dir}"))

  params.HdfsResource(hive_sharelib_dir,
                      type="directory",
                      action="create_on_execute",
                      source=atlas_hive_hook_impl_dir,
                      user=params.hdfs_user,
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      recursive_chown=True,
                      recursive_chmod=True,
                      replace_existing_files=True
                      )

  Logger.info("Copying Atlas Hive hook properties file to Oozie Sharelib in DFS.")
  atlas_hook_filepath_source = os.path.join(params.hive_conf_dir, params.atlas_hook_filename)
  atlas_hook_file_path_dest_in_dfs = os.path.join(hive_sharelib_dir, params.atlas_hook_filename)
  params.HdfsResource(atlas_hook_file_path_dest_in_dfs,
                      type="file",
                      source=atlas_hook_filepath_source,
                      action="create_on_execute",
                      owner=params.oozie_user,
                      group=params.hdfs_user,
                      mode=0755,
                      replace_existing_files=True
                      )
  params.HdfsResource(None, action="execute")

  # Update the sharelib after making any changes
  # Use -oozie http(s)://localhost:{oozie_server_admin_port}/oozie since oozie-env does not export OOZIE_URL
  Execute(format("source {conf_dir}/oozie-env.sh ; oozie admin -oozie {oozie_base_url} -sharelibupdate"),
          user=params.oozie_user,
          tries=5,
          try_sleep=5,
          logoutput=True,
  )
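
__parse_sharelib_from_output is not shown in this example; one plausible sketch follows. It assumes the "oozie admin ... -shareliblist hive" output contains, somewhere after the "[Available ShareLib]" marker that the command greps for, a line carrying the hdfs:// path of the Hive sharelib directory. The real output format and helper may differ.

# Hypothetical parser for the grep'd sharelib listing; the format is an assumption.
def __parse_sharelib_from_output(output):
  if not output:
    return None
  marker_seen = False
  for line in output.splitlines():
    line = line.strip()
    if "[Available ShareLib]" in line:
      marker_seen = True
      continue
    if marker_seen and line.startswith("hdfs://") and "hive" in line:
      return line
  return None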
Example #6
def oozie_server_specific(upgrade_type):
  import params
  
  no_op_test = as_user(format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1"), user=params.oozie_user)
  
  File(params.pid_file,
    action="delete",
    not_if=no_op_test
  )
  
  oozie_server_directories = [format("{oozie_home}/{oozie_tmp_dir}"), params.oozie_pid_dir, params.oozie_log_dir, params.oozie_tmp_dir, params.oozie_data_dir, params.oozie_lib_dir, params.oozie_webapps_dir, params.oozie_webapps_conf_dir, params.oozie_server_dir]
  Directory( oozie_server_directories,
    owner = params.oozie_user,
    group = params.user_group,
    mode = 0755,
    create_parents = True,
    cd_access="a",
  )
  
  Directory(params.oozie_libext_dir,
            create_parents = True,
  )
  
  hashcode_file = format("{oozie_home}/.hashcode")
  skip_recreate_sharelib = format("test -f {hashcode_file} && test -d {oozie_home}/share")

  untar_sharelib = ('tar','-xvf',format('{oozie_home}/oozie-sharelib.tar.gz'),'-C',params.oozie_home)

  Execute( untar_sharelib,    # time-expensive
    not_if  = format("{no_op_test} || {skip_recreate_sharelib}"), 
    sudo = True,
  )

  configure_cmds = []
  # Default to /usr/share/$TARGETSTACK-oozie/ext-2.2.zip as the first path
  source_ext_zip_paths = get_oozie_ext_zip_source_paths(upgrade_type, params)
  
  # Copy the first oozie ext-2.2.zip file that is found.
  # This uses a list to handle the cases when migrating from some versions of BigInsights to HDP.
  if source_ext_zip_paths is not None:
    for source_ext_zip_path in source_ext_zip_paths:
      if os.path.isfile(source_ext_zip_path):
        configure_cmds.append(('cp', source_ext_zip_path, params.oozie_libext_dir))
        configure_cmds.append(('chown', format('{oozie_user}:{user_group}'), format('{oozie_libext_dir}/{ext_js_file}')))

        Execute(configure_cmds,
                not_if=no_op_test,
                sudo=True,
                )
        break
  
  
  Directory(params.oozie_webapps_conf_dir,
            owner = params.oozie_user,
            group = params.user_group,
            recursive_ownership = True,
            recursion_follow_links = True,
  )

  # download the database JAR
  download_database_library_if_needed()

  # Falcon EL extension
  if params.has_falcon_host:
    Execute(format('{sudo} cp {falcon_home}/oozie/ext/falcon-oozie-el-extension-*.jar {oozie_libext_dir}'),
      not_if  = no_op_test)

    Execute(format('{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar'),
      not_if  = no_op_test)

  if params.lzo_enabled and len(params.all_lzo_packages) > 0:
    Package(params.all_lzo_packages,
            retry_on_repo_unavailability=params.agent_stack_retry_on_unavailability,
            retry_count=params.agent_stack_retry_count)
    Execute(format('{sudo} cp {hadoop_lib_home}/hadoop-lzo*.jar {oozie_lib_dir}'),
      not_if  = no_op_test,
    )

  prepare_war(params)

  File(hashcode_file,
       mode = 0644,
  )

  if params.stack_version_formatted and check_stack_feature(StackFeature.OOZIE_CREATE_HIVE_TEZ_CONFIGS, params.stack_version_formatted):
    # Create hive-site and tez-site configs for oozie
    Directory(params.hive_conf_dir,
        create_parents = True,
        owner = params.oozie_user,
        group = params.user_group
    )
    if 'hive-site' in params.config['configurations']:
      hive_site_config = update_credential_provider_path(params.config['configurations']['hive-site'],
                                                         'hive-site',
                                                         os.path.join(params.hive_conf_dir, 'hive-site.jceks'),
                                                         params.oozie_user,
                                                         params.user_group
                                                         )
      XmlConfig("hive-site.xml",
        conf_dir=params.hive_conf_dir,
        configurations=hive_site_config,
        configuration_attributes=params.config['configuration_attributes']['hive-site'],
        owner=params.oozie_user,
        group=params.user_group,
        mode=0644
    )
    if 'tez-site' in params.config['configurations']:
      XmlConfig( "tez-site.xml",
        conf_dir = params.hive_conf_dir,
        configurations = params.config['configurations']['tez-site'],
        configuration_attributes=params.config['configuration_attributes']['tez-site'],
        owner = params.oozie_user,
        group = params.user_group,
        mode = 0664
    )

    # If Atlas is also installed, we need to generate the Atlas Hive hook (hive-atlas-application.properties file) in directory
    # {stack_root}/{current_version}/atlas/hook/hive/
    # Because this is a .properties file instead of an xml file, it will not be read automatically by Oozie.
    # However, we should still save the file on this host so that it can be uploaded to the Oozie Sharelib in DFS.
    if has_atlas_in_cluster():
      atlas_hook_filepath = os.path.join(params.hive_conf_dir, params.atlas_hook_filename)
      Logger.info("Has atlas in cluster, will save Atlas Hive hook into location %s" % str(atlas_hook_filepath))
      setup_atlas_hook(SERVICE.HIVE, params.hive_atlas_application_properties, atlas_hook_filepath, params.oozie_user, params.user_group)

  Directory(params.oozie_server_dir,
    owner = params.oozie_user,
    group = params.user_group,
    recursive_ownership = True,  
  )
  if params.security_enabled:
    File(os.path.join(params.conf_dir, 'zkmigrator_jaas.conf'),
         owner=params.oozie_user,
         group=params.user_group,
         content=Template("zkmigrator_jaas.conf.j2")
         )
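
The no_op_test guard above is a shell check for "the pid file exists and the recorded process is alive". For clarity, the same check expressed in plain Python; this is illustrative and not how Ambari evaluates not_if.

# Pure-Python rendering of the pid-file liveness check used as not_if above.
import os

def pid_file_process_running(pid_file):
  try:
    with open(pid_file) as f:
      pid = int(f.read().strip())
  except (IOError, ValueError):
    return False
  try:
    os.kill(pid, 0)  # signal 0: existence/permission check only, no signal sent
    return True
  except OSError:
    return False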
Example #7
def storm(name=None):
    import params
    import os

    Directory(
        params.log_dir,
        owner=params.storm_user,
        group=params.user_group,
        mode=0777,
        create_parents=True,
        cd_access="a",
    )

    Directory(
        [params.pid_dir, params.local_dir],
        owner=params.storm_user,
        group=params.user_group,
        create_parents=True,
        cd_access="a",
        mode=0755,
    )

    Directory(
        params.conf_dir,
        group=params.user_group,
        create_parents=True,
        cd_access="a",
    )

    File(format("{conf_dir}/config.yaml"),
         content=Template("config.yaml.j2"),
         owner=params.storm_user,
         group=params.user_group)

    configurations = params.config['configurations']['storm-site']

    File(format("{conf_dir}/storm.yaml"),
         content=yaml_config_template(configurations),
         owner=params.storm_user,
         group=params.user_group)

    File(format("{conf_dir}/storm-env.sh"),
         owner=params.storm_user,
         content=InlineTemplate(params.storm_env_sh_template))

    # Generate atlas-application.properties.xml file and symlink the hook jars
    if has_atlas_in_cluster():
        atlas_hook_filepath = os.path.join(params.conf_dir,
                                           params.atlas_hook_filename)
        setup_atlas_hook(SERVICE.STORM,
                         params.storm_atlas_application_properties,
                         atlas_hook_filepath, params.storm_user,
                         params.user_group)
        storm_extlib_dir = os.path.join(params.storm_component_home_dir,
                                        "extlib")
        setup_atlas_jar_symlinks("storm", storm_extlib_dir)

    if params.has_metric_collector:
        File(format("{conf_dir}/storm-metrics2.properties"),
             owner=params.storm_user,
             group=params.user_group,
             content=Template("storm-metrics2.properties.j2"))

        # Remove symlinks. They can be present if upgrading from HDP < 2.2 to HDP >= 2.2
        Link(format("{storm_lib_dir}/ambari-metrics-storm-sink.jar"),
             action="delete")
        # On old HDP 2.1 versions, this symlink may also exist and break EU to newer versions
        Link("/usr/lib/storm/lib/ambari-metrics-storm-sink.jar",
             action="delete")

        if check_stack_feature(StackFeature.STORM_METRICS_APACHE_CLASSES,
                               params.version_for_stack_feature_checks):
            sink_jar = params.metric_collector_sink_jar
        else:
            sink_jar = params.metric_collector_legacy_sink_jar

        Execute(format(
            "{sudo} ln -s {sink_jar} {storm_lib_dir}/ambari-metrics-storm-sink.jar"
        ),
                not_if=format(
                    "ls {storm_lib_dir}/ambari-metrics-storm-sink.jar"),
                only_if=format("ls {sink_jar}"))

    if params.storm_logs_supported:
        Directory(params.log4j_dir,
                  owner=params.storm_user,
                  group=params.user_group,
                  mode=0755,
                  create_parents=True)

        File(format("{log4j_dir}/cluster.xml"),
             owner=params.storm_user,
             content=InlineTemplate(params.storm_cluster_log4j_content))
        File(format("{log4j_dir}/worker.xml"),
             owner=params.storm_user,
             content=InlineTemplate(params.storm_worker_log4j_content))

    if params.security_enabled:
        TemplateConfig(format("{conf_dir}/storm_jaas.conf"),
                       owner=params.storm_user)
        if params.stack_version_formatted and check_stack_feature(
                StackFeature.ROLLING_UPGRADE, params.stack_version_formatted):
            TemplateConfig(format("{conf_dir}/client_jaas.conf"),
                           owner=params.storm_user)
            minRuid = configurations.get('_storm.min.ruid', '')

            min_user_ruid = int(minRuid) if minRuid.isdigit() else _find_real_user_min_uid()

            File(format("{conf_dir}/worker-launcher.cfg"),
                 content=Template("worker-launcher.cfg.j2",
                                  min_user_ruid=min_user_ruid),
                 owner='root',
                 group=params.user_group)
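
_find_real_user_min_uid() is referenced but not shown. Below is a hedged guess at what such a helper could look like, assuming it reads UID_MIN from /etc/login.defs and falls back to a common default; the actual implementation may differ.

# Hypothetical sketch of _find_real_user_min_uid(); treat this as an assumption.
def _find_real_user_min_uid(login_defs="/etc/login.defs", default=500):
    try:
        with open(login_defs) as f:
            for line in f:
                parts = line.split()
                if len(parts) == 2 and parts[0] == "UID_MIN" and parts[1].isdigit():
                    return int(parts[1])
    except IOError:
        pass
    return default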
Example #8
def hive(name=None):
    import params

    if name == 'hiveserver2':
        # The copy-tarball-to-HDFS feature is not supported, so create the WebHCat apps dir directly.
        if not (params.stack_version_formatted_major
                and check_stack_feature(StackFeature.COPY_TARBALL_TO_HDFS,
                                        params.stack_version_formatted_major)):
            params.HdfsResource(params.webhcat_apps_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.webhcat_user,
                                mode=0755)

        # Create webhcat dirs.
        if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
            params.HdfsResource(params.hcat_hdfs_user_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.webhcat_user,
                                mode=params.hcat_hdfs_user_mode)

        params.HdfsResource(params.webhcat_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.webhcat_user,
                            mode=params.webhcat_hdfs_user_mode)

        # ****** Begin Copy Tarballs ******
        # *********************************
        # If the copy-tarball-to-HDFS feature is supported, copy mapreduce.tar.gz and tez.tar.gz to HDFS
        if params.stack_version_formatted_major and check_stack_feature(
                StackFeature.COPY_TARBALL_TO_HDFS,
                params.stack_version_formatted_major):
            copy_to_hdfs("mapreduce",
                         params.user_group,
                         params.hdfs_user,
                         host_sys_prepped=params.host_sys_prepped)
            copy_to_hdfs("tez",
                         params.user_group,
                         params.hdfs_user,
                         host_sys_prepped=params.host_sys_prepped)

        # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode.
        # This can use a different source and dest location.
        copy_to_hdfs("pig",
                     params.user_group,
                     params.hdfs_user,
                     file_mode=params.tarballs_mode,
                     custom_source_file=params.pig_tar_source,
                     custom_dest_file=params.pig_tar_dest_file,
                     host_sys_prepped=params.host_sys_prepped)
        copy_to_hdfs("hive",
                     params.user_group,
                     params.hdfs_user,
                     file_mode=params.tarballs_mode,
                     custom_source_file=params.hive_tar_source,
                     custom_dest_file=params.hive_tar_dest_file,
                     host_sys_prepped=params.host_sys_prepped)

        wildcard_tarballs = ["sqoop", "hadoop_streaming"]
        for tarball_name in wildcard_tarballs:
            source_file_pattern = eval("params." + tarball_name +
                                       "_tar_source")
            dest_dir = eval("params." + tarball_name + "_tar_dest_dir")

            if source_file_pattern is None or dest_dir is None:
                continue

            source_files = glob.glob(
                source_file_pattern) if "*" in source_file_pattern else [
                    source_file_pattern
                ]
            for source_file in source_files:
                src_filename = os.path.basename(source_file)
                dest_file = os.path.join(dest_dir, src_filename)

                copy_to_hdfs(tarball_name,
                             params.user_group,
                             params.hdfs_user,
                             file_mode=params.tarballs_mode,
                             custom_source_file=source_file,
                             custom_dest_file=dest_file,
                             host_sys_prepped=params.host_sys_prepped)
        # ******* End Copy Tarballs *******
        # *********************************

        # if warehouse directory is in DFS
        if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse(
                params.default_fs).scheme:
            # Create Hive Metastore Warehouse Dir
            params.HdfsResource(params.hive_apps_whs_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.hive_user,
                                mode=0777)
        else:
            Logger.info(
                format(
                    "Not creating warehouse directory '{hive_apps_whs_dir}', as the location is not in DFS."
                ))

        # Create Hive User Dir
        params.HdfsResource(params.hive_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.hive_user,
                            mode=params.hive_hdfs_user_mode)

        if not is_empty(params.hive_exec_scratchdir) and not urlparse(
                params.hive_exec_scratchdir).path.startswith("/tmp"):
            params.HdfsResource(
                params.hive_exec_scratchdir,
                type="directory",
                action="create_on_execute",
                owner=params.hive_user,
                group=params.hdfs_user,
                mode=0777
            )  # Hive expects this dir to be writeable by everyone as it is used as a temp dir

        params.HdfsResource(None, action="execute")

    Directory(params.hive_etc_dir_prefix, mode=0755)

    # We should change configurations for the client as well as for the server.
    # The reason is that stale-configs are service-level, not component-level.
    Logger.info("Directories to fill with configs: %s" %
                str(params.hive_conf_dirs_list))
    for conf_dir in params.hive_conf_dirs_list:
        fill_conf_dir(conf_dir)

    XmlConfig(
        "hive-site.xml",
        conf_dir=params.hive_config_dir,
        configurations=params.hive_site_config,
        configuration_attributes=params.config['configuration_attributes']
        ['hive-site'],
        owner=params.hive_user,
        group=params.user_group,
        mode=0644)

    # Generate atlas-application.properties.xml file
    if has_atlas_in_cluster():
        atlas_hook_filepath = os.path.join(params.hive_config_dir,
                                           params.atlas_hook_filename)
        setup_atlas_hook(SERVICE.HIVE,
                         params.hive_atlas_application_properties,
                         atlas_hook_filepath, params.hive_user,
                         params.user_group)

    if name == 'hiveserver2':
        XmlConfig(
            "hiveserver2-site.xml",
            conf_dir=params.hive_server_conf_dir,
            configurations=params.config['configurations']['hiveserver2-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['hiveserver2-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=0644)

    if params.hive_metastore_site_supported and name == 'metastore':
        XmlConfig(
            "hivemetastore-site.xml",
            conf_dir=params.hive_server_conf_dir,
            configurations=params.config['configurations']
            ['hivemetastore-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['hivemetastore-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=0644)

    File(format("{hive_config_dir}/hive-env.sh"),
         owner=params.hive_user,
         group=params.user_group,
         content=InlineTemplate(params.hive_env_sh_template))

    # On some operating systems this folder may not exist, so create it before placing files in it
    Directory(params.limits_conf_dir,
              create_parents=True,
              owner='root',
              group='root')

    File(os.path.join(params.limits_conf_dir, 'hive.conf'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("hive.conf.j2"))

    if name == 'metastore' or name == 'hiveserver2':
        if params.hive_jdbc_target is not None and not os.path.exists(
                params.hive_jdbc_target):
            jdbc_connector(params.hive_jdbc_target,
                           params.hive_previous_jdbc_jar)
        if params.hive2_jdbc_target is not None and not os.path.exists(
                params.hive2_jdbc_target):
            jdbc_connector(params.hive2_jdbc_target,
                           params.hive2_previous_jdbc_jar)

    File(
        format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
        content=DownloadSource(
            format("{jdk_location}{check_db_connection_jar_name}")),
        mode=0644,
    )

    if name == 'metastore':
        File(os.path.join(params.hive_server_conf_dir,
                          "hadoop-metrics2-hivemetastore.properties"),
             owner=params.hive_user,
             group=params.user_group,
             content=Template("hadoop-metrics2-hivemetastore.properties.j2"))

        File(params.start_metastore_path,
             mode=0755,
             content=StaticFile('startMetastore.sh'))
        if params.init_metastore_schema:
            create_schema_cmd = format(
                "export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                "{hive_schematool_bin}/schematool -initSchema "
                "-dbType {hive_metastore_db_type} "
                "-userName {hive_metastore_user_name} "
                "-passWord {hive_metastore_user_passwd!p} -verbose")

            check_schema_created_cmd = as_user(
                format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                       "{hive_schematool_bin}/schematool -info "
                       "-dbType {hive_metastore_db_type} "
                       "-userName {hive_metastore_user_name} "
                       "-passWord {hive_metastore_user_passwd!p} -verbose"),
                params.hive_user)

            # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords.
            # Fixing it with the hack below:
            quoted_hive_metastore_user_passwd = quote_bash_args(
                quote_bash_args(params.hive_metastore_user_passwd))
            if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \
                or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"':
                quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[
                    1:-1]
            Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(
                check_schema_created_cmd.replace(
                    format("-passWord {quoted_hive_metastore_user_passwd}"),
                    "-passWord " + utils.PASSWORDS_HIDE_STRING))

            Execute(create_schema_cmd,
                    not_if=check_schema_created_cmd,
                    user=params.hive_user)
    elif name == 'hiveserver2':
        File(params.start_hiveserver2_path,
             mode=0755,
             content=Template(format('{start_hiveserver2_script}')))

        File(os.path.join(params.hive_server_conf_dir,
                          "hadoop-metrics2-hiveserver2.properties"),
             owner=params.hive_user,
             group=params.user_group,
             content=Template("hadoop-metrics2-hiveserver2.properties.j2"))

    if name != "client":
        Directory(params.hive_pid_dir,
                  create_parents=True,
                  cd_access='a',
                  owner=params.hive_user,
                  group=params.user_group,
                  mode=0755)
        Directory(params.hive_log_dir,
                  create_parents=True,
                  cd_access='a',
                  owner=params.hive_user,
                  group=params.user_group,
                  mode=0755)
        Directory(params.hive_var_lib,
                  create_parents=True,
                  cd_access='a',
                  owner=params.hive_user,
                  group=params.user_group,
                  mode=0755)
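
One small design note on the wildcard-tarball loop in this example: the eval("params." + tarball_name + "_tar_source") lookups can be written with getattr, which performs the same attribute access without evaluating a string. A short sketch; the helper name is ours, not Ambari's.

# Equivalent lookup without eval; params is any object or module exposing
# <name>_tar_source and <name>_tar_dest_dir attributes.
def tarball_source_and_dest_dir(params, tarball_name):
    source_file_pattern = getattr(params, tarball_name + "_tar_source", None)
    dest_dir = getattr(params, tarball_name + "_tar_dest_dir", None)
    return source_file_pattern, dest_dir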
Example #9
def webhcat():
    import params

    Directory(params.templeton_pid_dir,
              owner=params.webhcat_user,
              mode=0755,
              group=params.user_group,
              create_parents=True)

    Directory(params.templeton_log_dir,
              owner=params.webhcat_user,
              mode=0755,
              group=params.user_group,
              create_parents=True)

    Directory(params.config_dir,
              create_parents=True,
              owner=params.webhcat_user,
              group=params.user_group,
              cd_access="a")

    if params.security_enabled:
        kinit_if_needed = format(
            "{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
    else:
        kinit_if_needed = ""

    if kinit_if_needed:
        Execute(kinit_if_needed, user=params.webhcat_user, path='/bin')

    # Replace _HOST with hostname in relevant principal-related properties
    webhcat_site = params.config['configurations']['webhcat-site'].copy()
    for prop_name in [
            'templeton.hive.properties', 'templeton.kerberos.principal'
    ]:
        if prop_name in webhcat_site:
            webhcat_site[prop_name] = webhcat_site[prop_name].replace(
                "_HOST", params.hostname)

    XmlConfig(
        "webhcat-site.xml",
        conf_dir=params.config_dir,
        configurations=webhcat_site,
        configuration_attributes=params.config['configuration_attributes']
        ['webhcat-site'],
        owner=params.webhcat_user,
        group=params.user_group,
    )

    # if we're in an upgrade of a secure cluster, make sure hive-site and yarn-site are created
    if params.stack_version_formatted_major  and check_stack_feature(StackFeature.CONFIG_VERSIONING, params.stack_version_formatted_major) and \
         params.version and params.stack_root:
        XmlConfig(
            "hive-site.xml",
            conf_dir=format("{stack_root}/{version}/hive/conf"),
            configurations=params.config['configurations']['hive-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['hive-site'],
            owner=params.hive_user,
            group=params.user_group,
        )

        XmlConfig(
            "yarn-site.xml",
            conf_dir=format("{stack_root}/{version}/hadoop/conf"),
            configurations=params.config['configurations']['yarn-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['yarn-site'],
            owner=params.yarn_user,
            group=params.user_group,
        )

    File(format("{config_dir}/webhcat-env.sh"),
         owner=params.webhcat_user,
         group=params.user_group,
         content=InlineTemplate(params.webhcat_env_sh_template))

    Directory(params.webhcat_conf_dir, cd_access='a', create_parents=True)

    log4j_webhcat_filename = 'webhcat-log4j.properties'
    if params.log4j_webhcat_props is not None:
        File(format("{config_dir}/{log4j_webhcat_filename}"),
             mode=0644,
             group=params.user_group,
             owner=params.webhcat_user,
             content=params.log4j_webhcat_props)
    elif os.path.exists(format("{config_dir}/{log4j_webhcat_filename}.template")):
        File(format("{config_dir}/{log4j_webhcat_filename}"),
             mode=0644,
             group=params.user_group,
             owner=params.webhcat_user,
             content=StaticFile(
                 format("{config_dir}/{log4j_webhcat_filename}.template")))

    # Generate atlas-application.properties.xml file
    if has_atlas_in_cluster():
        # WebHCat uses a different config dir than the rest of the daemons in Hive.
        atlas_hook_filepath = os.path.join(params.config_dir,
                                           params.atlas_hook_filename)
        setup_atlas_hook(SERVICE.HIVE,
                         params.hive_atlas_application_properties,
                         atlas_hook_filepath, params.hive_user,
                         params.user_group)
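
The _HOST substitution near the top of this example is a common Kerberos idiom: principal-related properties carry the literal token _HOST, which must be replaced with the local hostname before the config is rendered. A standalone sketch of that step; the principal value below is made up.

# Replace the _HOST token in selected properties with the real hostname.
def substitute_host(site_config, hostname,
                    prop_names=('templeton.hive.properties',
                                'templeton.kerberos.principal')):
    site = dict(site_config)
    for prop_name in prop_names:
        if prop_name in site:
            site[prop_name] = site[prop_name].replace("_HOST", hostname)
    return site

print(substitute_host({"templeton.kerberos.principal": "HTTP/_HOST@EXAMPLE.COM"},
                      "host1.example.com"))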
Example #10
def falcon(type, action = None, upgrade_type=None):
  import params

  if action == 'config':
    Directory(params.falcon_pid_dir,
      owner = params.falcon_user,
      create_parents = True,
      mode = 0755,
      cd_access = "a",
    )

    Directory(params.falcon_log_dir,
      owner = params.falcon_user,
      create_parents = True,
      mode = 0755,
      cd_access = "a",
    )

    Directory(params.falcon_webapp_dir,
      owner = params.falcon_user,
      create_parents = True)

    Directory(params.falcon_home,
      owner = params.falcon_user,
      create_parents = True)

    Directory(params.etc_prefix_dir,
      mode = 0755,
      create_parents = True)

    Directory(params.falcon_conf_dir,
      owner = params.falcon_user,
      create_parents = True)

    File(params.falcon_conf_dir + '/falcon-env.sh',
      content = InlineTemplate(params.falcon_env_sh_template),
      owner = params.falcon_user,
      group=params.user_group,
    )
    
    PropertiesFile(params.falcon_conf_dir + '/client.properties',
      properties = params.falcon_client_properties,
      mode = 0644,
      owner = params.falcon_user)
      
    PropertiesFile(params.falcon_conf_dir + '/runtime.properties',
      properties = params.falcon_runtime_properties,
      mode = 0644,
      owner = params.falcon_user)

    PropertiesFile(params.falcon_conf_dir + '/startup.properties',
      properties = params.falcon_startup_properties,
      mode = 0644,
      owner = params.falcon_user)

    if params.falcon_graph_storage_directory:
      Directory(params.falcon_graph_storage_directory,
        owner = params.falcon_user,
        group = params.user_group,
        mode = 0775,
        create_parents = True,
        cd_access = "a")

    if params.falcon_graph_serialize_path:
      Directory(params.falcon_graph_serialize_path,
        owner = params.falcon_user,
        group = params.user_group,
        mode = 0775,
        create_parents = True,
        cd_access = "a")

    # Generate atlas-application.properties.xml file
    if has_atlas_in_cluster():
      atlas_hook_filepath = os.path.join(params.falcon_conf_dir, params.atlas_hook_filename)
      setup_atlas_hook(SERVICE.FALCON, params.falcon_atlas_application_properties, atlas_hook_filepath, params.falcon_user, params.user_group)

  if type == 'server':
    if action == 'config':
      if params.store_uri[0:4] == "hdfs":
        params.HdfsResource(params.store_uri,
          type = "directory",
          action = "create_on_execute",
          owner = params.falcon_user,
          mode = 0755)
      elif params.store_uri[0:4] == "file":
        Directory(params.store_uri[7:],
          owner = params.falcon_user,
          create_parents = True)

      # TODO change to proper mode
      params.HdfsResource(params.falcon_apps_dir,
        type = "directory",
        action = "create_on_execute",
        owner = params.falcon_user,
        mode = 0777)

      # In HDP 2.4 and earlier, the data-mirroring directory was copied to HDFS.
      if params.supports_data_mirroring:
        params.HdfsResource(params.dfs_data_mirroring_dir,
          type = "directory",
          action = "create_on_execute",
          owner = params.falcon_user,
          group = params.proxyuser_group,
          recursive_chown = True,
          recursive_chmod = True,
          mode = 0770,
          source = params.local_data_mirroring_dir)

      if params.supports_falcon_extensions:

        params.HdfsResource(params.falcon_extensions_dest_dir,
                            type = "directory",
                            action = "create_on_execute",
                            owner = params.falcon_user,
                            group = params.proxyuser_group,
                            recursive_chown = True,
                            recursive_chmod = True,
                            mode = 0755,
                            source = params.falcon_extensions_source_dir)
        # Create the extensions HiveDR store
        params.HdfsResource(os.path.join(params.falcon_extensions_dest_dir, "mirroring"),
                            type = "directory",
                            action = "create_on_execute",
                            owner = params.falcon_user,
                            group = params.proxyuser_group,
                            mode = 0770)

      # At least one HDFS Dir should be created, so execute the change now.
      params.HdfsResource(None, action = "execute")

      Directory(params.falcon_local_dir,
        owner = params.falcon_user,
        create_parents = True,
        cd_access = "a")

      if params.falcon_embeddedmq_enabled == True:
        Directory(
          os.path.abspath(os.path.join(params.falcon_embeddedmq_data, "..")),
          owner = params.falcon_user,
          create_parents = True)

        Directory(params.falcon_embeddedmq_data,
          owner = params.falcon_user,
          create_parents = True)

    # although Falcon's falcon-config.sh will use 'which hadoop' to figure
    # this out, in an upgraded cluster, it's possible that 'which hadoop'
    # still points to older binaries; it's safer to just pass in the
    # hadoop home directory to use
    environment_dictionary = { "HADOOP_HOME" : params.hadoop_home_dir }

    pid = get_user_call_output.get_user_call_output(format("cat {server_pid_file}"), user=params.falcon_user, is_checked_call=False)[1]
    process_exists = format("ls {server_pid_file} && ps -p {pid}")

    if action == 'start':
      if not os.path.exists(params.target_jar_file):
        try:
          File(params.target_jar_file,
               content = DownloadSource(params.bdb_resource_name))
        except:
          exc_msg = traceback.format_exc()
          exception_message = format("Caught Exception while downloading {bdb_resource_name}:\n{exc_msg}")
          Logger.error(exception_message)

        if not os.path.isfile(params.target_jar_file):
          error_message = """
If you are using bdb as the Falcon graph db store, please run
ambari-server setup --jdbc-db=bdb --jdbc-driver=<path to je5.0.73.jar>
on the ambari server host. Otherwise Falcon startup will fail.
Alternatively, configure Falcon to use HBase as the backend as described
in the Falcon documentation.
"""
          Logger.error(error_message)
      try:
        Execute(format('{falcon_home}/bin/falcon-start -port {falcon_port}'),
          user = params.falcon_user,
          path = params.hadoop_bin_dir,
          environment=environment_dictionary,
          not_if = process_exists,
        )
      except:
        show_logs(params.falcon_log_dir, params.falcon_user)
        raise

    if action == 'stop':
      try:
        Execute(format('{falcon_home}/bin/falcon-stop'),
          user = params.falcon_user,
          path = params.hadoop_bin_dir,
          environment=environment_dictionary)
      except:
        show_logs(params.falcon_log_dir, params.falcon_user)
        raise
      
      File(params.server_pid_file, action = 'delete')
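
A closing note on the store_uri branches in this example: the params.store_uri[0:4] prefix checks are effectively scheme checks, which urlparse expresses directly, and its .path gives the local directory that the [7:] slice extracts for file:// URIs. A small illustrative sketch, not a change to the code above.

# Scheme/path inspection equivalent to the string slicing above; illustrative only.
try:
  from urllib.parse import urlparse  # Python 3
except ImportError:
  from urlparse import urlparse      # Python 2

def store_uri_parts(store_uri):
  parsed = urlparse(store_uri)
  return parsed.scheme, parsed.path

print(store_uri_parts("file:///hadoop/falcon/store"))        # ('file', '/hadoop/falcon/store')
print(store_uri_parts("hdfs://nn:8020/apps/falcon/store"))   # ('hdfs', '/apps/falcon/store')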