Example #1
0
def hcat():
    import params

    from setup_atlas_hive import setup_atlas_hive

    Directory(
        params.hive_conf_dir,
        create_parents=True,
        owner=params.hcat_user,
        group=params.user_group,
    )

    Directory(
        params.hcat_conf_dir,
        create_parents=True,
        owner=params.hcat_user,
        group=params.user_group,
    )

    Directory(params.hcat_pid_dir,
              owner=params.webhcat_user,
              create_parents=True)

    XmlConfig(
        "hive-site.xml",
        conf_dir=params.hive_client_conf_dir,
        configurations=params.config['configurations']['hive-site'],
        configuration_attributes=params.config['configuration_attributes']
        ['hive-site'],
        owner=params.hive_user,
        group=params.user_group,
        mode=0644)

    File(format("{hcat_conf_dir}/hcat-env.sh"),
         owner=params.hcat_user,
         group=params.user_group,
         content=InlineTemplate(params.hcat_env_sh_template))

    setup_atlas_hive()
Example #2
0
def hcat():
  import params

  from setup_atlas_hive import setup_atlas_hive

  Directory(params.hive_conf_dir,
            recursive=True,
            owner=params.hcat_user,
            group=params.user_group,
  )


  Directory(params.hcat_conf_dir,
            recursive=True,
            owner=params.hcat_user,
            group=params.user_group,
  )

  Directory(params.hcat_pid_dir,
            owner=params.webhcat_user,
            recursive=True
  )

  XmlConfig("hive-site.xml",
            conf_dir=params.hive_client_conf_dir,
            configurations=params.hive_site_config,
            configuration_attributes=params.config['configuration_attributes']['hive-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=0644)

  File(format("{hcat_conf_dir}/hcat-env.sh"),
       owner=params.hcat_user,
       group=params.user_group,
       content=InlineTemplate(params.hcat_env_sh_template)
  )

  setup_atlas_hive()
Example #3
0
def hive(name=None):
  import params

  if name == 'hiveserver2':
    # copy tarball to HDFS feature not supported
    if not (params.stack_version_formatted_major and check_stack_feature(StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major)):  
      params.HdfsResource(params.webhcat_apps_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.webhcat_user,
                            mode=0755
                          )
    
    # Create webhcat dirs.
    if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
      params.HdfsResource(params.hcat_hdfs_user_dir,
                           type="directory",
                           action="create_on_execute",
                           owner=params.hcat_user,
                           mode=params.hcat_hdfs_user_mode
      )

    params.HdfsResource(params.webhcat_hdfs_user_dir,
                         type="directory",
                         action="create_on_execute",
                         owner=params.webhcat_user,
                         mode=params.webhcat_hdfs_user_mode
    )

    # ****** Begin Copy Tarballs ******
    # *********************************
    #  if copy tarball to HDFS feature  supported copy mapreduce.tar.gz and tez.tar.gz to HDFS
    if params.stack_version_formatted_major and check_stack_feature(StackFeature.COPY_TARBALL_TO_HDFS, params.stack_version_formatted_major):
      copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)

    # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode.
    # This can use a different source and dest location to account
    copy_to_hdfs("pig",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 custom_source_file=params.pig_tar_source,
                 custom_dest_file=params.pig_tar_dest_file,
                 host_sys_prepped=params.host_sys_prepped)
    copy_to_hdfs("hive",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 custom_source_file=params.hive_tar_source,
                 custom_dest_file=params.hive_tar_dest_file,
                 host_sys_prepped=params.host_sys_prepped)

    wildcard_tarballs = ["sqoop", "hadoop_streaming"]
    for tarball_name in wildcard_tarballs:
      source_file_pattern = eval("params." + tarball_name + "_tar_source")
      dest_dir = eval("params." + tarball_name + "_tar_dest_dir")

      if source_file_pattern is None or dest_dir is None:
        continue

      source_files = glob.glob(source_file_pattern) if "*" in source_file_pattern else [source_file_pattern]
      for source_file in source_files:
        src_filename = os.path.basename(source_file)
        dest_file = os.path.join(dest_dir, src_filename)

        copy_to_hdfs(tarball_name,
                     params.user_group,
                     params.hdfs_user,
                     file_mode=params.tarballs_mode,
                     custom_source_file=source_file,
                     custom_dest_file=dest_file,
                     host_sys_prepped=params.host_sys_prepped)
    # ******* End Copy Tarballs *******
    # *********************************
    
    # if warehouse directory is in DFS
    if not params.whs_dir_protocol or params.whs_dir_protocol == urlparse(params.default_fs).scheme:
      # Create Hive Metastore Warehouse Dir
      params.HdfsResource(params.hive_apps_whs_dir,
                           type="directory",
                            action="create_on_execute",
                            owner=params.hive_user,
                            mode=0777
      )
    else:
      Logger.info(format("Not creating warehouse directory '{hive_apps_whs_dir}', as the location is not in DFS."))

    # Create Hive User Dir
    params.HdfsResource(params.hive_hdfs_user_dir,
                         type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          mode=params.hive_hdfs_user_mode
    )
    
    if not is_empty(params.hive_exec_scratchdir) and not urlparse(params.hive_exec_scratchdir).path.startswith("/tmp"):
      params.HdfsResource(params.hive_exec_scratchdir,
                           type="directory",
                           action="create_on_execute",
                           owner=params.hive_user,
                           group=params.hdfs_user,
                           mode=0777) # Hive expects this dir to be writeable by everyone as it is used as a temp dir
      
    params.HdfsResource(None, action="execute")

  Directory(params.hive_etc_dir_prefix,
            mode=0755
  )

  # We should change configurations for client as well as for server.
  # The reason is that stale-configs are service-level, not component.
  Logger.info("Directories to fill with configs: %s" % str(params.hive_conf_dirs_list))
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  XmlConfig("hive-site.xml",
            conf_dir=params.hive_config_dir,
            configurations=params.hive_site_config,
            configuration_attributes=params.config['configuration_attributes']['hive-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=0644)

  setup_atlas_hive()
  
  if params.hive_specific_configs_supported and name == 'hiveserver2':
    XmlConfig("hiveserver2-site.xml",
              conf_dir=params.hive_server_conf_dir,
              configurations=params.config['configurations']['hiveserver2-site'],
              configuration_attributes=params.config['configuration_attributes']['hiveserver2-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=0644)

  if params.hive_metastore_site_supported and name == 'metastore':
    XmlConfig("hivemetastore-site.xml",
              conf_dir=params.hive_server_conf_dir,
              configurations=params.config['configurations']['hivemetastore-site'],
              configuration_attributes=params.config['configuration_attributes']['hivemetastore-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=0644)
  
  File(format("{hive_config_dir}/hive-env.sh"),
       owner=params.hive_user,
       group=params.user_group,
       content=InlineTemplate(params.hive_env_sh_template)
  )

  # On some OS this folder could be not exists, so we will create it before pushing there files
  Directory(params.limits_conf_dir,
            create_parents = True,
            owner='root',
            group='root'
            )

  File(os.path.join(params.limits_conf_dir, 'hive.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hive.conf.j2")
       )

  if name == 'metastore' or name == 'hiveserver2':
    if params.target_hive is not None and not os.path.exists(params.target_hive):
      jdbc_connector(params.target_hive)
    if params.target_hive2 is not None and not os.path.exists(params.target_hive2):
      jdbc_connector(params.target_hive2)

  File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
       content = DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")),
       mode = 0644,
  )

  if name == 'metastore':
    File(os.path.join(params.hive_server_conf_dir, "hadoop-metrics2-hivemetastore.properties"),
         owner=params.hive_user,
         group=params.user_group,
         content=Template("hadoop-metrics2-hivemetastore.properties.j2")
    )

    File(params.start_metastore_path,
         mode=0755,
         content=StaticFile('startMetastore.sh')
    )
    if params.init_metastore_schema:
      create_schema_cmd = format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                 "{hive_schematool_bin}/schematool -initSchema "
                                 "-dbType {hive_metastore_db_type} "
                                 "-userName {hive_metastore_user_name} "
                                 "-passWord {hive_metastore_user_passwd!p} -verbose")

      check_schema_created_cmd = as_user(format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                        "{hive_schematool_bin}/schematool -info "
                                        "-dbType {hive_metastore_db_type} "
                                        "-userName {hive_metastore_user_name} "
                                        "-passWord {hive_metastore_user_passwd!p} -verbose"), params.hive_user)

      # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords.
      # Fixing it with the hack below:
      quoted_hive_metastore_user_passwd = quote_bash_args(quote_bash_args(params.hive_metastore_user_passwd))
      if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \
          or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"':
        quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[1:-1]
      Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(check_schema_created_cmd.replace(
          format("-passWord {quoted_hive_metastore_user_passwd}"), "-passWord " + utils.PASSWORDS_HIDE_STRING))

      Execute(create_schema_cmd,
              not_if = check_schema_created_cmd,
              user = params.hive_user
      )
  elif name == 'hiveserver2':
    File(params.start_hiveserver2_path,
         mode=0755,
         content=Template(format('{start_hiveserver2_script}'))
    )

    File(os.path.join(params.hive_server_conf_dir, "hadoop-metrics2-hiveserver2.properties"),
         owner=params.hive_user,
         group=params.user_group,
         content=Template("hadoop-metrics2-hiveserver2.properties.j2")
    )

  if name != "client":
    Directory(params.hive_pid_dir,
              create_parents = True,
              cd_access='a',
              owner=params.hive_user,
              group=params.user_group,
              mode=0755)
    Directory(params.hive_log_dir,
              create_parents = True,
              cd_access='a',
              owner=params.hive_user,
              group=params.user_group,
              mode=0755)
    Directory(params.hive_var_lib,
              create_parents = True,
              cd_access='a',
              owner=params.hive_user,
              group=params.user_group,
              mode=0755)
Example #4
0
def webhcat():
  import params

  from setup_atlas_hive import setup_atlas_hive

  Directory(params.templeton_pid_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            recursive=True)

  Directory(params.templeton_log_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            recursive=True)

  Directory(params.config_dir,
            recursive=True,
            owner=params.webhcat_user,
            group=params.user_group,
            cd_access="a")

  if params.security_enabled:
    kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
  else:
    kinit_if_needed = ""

  if kinit_if_needed:
    Execute(kinit_if_needed,
            user=params.webhcat_user,
            path='/bin'
    )

  # Replace _HOST with hostname in relevant principal-related properties
  webhcat_site = params.config['configurations']['webhcat-site'].copy()
  for prop_name in ['templeton.hive.properties', 'templeton.kerberos.principal']:
    if prop_name in webhcat_site:
      webhcat_site[prop_name] = webhcat_site[prop_name].replace("_HOST", params.hostname)

  XmlConfig("webhcat-site.xml",
            conf_dir=params.config_dir,
            configurations=webhcat_site,
            configuration_attributes=params.config['configuration_attributes']['webhcat-site'],
            owner=params.webhcat_user,
            group=params.user_group,
            )

  # if we're in an upgrade of a secure cluster, make sure hive-site and yarn-site are created
  if Script.is_hdp_stack_greater_or_equal("2.3") and params.version:
    XmlConfig("hive-site.xml",
      conf_dir = format("/usr/hdp/{version}/hive/conf"),
      configurations = params.config['configurations']['hive-site'],
      configuration_attributes = params.config['configuration_attributes']['hive-site'],
      owner = params.hive_user,
      group = params.user_group,
      )

    XmlConfig("yarn-site.xml",
      conf_dir = format("/usr/hdp/{version}/hadoop/conf"),
      configurations = params.config['configurations']['yarn-site'],
      configuration_attributes = params.config['configuration_attributes']['yarn-site'],
      owner = params.yarn_user,
      group = params.user_group,    
  )
  

  File(format("{config_dir}/webhcat-env.sh"),
       owner=params.webhcat_user,
       group=params.user_group,
       content=InlineTemplate(params.webhcat_env_sh_template)
  )
  
  Directory(params.webhcat_conf_dir,
       cd_access='a',
       recursive=True
  )

  log4j_webhcat_filename = 'webhcat-log4j.properties'
  if (params.log4j_webhcat_props != None):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=params.log4j_webhcat_props
    )
  elif (os.path.exists("{config_dir}/{log4j_webhcat_filename}.template")):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=StaticFile(format("{config_dir}/{log4j_webhcat_filename}.template"))
    )

  setup_atlas_hive(configuration_directory=params.config_dir)
Example #5
0
def webhcat():
  import params

  from setup_atlas_hive import setup_atlas_hive

  Directory(params.templeton_pid_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            create_parents = True)

  Directory(params.templeton_log_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            create_parents = True)

  Directory(params.config_dir,
            create_parents = True,
            owner=params.webhcat_user,
            group=params.user_group,
            cd_access="a")

  if params.security_enabled:
    kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
  else:
    kinit_if_needed = ""

  if kinit_if_needed:
    Execute(kinit_if_needed,
            user=params.webhcat_user,
            path='/bin'
    )

  # Replace _HOST with hostname in relevant principal-related properties
  webhcat_site = params.config['configurations']['webhcat-site'].copy()
  for prop_name in ['templeton.hive.properties', 'templeton.kerberos.principal']:
    if prop_name in webhcat_site:
      webhcat_site[prop_name] = webhcat_site[prop_name].replace("_HOST", params.hostname)

  XmlConfig("webhcat-site.xml",
            conf_dir=params.config_dir,
            configurations=webhcat_site,
            configuration_attributes=params.config['configuration_attributes']['webhcat-site'],
            owner=params.webhcat_user,
            group=params.user_group,
            )

  # if we're in an upgrade of a secure cluster, make sure hive-site and yarn-site are created
  if params.stack_version_formatted_major  and check_stack_feature(StackFeature.CONFIG_VERSIONING, params.stack_version_formatted_major) and \
       params.version and params.stack_root:
    XmlConfig("hive-site.xml",
      conf_dir = format("{stack_root}/{version}/hive/conf"),
      configurations = params.config['configurations']['hive-site'],
      configuration_attributes = params.config['configuration_attributes']['hive-site'],
      owner = params.hive_user,
      group = params.user_group,
      )

    XmlConfig("yarn-site.xml",
      conf_dir = format("{stack_root}/{version}/hadoop/conf"),
      configurations = params.config['configurations']['yarn-site'],
      configuration_attributes = params.config['configuration_attributes']['yarn-site'],
      owner = params.yarn_user,
      group = params.user_group,    
  )
  

  File(format("{config_dir}/webhcat-env.sh"),
       owner=params.webhcat_user,
       group=params.user_group,
       content=InlineTemplate(params.webhcat_env_sh_template)
  )
  
  Directory(params.webhcat_conf_dir,
       cd_access='a',
       create_parents = True
  )

  log4j_webhcat_filename = 'webhcat-log4j.properties'
  if (params.log4j_webhcat_props != None):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=params.log4j_webhcat_props
    )
  elif (os.path.exists("{config_dir}/{log4j_webhcat_filename}.template")):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=StaticFile(format("{config_dir}/{log4j_webhcat_filename}.template"))
    )

  setup_atlas_hive(configuration_directory=params.config_dir)
Example #6
0
def hive(name=None):
    import params

    if name == 'hiveserver2':
        # HDP 2.1.* or lower
        if params.hdp_stack_version_major != "" and compare_versions(
                params.hdp_stack_version_major, "2.2.0.0") < 0:
            params.HdfsResource(params.webhcat_apps_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.webhcat_user,
                                mode=0755)

        # Create webhcat dirs.
        if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
            params.HdfsResource(params.hcat_hdfs_user_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.hcat_user,
                                mode=params.hcat_hdfs_user_mode)

        params.HdfsResource(params.webhcat_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.webhcat_user,
                            mode=params.webhcat_hdfs_user_mode)

        # ****** Begin Copy Tarballs ******
        # *********************************
        # HDP 2.2 or higher, copy mapreduce.tar.gz to HDFS
        if params.hdp_stack_version_major != "" and compare_versions(
                params.hdp_stack_version_major, '2.2') >= 0:
            copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user)
            copy_to_hdfs("tez", params.user_group, params.hdfs_user)

        # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode.
        # This can use a different source and dest location to account for both HDP 2.1 and 2.2
        copy_to_hdfs("pig",
                     params.user_group,
                     params.hdfs_user,
                     file_mode=params.tarballs_mode,
                     custom_source_file=params.pig_tar_source,
                     custom_dest_file=params.pig_tar_dest_file)
        copy_to_hdfs("hive",
                     params.user_group,
                     params.hdfs_user,
                     file_mode=params.tarballs_mode,
                     custom_source_file=params.hive_tar_source,
                     custom_dest_file=params.hive_tar_dest_file)

        wildcard_tarballs = ["sqoop", "hadoop_streaming"]
        for tarball_name in wildcard_tarballs:
            source_file_pattern = eval("params." + tarball_name +
                                       "_tar_source")
            dest_dir = eval("params." + tarball_name + "_tar_dest_dir")

            if source_file_pattern is None or dest_dir is None:
                continue

            source_files = glob.glob(
                source_file_pattern) if "*" in source_file_pattern else [
                    source_file_pattern
                ]
            for source_file in source_files:
                src_filename = os.path.basename(source_file)
                dest_file = os.path.join(dest_dir, src_filename)

                copy_to_hdfs(tarball_name,
                             params.user_group,
                             params.hdfs_user,
                             file_mode=params.tarballs_mode,
                             custom_source_file=source_file,
                             custom_dest_file=dest_file)
        # ******* End Copy Tarballs *******
        # *********************************

        # Create Hive Metastore Warehouse Dir
        params.HdfsResource(params.hive_apps_whs_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.hive_user,
                            mode=0777)

        # Create Hive User Dir
        params.HdfsResource(params.hive_hdfs_user_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.hive_user,
                            mode=params.hive_hdfs_user_mode)

        if not is_empty(params.hive_exec_scratchdir) and not urlparse(
                params.hive_exec_scratchdir).path.startswith("/tmp"):
            params.HdfsResource(
                params.hive_exec_scratchdir,
                type="directory",
                action="create_on_execute",
                owner=params.hive_user,
                group=params.hdfs_user,
                mode=0777
            )  # Hive expects this dir to be writeable by everyone as it is used as a temp dir

        params.HdfsResource(None, action="execute")

    Directory(params.hive_etc_dir_prefix, mode=0755)

    # We should change configurations for client as well as for server.
    # The reason is that stale-configs are service-level, not component.
    for conf_dir in params.hive_conf_dirs_list:
        fill_conf_dir(conf_dir)

    XmlConfig(
        "hive-site.xml",
        conf_dir=params.hive_config_dir,
        configurations=params.hive_site_config,
        configuration_attributes=params.config['configuration_attributes']
        ['hive-site'],
        owner=params.hive_user,
        group=params.user_group,
        mode=0644)

    setup_atlas_hive()

    if params.hive_specific_configs_supported and name == 'hiveserver2':
        XmlConfig(
            "hiveserver2-site.xml",
            conf_dir=params.hive_server_conf_dir,
            configurations=params.config['configurations']['hiveserver2-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['hiveserver2-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=0644)

    File(format("{hive_config_dir}/hive-env.sh"),
         owner=params.hive_user,
         group=params.user_group,
         content=InlineTemplate(params.hive_env_sh_template))

    # On some OS this folder could be not exists, so we will create it before pushing there files
    Directory(params.limits_conf_dir,
              recursive=True,
              owner='root',
              group='root')

    File(os.path.join(params.limits_conf_dir, 'hive.conf'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("hive.conf.j2"))

    if name == 'metastore' or name == 'hiveserver2':
        jdbc_connector()

    File(
        format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
        content=DownloadSource(
            format("{jdk_location}{check_db_connection_jar_name}")),
        mode=0644,
    )

    if name == 'metastore':
        File(params.start_metastore_path,
             mode=0755,
             content=StaticFile('startMetastore.sh'))
        if params.init_metastore_schema:
            create_schema_cmd = format(
                "export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                "{hive_bin}/schematool -initSchema "
                "-dbType {hive_metastore_db_type} "
                "-userName {hive_metastore_user_name} "
                "-passWord {hive_metastore_user_passwd!p}")

            check_schema_created_cmd = as_user(
                format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                       "{hive_bin}/schematool -info "
                       "-dbType {hive_metastore_db_type} "
                       "-userName {hive_metastore_user_name} "
                       "-passWord {hive_metastore_user_passwd!p}"),
                params.hive_user)

            Execute(create_schema_cmd,
                    not_if=check_schema_created_cmd,
                    user=params.hive_user)
    elif name == 'hiveserver2':
        File(params.start_hiveserver2_path,
             mode=0755,
             content=Template(format('{start_hiveserver2_script}')))

    if name != "client":
        crt_directory(params.hive_pid_dir)
        crt_directory(params.hive_log_dir)
        crt_directory(params.hive_var_lib)
Example #7
0
def webhcat():
  import params

  from setup_atlas_hive import setup_atlas_hive

  Directory(params.templeton_pid_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            recursive=True)

  Directory(params.templeton_log_dir,
            owner=params.webhcat_user,
            mode=0755,
            group=params.user_group,
            recursive=True)

  Directory(params.config_dir,
            recursive=True,
            owner=params.webhcat_user,
            group=params.user_group)

  if params.security_enabled:
    kinit_if_needed = format("{kinit_path_local} -kt {hdfs_user_keytab} {hdfs_principal_name};")
  else:
    kinit_if_needed = ""

  if kinit_if_needed:
    Execute(kinit_if_needed,
            user=params.webhcat_user,
            path='/bin'
    )

  # Replace _HOST with hostname in relevant principal-related properties
  webhcat_site = params.config['configurations']['webhcat-site'].copy()
  for prop_name in ['templeton.hive.properties', 'templeton.kerberos.principal']:
    if prop_name in webhcat_site:
      webhcat_site[prop_name] = webhcat_site[prop_name].replace("_HOST", params.hostname)

  XmlConfig("webhcat-site.xml",
            conf_dir=params.config_dir,
            configurations=webhcat_site,
            configuration_attributes=params.config['configuration_attributes']['webhcat-site'],
            owner=params.webhcat_user,
            group=params.user_group,
            )

  File(format("{config_dir}/webhcat-env.sh"),
       owner=params.webhcat_user,
       group=params.user_group,
       content=InlineTemplate(params.webhcat_env_sh_template)
  )
  
  Directory(params.webhcat_conf_dir,
       cd_access='a',
       recursive=True
  )

  log4j_webhcat_filename = 'webhcat-log4j.properties'
  if (params.log4j_webhcat_props != None):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=params.log4j_webhcat_props
    )
  elif (os.path.exists("{config_dir}/{log4j_webhcat_filename}.template")):
    File(format("{config_dir}/{log4j_webhcat_filename}"),
         mode=0644,
         group=params.user_group,
         owner=params.webhcat_user,
         content=StaticFile(format("{config_dir}/{log4j_webhcat_filename}.template"))
    )

  setup_atlas_hive()
Example #8
0
def hive(name=None):
  import params

  if name == 'hiveserver2':
    # HDP 2.1.* or lower
    if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, "2.2.0.0") < 0:
      params.HdfsResource(params.webhcat_apps_dir,
                            type="directory",
                            action="create_on_execute",
                            owner=params.webhcat_user,
                            mode=0755
                          )
    
    # Create webhcat dirs.
    if params.hcat_hdfs_user_dir != params.webhcat_hdfs_user_dir:
      params.HdfsResource(params.hcat_hdfs_user_dir,
                           type="directory",
                           action="create_on_execute",
                           owner=params.hcat_user,
                           mode=params.hcat_hdfs_user_mode
      )

    params.HdfsResource(params.webhcat_hdfs_user_dir,
                         type="directory",
                         action="create_on_execute",
                         owner=params.webhcat_user,
                         mode=params.webhcat_hdfs_user_mode
    )

    # ****** Begin Copy Tarballs ******
    # *********************************
    # HDP 2.2 or higher, copy mapreduce.tar.gz to HDFS
    if params.hdp_stack_version_major != "" and compare_versions(params.hdp_stack_version_major, '2.2') >= 0:
      copy_to_hdfs("mapreduce", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)

    # Always copy pig.tar.gz and hive.tar.gz using the appropriate mode.
    # This can use a different source and dest location to account for both HDP 2.1 and 2.2
    copy_to_hdfs("pig",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 custom_source_file=params.pig_tar_source,
                 custom_dest_file=params.pig_tar_dest_file,
                 host_sys_prepped=params.host_sys_prepped)
    copy_to_hdfs("hive",
                 params.user_group,
                 params.hdfs_user,
                 file_mode=params.tarballs_mode,
                 custom_source_file=params.hive_tar_source,
                 custom_dest_file=params.hive_tar_dest_file,
                 host_sys_prepped=params.host_sys_prepped)

    wildcard_tarballs = ["sqoop", "hadoop_streaming"]
    for tarball_name in wildcard_tarballs:
      source_file_pattern = eval("params." + tarball_name + "_tar_source")
      dest_dir = eval("params." + tarball_name + "_tar_dest_dir")

      if source_file_pattern is None or dest_dir is None:
        continue

      source_files = glob.glob(source_file_pattern) if "*" in source_file_pattern else [source_file_pattern]
      for source_file in source_files:
        src_filename = os.path.basename(source_file)
        dest_file = os.path.join(dest_dir, src_filename)

        copy_to_hdfs(tarball_name,
                     params.user_group,
                     params.hdfs_user,
                     file_mode=params.tarballs_mode,
                     custom_source_file=source_file,
                     custom_dest_file=dest_file,
                     host_sys_prepped=params.host_sys_prepped)
    # ******* End Copy Tarballs *******
    # *********************************

    # Create Hive Metastore Warehouse Dir
    params.HdfsResource(params.hive_apps_whs_dir,
                         type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          mode=0777
    )

    # Create Hive User Dir
    params.HdfsResource(params.hive_hdfs_user_dir,
                         type="directory",
                          action="create_on_execute",
                          owner=params.hive_user,
                          mode=params.hive_hdfs_user_mode
    )
    
    if not is_empty(params.hive_exec_scratchdir) and not urlparse(params.hive_exec_scratchdir).path.startswith("/tmp"):
      params.HdfsResource(params.hive_exec_scratchdir,
                           type="directory",
                           action="create_on_execute",
                           owner=params.hive_user,
                           group=params.hdfs_user,
                           mode=0777) # Hive expects this dir to be writeable by everyone as it is used as a temp dir
      
    params.HdfsResource(None, action="execute")

  Directory(params.hive_etc_dir_prefix,
            mode=0755
  )

  # We should change configurations for client as well as for server.
  # The reason is that stale-configs are service-level, not component.
  for conf_dir in params.hive_conf_dirs_list:
    fill_conf_dir(conf_dir)

  XmlConfig("hive-site.xml",
            conf_dir=params.hive_config_dir,
            configurations=params.hive_site_config,
            configuration_attributes=params.config['configuration_attributes']['hive-site'],
            owner=params.hive_user,
            group=params.user_group,
            mode=0644)

  setup_atlas_hive()
  
  if params.hive_specific_configs_supported and name == 'hiveserver2':
    XmlConfig("hiveserver2-site.xml",
              conf_dir=params.hive_server_conf_dir,
              configurations=params.config['configurations']['hiveserver2-site'],
              configuration_attributes=params.config['configuration_attributes']['hiveserver2-site'],
              owner=params.hive_user,
              group=params.user_group,
              mode=0644)
  
  File(format("{hive_config_dir}/hive-env.sh"),
       owner=params.hive_user,
       group=params.user_group,
       content=InlineTemplate(params.hive_env_sh_template)
  )

  # On some OS this folder could be not exists, so we will create it before pushing there files
  Directory(params.limits_conf_dir,
            recursive=True,
            owner='root',
            group='root'
            )

  File(os.path.join(params.limits_conf_dir, 'hive.conf'),
       owner='root',
       group='root',
       mode=0644,
       content=Template("hive.conf.j2")
       )

  if (name == 'metastore' or name == 'hiveserver2') and not os.path.exists(params.target):
    jdbc_connector()

  File(format("/usr/lib/ambari-agent/{check_db_connection_jar_name}"),
       content = DownloadSource(format("{jdk_location}{check_db_connection_jar_name}")),
       mode = 0644,
  )

  if name == 'metastore':
    File(params.start_metastore_path,
         mode=0755,
         content=StaticFile('startMetastore.sh')
    )
    if params.init_metastore_schema:
      create_schema_cmd = format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                 "{hive_bin}/schematool -initSchema "
                                 "-dbType {hive_metastore_db_type} "
                                 "-userName {hive_metastore_user_name} "
                                 "-passWord {hive_metastore_user_passwd!p}")

      check_schema_created_cmd = as_user(format("export HIVE_CONF_DIR={hive_server_conf_dir} ; "
                                        "{hive_bin}/schematool -info "
                                        "-dbType {hive_metastore_db_type} "
                                        "-userName {hive_metastore_user_name} "
                                        "-passWord {hive_metastore_user_passwd!p}"), params.hive_user)

      # HACK: in cases with quoted passwords and as_user (which does the quoting as well) !p won't work for hiding passwords.
      # Fixing it with the hack below:
      quoted_hive_metastore_user_passwd = quote_bash_args(quote_bash_args(params.hive_metastore_user_passwd))
      if quoted_hive_metastore_user_passwd[0] == "'" and quoted_hive_metastore_user_passwd[-1] == "'" \
          or quoted_hive_metastore_user_passwd[0] == '"' and quoted_hive_metastore_user_passwd[-1] == '"':
        quoted_hive_metastore_user_passwd = quoted_hive_metastore_user_passwd[1:-1]
      Logger.sensitive_strings[repr(check_schema_created_cmd)] = repr(check_schema_created_cmd.replace(
          format("-passWord {quoted_hive_metastore_user_passwd}"), "-passWord " + utils.PASSWORDS_HIDE_STRING))

      Execute(create_schema_cmd,
              not_if = check_schema_created_cmd,
              user = params.hive_user
      )
  elif name == 'hiveserver2':
    File(params.start_hiveserver2_path,
         mode=0755,
         content=Template(format('{start_hiveserver2_script}'))
    )

  if name != "client":
    crt_directory(params.hive_pid_dir)
    crt_directory(params.hive_log_dir)
    crt_directory(params.hive_var_lib)