Exemple #1
0
def spark_service(name, upgrade_type=None, action=None):
  import params

  if action == 'start':

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if name == 'jobhistoryserver':
      # create & copy spark2-hdp-yarn-archive.tar.gz to hdfs
      if not params.sysprep_skip_copy_tarballs_hdfs:
          source_dir=params.spark_home+"/jars"
          tmp_archive_file=get_tarball_paths("spark2")[1]
          make_tarfile(tmp_archive_file, source_dir)
          copy_to_hdfs("spark2", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs, replace_existing_files=True)
      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True
                          )
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not
    # need to copy the tarball, otherwise, copy it.
    if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':
      historyserver_no_op_test = format(
      'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_history_server_start}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=historyserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

    elif name == 'sparkthriftserver':
      if params.security_enabled:
        hive_principal = params.hive_kerberos_principal
        hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ")
        Execute(hive_kinit_cmd, user=params.hive_user)

      thriftserver_no_op_test = format(
      'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=thriftserver_no_op_test
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
  elif action == 'stop':
    if name == 'jobhistoryserver':
      try:
        Execute(format('{spark_history_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_history_server_pid_file,
        action="delete"
      )

    elif name == 'sparkthriftserver':
      try:
        Execute(format('{spark_thrift_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_thrift_server_pid_file,
        action="delete"
      )
Exemple #2
0
def spark_service(name, action=None):
    import params

    if action == 'start':
        if name == 'jobhistoryserver':
            if not params.sysprep_skip_copy_tarballs_hdfs:
                source_dir = params.spark_home + "/jars"
                source_cp_dir = '/tmp/spark_jars'
                Execute('cp -rf %s %s;rm -rf %s/hadoop-*' %
                        (source_dir, source_cp_dir, source_cp_dir))
                tmp_archive_file = get_tarball_paths("spark2")[1]
                make_tarfile(tmp_archive_file, source_cp_dir)
                Execute('rm -rf %s' % source_cp_dir)
                copy_to_hdfs("spark2",
                             params.user_group,
                             params.hdfs_user,
                             custom_dest_file=params.spark_yarn_archive,
                             skip=params.sysprep_skip_copy_tarballs_hdfs,
                             replace_existing_files=True)

            # create spark history directory
            params.HdfsResource(params.spark_history_dir,
                                type="directory",
                                action="create_on_execute",
                                owner=params.spark_user,
                                group=params.user_group,
                                mode=0777,
                                recursive_chmod=True)
            params.HdfsResource(None, action="execute")

        if params.security_enabled:
            spark_kinit_cmd = format(
                "{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; "
            )
            Execute(spark_kinit_cmd, user=params.spark_user)

        if name == 'jobhistoryserver':

            create_catalog_cmd = format(
                "{hive_schematool_bin}/schematool -dbType {hive_metastore_db_type} "
                "-createCatalog {default_metastore_catalog} "
                "-catalogDescription 'Default catalog, for Spark' -ifNotExists "
                "-catalogLocation {default_fs}{spark_warehouse_dir}")

            # Execute(create_catalog_cmd, user=params.hive_user)

            historyserver_no_op_test = as_sudo([
                "test", "-f", params.spark_history_server_pid_file
            ]) + " && " + as_sudo(
                ["pgrep", "-F", params.spark_history_server_pid_file])
            try:
                Execute(' source /etc/spark/spark-env.sh; ' +
                        params.spark_history_server_start,
                        user=params.spark_user,
                        environment={'JAVA_HOME': params.java_home},
                        not_if=historyserver_no_op_test)
            except:
                show_logs(params.spark_log_dir, user=params.spark_user)
                raise

        elif name == 'sparkthriftserver':
            if params.security_enabled:
                hive_kinit_cmd = format(
                    "{kinit_path_local} -kt {hive_kerberos_keytab} {hive_kerberos_principal}; "
                )
                Execute(hive_kinit_cmd, user=params.spark_user)

            thriftserver_no_op_test = as_sudo([
                "test", "-f", params.spark_thrift_server_pid_file
            ]) + " && " + as_sudo(
                ["pgrep", "-F", params.spark_thrift_server_pid_file])
            try:
                Execute(' source /etc/spark/spark-env.sh; ' + format(
                    '{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'
                ),
                        user=params.spark_user,
                        environment={'JAVA_HOME': params.java_home},
                        not_if=thriftserver_no_op_test)
            except:
                show_logs(params.spark_log_dir, user=params.spark_user)
                raise
    elif action == 'stop':
        if name == 'jobhistoryserver':
            try:
                Execute(format('{spark_history_server_stop}'),
                        user=params.spark_user,
                        environment={'JAVA_HOME': params.java_home})
            except:
                show_logs(params.spark_log_dir, user=params.spark_user)
                raise
            File(params.spark_history_server_pid_file, action="delete")

        elif name == 'sparkthriftserver':
            try:
                Execute(format('{spark_thrift_server_stop}'),
                        user=params.spark_user,
                        environment={'JAVA_HOME': params.java_home})
            except:
                show_logs(params.spark_log_dir, user=params.spark_user)
                raise
            File(params.spark_thrift_server_pid_file, action="delete")
def spark_service(name, upgrade_type=None, action=None):
  import params

  if action == 'start':

    check_sac_jar()

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if name == 'jobhistoryserver' and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
      # create & copy spark2-hdp-yarn-archive.tar.gz to hdfs
      if not params.sysprep_skip_copy_tarballs_hdfs:
        source_dirs = [params.spark_home + "/jars"]

        # include sac jar and spark-job keytab to archive
        if params.sac_enabled:
          if params.security_enabled:
            shutil.copy(params.atlas_kafka_keytab, source_dirs[0])
            os.chmod(os.path.join(source_dirs[0], os.path.basename(params.atlas_kafka_keytab)), 0440)

          source_dirs.append(params.spark_atlas_jar_dir)


        tmp_archive_file=get_tarball_paths("spark2")[1]
        make_tarfile(tmp_archive_file, source_dirs)
        copy_to_hdfs("spark2", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs, replace_existing_files=True)

        if params.sac_enabled and params.security_enabled:
          os.remove(os.path.join(source_dirs[0], os.path.basename(params.atlas_kafka_keytab)))

      # create & copy spark2-hdp-hive-archive.tar.gz to hdfs
      if not params.sysprep_skip_copy_tarballs_hdfs:
        source_dirs = [params.spark_home+"/standalone-metastore"]
        tmp_archive_file=get_tarball_paths("spark2hive")[1]
        make_tarfile(tmp_archive_file, source_dirs)
        copy_to_hdfs("spark2hive", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs, replace_existing_files=True)

      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True
                          )
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not
    # need to copy the tarball, otherwise, copy it.
    if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':

      create_catalog_cmd = format("{hive_schematool_bin}/schematool -dbType {hive_metastore_db_type} "
                                    "-createCatalog {default_metastore_catalog} "
                                    "-catalogDescription 'Default catalog, for Spark' -ifNotExists "
                                    "-catalogLocation {default_fs}{spark_warehouse_dir}")

      Execute(create_catalog_cmd,
                user = params.hive_user)

      historyserver_no_op_test = as_sudo(["test", "-f", params.spark_history_server_pid_file]) + " && " + as_sudo(["pgrep", "-F", params.spark_history_server_pid_file])
      try:
        Execute(params.spark_history_server_start,
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=historyserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

    elif name == 'sparkthriftserver':
      import status_params
      if params.security_enabled:
        hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_kerberos_principal}; ")
        Execute(hive_kinit_cmd, user=params.spark_user)

      thriftserver_no_op_test= as_sudo(["test", "-f", params.spark_thrift_server_pid_file]) + " && " + as_sudo(["pgrep", "-F", params.spark_thrift_server_pid_file])
      try:
        Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=thriftserver_no_op_test
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

      hive_connection_created = False
      i = 0
      while i < 15:
        time.sleep(30)
        Logger.info("Check connection to STS is created.")

        beeline_url = ["jdbc:hive2://{fqdn}:{spark_thrift_port}/default"]

        if params.security_enabled:
            beeline_url.append("principal={hive_kerberos_principal}")

        beeline_url.append("transportMode={spark_transport_mode}")

        if params.spark_transport_mode.lower() == 'http':
            beeline_url.append("httpPath={spark_thrift_endpoint}")
            if params.spark_thrift_ssl_enabled:
                beeline_url.append("ssl=true")

        beeline_cmd = os.path.join(params.spark_home, "bin", "beeline")
        cmd = "! %s -u '%s'  -e '' 2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL' -e 'Error: Could not open'" % \
              (beeline_cmd, format(";".join(beeline_url)))

        try:
          Execute(cmd, user=params.spark_user, path=[beeline_cmd], timeout=CHECK_COMMAND_TIMEOUT_DEFAULT)
          hive_connection_created = True
          Logger.info("Connection to STS is created.")
          break
        except:
          Logger.info("Connection to STS still is not created.")
          pass

        Logger.info("Check STS process status.")
        check_process_status(status_params.spark_thrift_server_pid_file)

        i+=1

      if not hive_connection_created:
        raise ComponentIsNotRunning("Something goes wrong, STS connection was not created but STS process still alive. "
                                    "Potential problems: Hive/YARN doesn't work correctly or too slow. For more information check STS logs.")

  elif action == 'stop':
    if name == 'jobhistoryserver':
      try:
        Execute(format('{spark_history_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_history_server_pid_file,
        action="delete"
      )

    elif name == 'sparkthriftserver':
      try:
        Execute(format('{spark_thrift_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_thrift_server_pid_file,
        action="delete"
      )
Exemple #4
0
def spark_service(name, upgrade_type=None, action=None):
  import params

  if action == 'start':

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if name == 'jobhistoryserver' and effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
      # create & copy spark2-hdp-yarn-archive.tar.gz to hdfs
      if not params.sysprep_skip_copy_tarballs_hdfs:
          source_dir=params.spark_home+"/jars"
          tmp_archive_file=get_tarball_paths("spark2")[1]
          make_tarfile(tmp_archive_file, source_dir)
          copy_to_hdfs("spark2", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs, replace_existing_files=True)

      # create & copy spark2-hdp-hive-archive.tar.gz to hdfs
      if not params.sysprep_skip_copy_tarballs_hdfs:
        source_dir=params.spark_home+"/standalone-metastore"
        tmp_archive_file=get_tarball_paths("spark2hive")[1]
        make_tarfile(tmp_archive_file, source_dir)
        copy_to_hdfs("spark2hive", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs, replace_existing_files=True)

      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True
                          )
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3, and higher, which was included in HDP 2.3, does not have a dependency on Tez, so it does not
    # need to copy the tarball, otherwise, copy it.
    if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, skip=params.sysprep_skip_copy_tarballs_hdfs)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':

      create_catalog_cmd = format("{hive_schematool_bin}/schematool -dbType {hive_metastore_db_type} "
                                    "-createCatalog {default_metastore_catalog} "
                                    "-catalogDescription 'Default catalog, for Spark' -ifNotExists "
                                    "-catalogLocation {default_fs}{spark_warehouse_dir}")

      Execute(create_catalog_cmd,
                user = params.hive_user)

      historyserver_no_op_test = as_sudo(["test", "-f", params.spark_history_server_pid_file]) + " && " + as_sudo(["pgrep", "-F", params.spark_history_server_pid_file])
      try:
        Execute(params.spark_history_server_start,
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=historyserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

    elif name == 'sparkthriftserver':
      if params.security_enabled:
        hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_kerberos_principal}; ")
        Execute(hive_kinit_cmd, user=params.spark_user)

      thriftserver_no_op_test= as_sudo(["test", "-f", params.spark_thrift_server_pid_file]) + " && " + as_sudo(["pgrep", "-F", params.spark_thrift_server_pid_file])
      try:
        Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=thriftserver_no_op_test
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
  elif action == 'stop':
    if name == 'jobhistoryserver':
      try:
        Execute(format('{spark_history_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_history_server_pid_file,
        action="delete"
      )

    elif name == 'sparkthriftserver':
      try:
        Execute(format('{spark_thrift_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_thrift_server_pid_file,
        action="delete"
      )