def service(component_name, action="start"):
    import params

    if component_name.lower() == "mesos_dispatcher":
        daemon = params.spark_dispatch_start
        pid_file = params.mesos_dispatcher_pid_file
        usr = params.spark_user
    else:
        # daemon = params.spark_history_server_start
        # Only the Mesos dispatcher is handled here; fail fast so that
        # daemon, pid_file and usr are never used uninitialized.
        raise Fail(format("Unsupported component: {component_name}"))

    if action == "start":

        check_process = format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1")
        cmd = format("export MESOS_NATIVE_JAVA_LIBRARY={mesos_native_java_library} && " "{daemon}")

        # Remove the pid file if its corresponding process is not running.
        File(pid_file, action="delete", not_if=check_process)

        # Attempt to start the process. Internally, this is skipped if the process is already running.
        Execute(cmd, user=usr, environment={"JAVA_HOME": params.java_home}, not_if=check_process)

        # Ensure that the process with the expected PID exists.
        Execute(check_process, user=usr, not_if=check_process, initial_wait=5)

    elif action == "stop":
        cmd = format("{spark_dispatch_stop}")
        Execute(cmd, user=usr, environment={"JAVA_HOME": params.java_home})

        File(pid_file, action="delete")
Example #2
def jdbc_connector():
  import params
  from urllib2 import HTTPError
  from resource_management import Fail
  for jar_name in params.sqoop_jdbc_drivers_dict:
    if 'mysql-connector-java.jar' in jar_name:
      continue
    downloaded_custom_connector = format("{sqoop_lib}/{jar_name}")
    jdbc_symlink_remote = params.sqoop_jdbc_drivers_dict[jar_name]
    jdbc_driver_label = params.sqoop_jdbc_drivers_name_dict[jar_name]
    driver_curl_source = format("{jdk_location}/{jdbc_symlink_remote}")
    environment = {
      "no_proxy": format("{ambari_server_hostname}")
    }
    try:
      File(downloaded_custom_connector,
           content = DownloadSource(driver_curl_source),
           mode = 0644,
      )
    except HTTPError:
      error_string = format("Could not download {driver_curl_source}\n\
                 Please upload jdbc driver to server by run command:\n\
                 ambari-server setup --jdbc-db={jdbc_driver_label} --jdbc-driver=<PATH TO DRIVER>\n\
                 at {ambari_server_hostname}") 
      raise Fail(error_string)
def _get_current_hiveserver_version():
  """
  Runs "hive --version" and parses the result in order
  to obtain the current version of hive.

  :return:  the hiveserver2 version, returned by "hive --version"
  """
  import params

  try:
    # When downgrading, the source version should be the version we are downgrading from
    if "downgrade" == params.upgrade_direction:
      if not params.downgrade_from_version:
        raise Fail('The version we are downgrading from must be provided in \'downgrade_from_version\'')
      source_version = params.downgrade_from_version
    else:
      source_version = params.current_version
    hive_execute_path = _get_hive_execute_path(source_version)
    version_hive_bin = params.hive_bin
    formatted_source_version = format_hdp_stack_version(source_version)
    if formatted_source_version and compare_versions(formatted_source_version, "2.2") >= 0:
      version_hive_bin = format('/usr/hdp/{source_version}/hive/bin')
    command = format('{version_hive_bin}/hive --version')
    return_code, hdp_output = shell.call(command, user=params.hive_user, path=hive_execute_path)
  except Exception as e:
    Logger.error(str(e))
    raise Fail('Unable to execute hive --version command to retrieve the hiveserver2 version.')
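
# The snippet above is cut off before hdp_output is parsed into a version string.
# As an assumption (not the original code), the version could be pulled out of
# the "hive --version" output roughly like this:
import re

def _parse_hive_version(output):
    """Return e.g. '1.2.1000.2.5.0.0-1245' from a line such as 'Hive 1.2.1000.2.5.0.0-1245'."""
    match = re.search(r'^Hive\s+([\d.\-\w]+)', output, re.MULTILINE)
    return match.group(1) if match else None
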
def spark_service(action):
  import params
  
  if action == 'start':
    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3 and higher (shipped with HDP 2.3) no longer depends on Tez,
    # so the Tez tarball only needs to be copied for older stacks.
    if params.hdp_stack_version and compare_versions(params.hdp_stack_version, '2.3.0.0') < 0:
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user)
      if resource_created:
        params.HdfsResource(None, action="execute")

    no_op_test = format(
      'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
    Execute(format('{spark_history_server_start}'),
            user=params.spark_user,
            environment={'JAVA_HOME': params.java_home},
            not_if=no_op_test
    )
  elif action == 'stop':
    Execute(format('{spark_history_server_stop}'),
            user=params.spark_user,
            environment={'JAVA_HOME': params.java_home}
    )
    File(params.spark_history_server_pid_file,
         action="delete"
    )
Example #5
def _get_directory_mappings_during_upgrade():
  """
  Gets a dictionary of directory to archive name that represents the
  directories that need to be backed up and their output tarball archive targets
  :return:  the dictionary of directory to tarball mappings
  """
  import params

  # Must be performing an Upgrade
  if params.upgrade_direction is None or params.upgrade_direction != Direction.UPGRADE or \
          params.upgrade_from_version is None or params.upgrade_from_version == "":
    Logger.error("Function _get_directory_mappings_during_upgrade() can only be called during a Stack Upgrade in direction UPGRADE.")
    return {}

  # By default, use this for all stacks.
  knox_data_dir = '/var/lib/knox/data'

  if params.stack_name and params.stack_name.upper() == "HDP" and \
          compare_versions(format_hdp_stack_version(params.upgrade_from_version), "2.3.0.0") > 0:
    # Use the version that is being upgraded from.
    knox_data_dir = format('/usr/hdp/{upgrade_from_version}/knox/data')

  # the trailing "/" is important here so as to not include the "conf" folder itself
  directories = {knox_data_dir: BACKUP_DATA_ARCHIVE, params.knox_conf_dir + "/": BACKUP_CONF_ARCHIVE}

  Logger.info(format("Knox directories to backup:\n{directories}"))
  return directories
  def _create_file(self, target, source=None, mode=""):
    """
    The PUT file command is slow, but _get_file_status is pretty fast,
    so we check whether the file really needs to be put before doing it.
    """
    file_status = self._get_file_status(target) if target!=self.main_resource.resource.target else self.target_status
    mode = "" if not mode else mode

    if file_status:
      if source:
        length = file_status['length']
        local_file_size = os.stat(source).st_size # TODO: os -> sudo

        # TODO: re-implement this using checksums
        if local_file_size == length:
          Logger.info(format("DFS file {target} is identical to {source}, skipping the copying"))
          return
        elif not self.main_resource.resource.replace_existing_files:
          Logger.info(format("Not replacing existing DFS file {target} which is different from {source}, due to replace_existing_files=False"))
          return
      else:
        Logger.info(format("File {target} already exists in DFS, skipping the creation"))
        return

    Logger.info(format("Creating new file {target} in DFS"))
    kwargs = {'permission': mode} if mode else {}

    self.util.run_command(target, 'CREATE', method='PUT', overwrite=True, assertable_result=False, file_to_put=source, **kwargs)

    if mode and file_status:
      file_status['permission'] = mode
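
# The docstring above notes that the PUT is slow while the status call is cheap,
# so the upload is skipped when sizes match. A minimal standalone sketch of that
# decision (plain Python; should_upload is a hypothetical helper, and a size
# comparison is only a weak substitute for a checksum, as the TODO says):
import os

def should_upload(local_path, remote_length, replace_existing=True):
    """Return True if the local file should be uploaded to DFS."""
    local_size = os.stat(local_path).st_size
    if local_size == remote_length:
        return False          # identical size, assume identical content
    return replace_existing   # content differs, honor replace_existing
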
Example #7
def check_thrift_port_sasl(address, port, hive_auth = "NOSASL", key = None, kinitcmd = None, smokeuser = '******',
                           transport_mode = "binary"):
  """
  Hive thrift SASL port check
  """
  BEELINE_CHECK_TIMEOUT = 30

  if kinitcmd:
    url = format("jdbc:hive2://{address}:{port}/;principal={key}")
    Execute(kinitcmd,
            user=smokeuser
    )
  else:
    url = format("jdbc:hive2://{address}:{port}")

  if hive_auth != "NOSASL" and transport_mode != "http":
    cmd = format("! beeline -u '{url}' -e '' ") + "2>&1| awk '{print}'|grep -i -e 'Connection refused' -e 'Invalid URL'"
    Execute(cmd,
            user=smokeuser,
            path=["/bin/", "/usr/bin/", "/usr/lib/hive/bin/", "/usr/sbin/"],
            timeout=BEELINE_CHECK_TIMEOUT
    )
  else:
    s = socket.socket()
    s.settimeout(1)
    try:
      s.connect((address, port))
    except socket.error as e:
      raise
    finally:
      s.close()
def execute(configurations={}, parameters={}, host_name=None):
  """
  Returns a tuple containing the result code and a pre-formatted result label

  Keyword arguments:
  configurations (dictionary): a mapping of configuration key to value
  parameters (dictionary): a mapping of script parameter key to value
  host_name (string): the name of this host where the alert is running
  """

  from resource_management.libraries.functions import reload_windows_env
  from resource_management.core.resources import Execute
  reload_windows_env()
  hive_home = os.environ['HIVE_HOME']

  if configurations is None:
    return ('UNKNOWN', ['There were no configurations supplied to the script.'])

  transport_mode = HIVE_SERVER_TRANSPORT_MODE_DEFAULT
  if HIVE_SERVER_TRANSPORT_MODE_KEY in configurations:
    transport_mode = configurations[HIVE_SERVER_TRANSPORT_MODE_KEY]

  port = THRIFT_PORT_DEFAULT
  if transport_mode.lower() == 'binary' and HIVE_SERVER_THRIFT_PORT_KEY in configurations:
    port = int(configurations[HIVE_SERVER_THRIFT_PORT_KEY])
  elif transport_mode.lower() == 'http' and HIVE_SERVER_THRIFT_HTTP_PORT_KEY in configurations:
    port = int(configurations[HIVE_SERVER_THRIFT_HTTP_PORT_KEY])

  hiveuser = HADOOPUSER_DEFAULT
  if HADOOPUSER_KEY in configurations:
    hiveuser = configurations[HADOOPUSER_KEY]

  result_code = None
  try:
    if host_name is None:
      host_name = socket.getfqdn()

    beeline_url = ['jdbc:hive2://{host_name}:{port}/', "transportMode={transport_mode}"]
    # append url according to used transport
    if transport_mode == "http":
      beeline_url.append('httpPath=cliservice')
    beeline_url_string = format(";".join(beeline_url))
    beeline_cmd = os.path.join(hive_home, "bin", "beeline.cmd")
    cmd = format("cmd /c {beeline_cmd} -u {beeline_url_string} -e '' 2>&1 | findstr Connected")

    start_time = time.time()
    try:
      Execute(cmd, user=hiveuser, timeout=30)
      total_time = time.time() - start_time
      result_code = 'OK'
      label = OK_MESSAGE.format(total_time, port)
    except:
      result_code = 'CRITICAL'
      label = CRITICAL_MESSAGE.format(host_name, port, traceback.format_exc())
  except:
    label = traceback.format_exc()
    result_code = 'UNKNOWN'

  return (result_code, [label])
Example #9
 def stop(self, env, rolling_restart=False):
   import params
   env.set_params(params)
   daemon_cmd = format('source {params.conf_dir}/atlas-env.sh; {params.metadata_stop_script}')
   Execute(daemon_cmd,
           user=params.metadata_user,
   )
   Execute (format("rm -f {params.pid_file}"))
def get_check_command(oozie_url, host_name, configurations):
  if OOZIE_USER in configurations:
    oozie_user = configurations[OOZIE_USER]
  else:
    raise Exception("Oozie user is required")
    
  security_enabled = False
  if SECURITY_ENABLED in configurations:
    security_enabled = str(configurations[SECURITY_ENABLED]).upper() == 'TRUE'
  kerberos_env = None
  if security_enabled:
    if OOZIE_KEYTAB in configurations and OOZIE_PRINCIPAL in configurations:
      oozie_keytab = configurations[OOZIE_KEYTAB]
      oozie_principal = configurations[OOZIE_PRINCIPAL]

      # substitute _HOST in kerberos principal with actual fqdn
      oozie_principal = oozie_principal.replace('_HOST', host_name)
    else:
      raise KerberosPropertiesNotFound('The Oozie keytab and principal are required configurations when security is enabled.')

    # Create the kerberos credentials cache (ccache) file and set it in the environment to use
    # when executing curl
    env = Environment.get_instance()
    ccache_file = "{0}{1}oozie_alert_cc_{2}".format(env.tmp_dir, os.sep, os.getpid())
    kerberos_env = {'KRB5CCNAME': ccache_file}

    # Get the configured Kerberos executable search paths, if any
    if KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY in configurations:
      kerberos_executable_search_paths = configurations[KERBEROS_EXECUTABLE_SEARCH_PATHS_KEY]
    else:
      kerberos_executable_search_paths = None

    klist_path_local = get_klist_path(kerberos_executable_search_paths)
    klist_command = format("{klist_path_local} -s {ccache_file}")

    # Determine whether we need to kinit by testing to see if the relevant cache exists and has
    # non-expired tickets. Tickets are requested with a 5 minute lifetime to help reduce the number
    # of kinits we do, while still recovering quickly when keytabs are regenerated.
    return_code, _ = call(klist_command, user=oozie_user)
    if return_code != 0:
      kinit_path_local = get_kinit_path(kerberos_executable_search_paths)
      kinit_command = format("{kinit_path_local} -l 5m -kt {oozie_keytab} {oozie_principal}; ")

      # kinit
      Execute(kinit_command, 
              environment=kerberos_env,
              user=oozie_user,
      )

  # oozie configuration directory uses a symlink when > HDP 2.2
  oozie_config_directory = OOZIE_CONF_DIR_LEGACY
  if os.path.exists(OOZIE_CONF_DIR):
    oozie_config_directory = OOZIE_CONF_DIR

  command = "source {0}/oozie-env.sh ; oozie admin -oozie {1} -status".format(
    oozie_config_directory, oozie_url)

  return (command, kerberos_env, oozie_user)
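
# The klist/kinit dance above (check the ccache with "klist -s", kinit with a
# 5 minute ticket lifetime only when the cache is missing or expired) can be
# sketched with plain subprocess calls. ensure_ticket is a hypothetical helper;
# the klist/kinit flags are the standard MIT Kerberos ones already used above:
import os
import subprocess

def ensure_ticket(ccache_file, keytab, principal):
    """kinit into ccache_file unless it already holds valid, non-expired tickets."""
    env = dict(os.environ, KRB5CCNAME=ccache_file)
    if subprocess.call(['klist', '-s'], env=env) != 0:
        subprocess.check_call(['kinit', '-l', '5m', '-kt', keytab, principal], env=env)
    return env
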
 def _copy_from_local_directory(self, target, source):
   for next_path_part in os.listdir(source):
     new_source = os.path.join(source, next_path_part)
     new_target = format("{target}/{next_path_part}")
     if os.path.isdir(new_source):
       Logger.info(format("Creating DFS directory {new_target}"))
       self._create_directory(new_target)
       self._copy_from_local_directory(new_target, new_source)
     else:
       self._create_file(new_target, new_source)
Example #12
 def startdemoldap(self, env):
   import params
   env.set_params(params)
   self.configureldap(env)
   daemon_cmd = format('{ldap_bin} start')
   no_op_test = format('ls {ldap_pid_file} >/dev/null 2>&1 && ps -p `cat {ldap_pid_file}` >/dev/null 2>&1')
   Execute(daemon_cmd,
           user=params.knox_user,
           environment={'JAVA_HOME': params.java_home},
           not_if=no_op_test
   )
Example #13
  def start(self, env, rolling_restart=False):
    import params
    env.set_params(params)
    self.configure(env)

    daemon_cmd = format('source {params.conf_dir}/atlas-env.sh ; {params.metadata_start_script}')
    no_op_test = format('ls {params.pid_file} >/dev/null 2>&1 && ps -p `cat {params.pid_file}` >/dev/null 2>&1')
    Execute(daemon_cmd,
            user=params.metadata_user,
            not_if=no_op_test
    )
def phoenix_service(action = 'start'): # 'start', 'stop', 'status'
    # Note: params/status_params should already be imported before calling phoenix_service()
    pid_file = format("{pid_dir}/phoenix-{hbase_user}-server.pid")
    no_op_test = format("ls {pid_file} >/dev/null 2>&1 && ps -p `cat {pid_file}` >/dev/null 2>&1")

    if action == "status":
      check_process_status(pid_file)
    else:
      env = {'JAVA_HOME': format("{java64_home}"), 'HBASE_CONF_DIR': format("{hbase_conf_dir}")}
      daemon_cmd = format("{phx_daemon_script} {action}")
      if action == 'start':
        Execute(daemon_cmd,
                user=format("{hbase_user}"),
                environment=env)
  
      elif action == 'stop':
        Execute(daemon_cmd,
                timeout = 30,
                on_timeout = format("! ( {no_op_test} ) || {sudo} -H -E kill -9 `cat {pid_file}`"),
                user=format("{hbase_user}"),
                environment=env
        )
        File(pid_file,
             action = "delete"
        )
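
# The stop path above gives the daemon a 30 second window and, on timeout,
# force-kills the PID recorded in the pid file. A rough standalone equivalent in
# plain Python (stop_with_deadline is a hypothetical helper, not the Ambari DSL):
import os
import signal
import subprocess
import time

def stop_with_deadline(stop_cmd, pid_file, timeout=30):
    """Run stop_cmd; if it has not finished within `timeout` seconds, SIGKILL the recorded PID."""
    proc = subprocess.Popen(stop_cmd, shell=True)
    deadline = time.time() + timeout
    while proc.poll() is None and time.time() < deadline:
        time.sleep(1)
    if proc.poll() is None:
        # Graceful stop timed out: kill the hung stop command, then the daemon itself.
        proc.kill()
        with open(pid_file) as f:
            try:
                os.kill(int(f.read().strip()), signal.SIGKILL)
            except OSError:
                pass  # daemon already exited
    if os.path.exists(pid_file):
        os.remove(pid_file)
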
Example #15
def run_migration(env, upgrade_type):
  """
  If the acl migration script is present, then run it for either upgrade or downgrade.
  That script was introduced in HDP 2.3.4.0 and requires stopping all Kafka brokers first.
  Requires configs to be present.
  :param env: Environment.
  :param upgrade_type: "rolling" or "nonrolling"
  """
  import params

  if upgrade_type is None:
    raise Fail('Parameter "upgrade_type" is missing.')

  if params.upgrade_direction is None:
    raise Fail('Parameter "upgrade_direction" is missing.')

  if params.upgrade_direction == Direction.DOWNGRADE and params.downgrade_from_version is None:
    raise Fail('Parameter "downgrade_from_version" is missing.')

  if not params.security_enabled:
    Logger.info("Skip running the Kafka ACL migration script since cluster security is not enabled.")
    return
  
  Logger.info("Upgrade type: {0}, direction: {1}".format(str(upgrade_type), params.upgrade_direction))

  # If the ACL migration script exists in the relevant stack version, attempt to run the upgrade/downgrade while still using the currently installed bits.
  kafka_acls_script = None
  command_suffix = ""
  if params.upgrade_direction == Direction.UPGRADE:
    kafka_acls_script = format("/usr/hdp/{version}/kafka/bin/kafka-acls.sh")
    command_suffix = "--upgradeAcls"
  elif params.upgrade_direction == Direction.DOWNGRADE:
    kafka_acls_script = format("/usr/hdp/{downgrade_from_version}/kafka/bin/kafka-acls.sh")
    command_suffix = "--downgradeAcls"

  if kafka_acls_script is not None:
    if os.path.exists(kafka_acls_script):
      Logger.info("Found Kafka acls script: {0}".format(kafka_acls_script))
      if params.zookeeper_connect is None:
        raise Fail("Could not retrieve property kafka-broker/zookeeper.connect")

      acls_command = "{0} --authorizer kafka.security.auth.SimpleAclAuthorizer --authorizer-properties zookeeper.connect={1} {2}".\
        format(kafka_acls_script, params.zookeeper_connect, command_suffix)

      Execute(acls_command,
              user=params.kafka_user,
              logoutput=True)
    else:
      Logger.info("Did not find Kafka acls script: {0}".format(kafka_acls_script))
def get_check_command(oozie_url, host_name, configurations):
  from resource_management.libraries.functions import reload_windows_env
  reload_windows_env()
  oozie_home = os.environ['OOZIE_HOME']
  oozie_cmd = os.path.join(oozie_home, 'bin', 'oozie.cmd')
  command = format("cmd /c {oozie_cmd} admin -oozie {oozie_url} -status")
  return (command, None, None)
Example #17
  def install_ranger_files(self):
    import params
    splice_lib_dir = "/var/lib/splicemachine"
    ranger_home = format('{params.stack_root}/current/ranger-admin')
    if not os.path.exists(ranger_home):
        print("No ranger installation found")
        return
    ranger_user = params.config['configurations']['ranger-env']['ranger_user']
    ranger_plugins_dir = os.path.join(ranger_home,
                                      "ews/webapp/WEB-INF/classes/ranger-plugins/splicemachine")

    Directory(ranger_plugins_dir,
              owner = ranger_user,
              group = ranger_user,
              create_parents = False
              )

    splice_ranger_jar = self.search_file(splice_lib_dir, "splice_ranger_admin*.jar")
    db_client_jar = self.search_file(splice_lib_dir, "db-client-*.jar")

    Link(os.path.join(ranger_plugins_dir, splice_ranger_jar),
         to = os.path.join(splice_lib_dir, splice_ranger_jar))
    Link(os.path.join(ranger_plugins_dir, db_client_jar),
         to = os.path.join(splice_lib_dir, db_client_jar))

    hbase_user = params.config['configurations']['hbase-env']['hbase_user']
    hdfs_audit_dir = params.config['configurations']['ranger-splicemachine-audit'][
        'xasecure.audit.destination.hdfs.dir']

    params.HdfsResource(hdfs_audit_dir,
                        type="directory",
                        action="create_on_execute",
                        owner=hbase_user
                        )
Example #18
  def pre_rolling_restart(self, env):
    """
    Performs the tasks surrounding the Oozie startup when a rolling upgrade
    is in progress. This includes backing up the configuration, updating
    the database, preparing the WAR, and installing the sharelib in HDFS.
    :param env:
    :return:
    """
    import params
    env.set_params(params)

    # this function should not execute if the version can't be determined or
    # is not at least HDP 2.2.0.0
    if not params.version or compare_versions(format_hdp_stack_version(params.version), '2.2.0.0') < 0:
      return

    Logger.info("Executing Oozie Server Rolling Upgrade pre-restart")

    oozie_server_upgrade.backup_configuration()

    Execute(format("hdp-select set oozie-server {version}"))

    oozie_server_upgrade.restore_configuration()
    oozie_server_upgrade.prepare_libext_directory()
    oozie_server_upgrade.upgrade_oozie()
Example #19
  def status(self, env):
    import status_params
    env.set_params(status_params)
    pid_file = format("{hive_pid_dir}/{hive_pid}")

    # Check that the HiveServer2 process referenced by the pid file is running
    check_process_status(pid_file)
def get_live_status(pid_file, flume_conf_directory):
  """
  Gets the status information of a flume agent, including source, sink, and
  channel counts.
  :param pid_file: the PID file of the agent to check
  :param flume_conf_directory:  the configuration directory (e.g. /etc/flume/conf)
  :return: a dictionary of information about the flume agent
  """
  pid_file_part = pid_file.split(os.sep).pop()

  res = {}
  res['name'] = pid_file_part

  if pid_file_part.endswith(".pid"):
    res['name'] = pid_file_part[:-4]

  res['status'] = 'RUNNING' if is_flume_process_live(pid_file) else 'NOT_RUNNING'
  res['sources_count'] = 0
  res['sinks_count'] = 0
  res['channels_count'] = 0

  flume_agent_conf_dir = flume_conf_directory + os.sep + res['name']
  flume_agent_meta_file = flume_agent_conf_dir + os.sep + 'ambari-meta.json'

  try:
    with open(flume_agent_meta_file) as fp:
      meta = json.load(fp)
      res['sources_count'] = meta['sources_count']
      res['sinks_count'] = meta['sinks_count']
      res['channels_count'] = meta['channels_count']
  except:
    Logger.logger.exception(format("Error reading {flume_agent_meta_file}: "))

  return res
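
# For reference, an ambari-meta.json that satisfies the keys read above would
# look something like this (illustrative values only):
#
#   {"sources_count": 1, "sinks_count": 1, "channels_count": 1}
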
def _check_datanode_shutdown(hdfs_binary):
  """
  Checks that a DataNode is down by running "hdfs dfsadmin -getDatanodeInfo"
  several times, pausing in between runs. Once the DataNode stops responding
  this method will return, otherwise it will raise a Fail(...) and retry
  automatically.
  The stack defaults for retrying for HDFS are also way too slow for this
  command; they are set to wait about 45 seconds between client retries. As
  a result, a single execution of dfsadmin will take 45 seconds to retry and
  the DataNode may be marked as dead, causing problems with HBase.
  https://issues.apache.org/jira/browse/HDFS-8510 tracks reducing the
  times for ipc.client.connect.retry.interval. In the meantime, override them
  here, but only for RU.
  :param hdfs_binary: name/path of the HDFS binary to use
  :return:
  """
  import params

  # override stock retry timeouts since after 30 seconds, the datanode is
  # marked as dead and can affect HBase during RU
  dfsadmin_base_command = get_dfsadmin_base_command(hdfs_binary)
  command = format('{dfsadmin_base_command} -D ipc.client.connect.max.retries=5 -D ipc.client.connect.retry.interval=1000 -getDatanodeInfo {dfs_dn_ipc_address}')

  try:
    Execute(command, user=params.hdfs_user, tries=1)
  except:
    Logger.info("DataNode has successfully shutdown for upgrade.")
    return

  Logger.info("DataNode has not shutdown.")
  raise Fail('DataNode has not shutdown.')
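
# The docstring above relies on the caller retrying this check until the
# DataNode stops responding. A bare-bones version of that retry loop in plain
# Python (retry_until_quiet is a hypothetical helper; the real scripts use the
# framework's retry decorator, and the tries/sleep defaults here are assumptions):
import time

def retry_until_quiet(check, tries=24, sleep_seconds=5):
    """Call `check` until it stops raising, or give up after `tries` attempts."""
    last_error = None
    for _ in range(tries):
        try:
            check()
            return
        except Exception as e:  # the check raises while the process is still up
            last_error = e
            time.sleep(sleep_seconds)
    raise last_error
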
Example #22
 def start(self, env):
     import params
     env.set_params(params)
     self.configure(env)
     start_cmd = format("service sidewinder start")
     Execute(start_cmd)
     print 'Start Sidewinder'
Example #23
    def service_check(self, env):
        import params

        env.set_params(params)
        if params.security_enabled:
            Execute(format("{kinit_path_local}  -kt {smoke_user_keytab} {smokeuser_principal}"), user=params.smokeuser)
        Execute("sqoop version", user=params.smokeuser, path=params.sqoop_bin_dir, logoutput=True)
Example #24
    def service_check(self, env):
        import params

        env.set_params(params)
        smoke_cmd = os.path.join(params.hdp_root, "Run-SmokeTests.cmd")
        service = "SQOOP"
        Execute(format("cmd /C {smoke_cmd} {service}"), logoutput=True)
def pre_rolling_upgrade_shutdown(hdfs_binary):
  """
  Runs the "shutdownDatanode {ipc_address} upgrade" command to shutdown the
  DataNode in preparation for an upgrade. This will then periodically check
  "getDatanodeInfo" to ensure the DataNode has shutdown correctly.
  This function will obtain the Kerberos ticket if security is enabled.
  :param hdfs_binary: name/path of the HDFS binary to use
  :return: True if the command ran OK (even with errors), False if the DataNode needs to be stopped forcefully.
  """
  import params

  Logger.info('DataNode executing "shutdownDatanode" command in preparation for upgrade...')
  if params.security_enabled:
    Execute(params.dn_kinit_cmd, user = params.hdfs_user)

  dfsadmin_base_command = get_dfsadmin_base_command(hdfs_binary)
  command = format('{dfsadmin_base_command} -shutdownDatanode {dfs_dn_ipc_address} upgrade')

  code, output = shell.call(command, user=params.hdfs_user)
  if code == 0:
    # verify that the datanode is down
    _check_datanode_shutdown(hdfs_binary)
  else:
    # Due to bug HDFS-7533, DataNode may not always shutdown during stack upgrade, and it is necessary to kill it.
    if output is not None and re.search("Shutdown already in progress", output):
      Logger.error("Due to a known issue in DataNode, the command {0} did not work, so will need to shutdown the datanode forcefully.".format(command))
      return False
  return True
Example #26
def pre_upgrade_deregister():
  """
  Runs the "hive --service hiveserver2 --deregister <version>" command to
  de-provision the server in preparation for an upgrade. This will contact
  ZooKeeper to remove the server so that clients that attempt to connect
  will be directed to other servers automatically. Once all
  clients have drained, the server will shutdown automatically; this process
  could take a very long time.
  This function will obtain the Kerberos ticket if security is enabled.
  :return:
  """
  import params

  Logger.info('HiveServer2 executing "deregister" command in preparation for upgrade...')

  if params.security_enabled:
    kinit_command=format("{kinit_path_local} -kt {smoke_user_keytab} {smokeuser_principal}; ")
    Execute(kinit_command,user=params.smokeuser)

  # calculate the current hive server version
  current_hiveserver_version = _get_current_hiveserver_version()
  if current_hiveserver_version is None:
    raise Fail('Unable to determine the current HiveServer2 version to deregister.')

  # deregister
  command = 'hive --service hiveserver2 --deregister ' + current_hiveserver_version
  Execute(command, user=params.hive_user, path=params.execute_path, tries=1 )
  def _assert_valid(self):
    source = self.main_resource.resource.source
    type = self.main_resource.resource.type
    target = self.main_resource.resource.target

    if source:
      if not os.path.exists(source):
        raise Fail(format("Source {source} doesn't exist"))
      if type == "directory" and os.path.isfile(source):
        raise Fail(format("Source {source} is file but type is {type}"))
      elif type == "file" and os.path.isdir(source):
        raise Fail(format("Source {source} is directory but type is {type}"))

    self.target_status = self._get_file_status(target)

    if self.target_status and self.target_status['type'].lower() != type:
      raise Fail(format("Trying to create file/directory but directory/file exists in the DFS on {target}"))
Example #28
    def install(self, env):
        import params
        env.set_params(params)
        print 'Install the Sidewinder'
        install_cmd = format("wget -O /tmp/sidewinder.rpm http://search.maven.org/remotecontent?filepath=com/srotya/sidewinder/sidewinder-cluster-dist/${project.version}/sidewinder-cluster-dist-${project.version}.rpm; rpm -ivf /tmp/sidewinder.rpm")
#        stop_cmd = format("rpm -ivf /tmp/sidewinder.rpm")
        Execute(install_cmd)
        self.install_packages(env)
Example #29
def create_topology_script():
  import params
  # installing the topology script to the specified location
  File(params.net_topology_script_file_path,
       content=StaticFile('topology_script.py'),
       mode=0755,
       only_if=format("test -d {net_topology_script_dir}"),
  )
Example #30
  def service_check(self, env):
    import params
    env.set_params(params)

    path_to_tez_jar = format(params.tez_examples_jar)
    wordcount_command = format("jar {path_to_tez_jar} orderedwordcount /tmp/tezsmokeinput/sample-tez-test /tmp/tezsmokeoutput/")
    test_command = format("fs -test -e /tmp/tezsmokeoutput/_SUCCESS")

    File(format("{tmp_dir}/sample-tez-test"),
      content = "foo\nbar\nfoo\nbar\nfoo",
      mode = 0755
    )

    params.HdfsResource("/tmp/tezsmokeinput",
      action = "create_on_execute",
      type = "directory",
      owner = params.smokeuser,
    )
    params.HdfsResource("/tmp/tezsmokeinput/sample-tez-test",
      action = "create_on_execute",
      type = "file",
      owner = params.smokeuser,
      source = format("{tmp_dir}/sample-tez-test"),
    )

    if params.hdp_stack_version and compare_versions(params.hdp_stack_version, '2.2.0.0') >= 0:
      copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)

    params.HdfsResource(None, action = "execute")

    ExecuteHadoop(wordcount_command,
      tries = 3,
      try_sleep = 5,
      user = params.smokeuser,
      conf_dir = params.hadoop_conf_dir,
      bin_dir = params.hadoop_bin_dir
    )

    ExecuteHadoop(test_command,
      tries = 10,
      try_sleep = 6,
      user = params.smokeuser,
      conf_dir = params.hadoop_conf_dir,
      bin_dir = params.hadoop_bin_dir
    )
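
# The service check above queues HdfsResource calls with action="create_on_execute"
# and flushes them all at once with HdfsResource(None, action="execute"). A toy
# version of that defer-then-flush pattern in plain Python (purely illustrative,
# not the Ambari implementation):
class DeferredActions(object):
    def __init__(self):
        self._pending = []

    def add(self, description, func, *args):
        # Record the work instead of doing it immediately.
        self._pending.append((description, func, args))

    def execute(self):
        # Run everything that was queued, in order, then clear the queue.
        for description, func, args in self._pending:
            func(*args)
        self._pending = []
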
Example #31
from resource_management.libraries.script.script import Script

# a map of the Ambari role to the component name
# for use with <stack-root>/current/<component>
SERVER_ROLE_DIRECTORY_MAP = {
    'ACCUMULO_MASTER': 'accumulo-master',
    'ACCUMULO_MONITOR': 'accumulo-monitor',
    'ACCUMULO_GC': 'accumulo-gc',
    'ACCUMULO_TRACER': 'accumulo-tracer',
    'ACCUMULO_TSERVER': 'accumulo-tablet',
    'ACCUMULO_CLIENT': 'accumulo-client'
}

component_directory = Script.get_component_from_role(SERVER_ROLE_DIRECTORY_MAP,
                                                     "ACCUMULO_CLIENT")

config = Script.get_config()
stack_root = Script.get_stack_root()

conf_dir = format('{stack_root}/current/{component_directory}/conf')
server_conf_dir = format('{conf_dir}/server')
pid_dir = config['configurations']['accumulo-env']['accumulo_pid_dir']
accumulo_user = config['configurations']['accumulo-env']['accumulo_user']

# Security related/required params
hostname = config['hostname']
security_enabled = config['configurations']['cluster-env']['security_enabled']
kinit_path_local = get_kinit_path(
    default('/configurations/kerberos-env/executable_search_paths', None))
tmp_dir = Script.get_tmp_dir()
Example #32
    _atlas_principal_name = config['configurations']['application-properties'][
        'atlas.authentication.principal']
    atlas_jaas_principal = _atlas_principal_name.replace(
        '_HOST', _hostname_lowercase)
    atlas_keytab_path = config['configurations']['application-properties'][
        'atlas.authentication.keytab']

# New Cluster Stack Version that is defined during the RESTART of a Stack Upgrade
version = default("/commandParams/version", None)

# stack version
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)

metadata_home = os.environ[
    'METADATA_HOME_DIR'] if 'METADATA_HOME_DIR' in os.environ else format(
        '{stack_root}/current/atlas-server')
metadata_bin = format("{metadata_home}/bin")

python_binary = os.environ[
    'PYTHON_EXE'] if 'PYTHON_EXE' in os.environ else sys.executable
metadata_start_script = format("{metadata_bin}/atlas_start.py")
metadata_stop_script = format("{metadata_bin}/atlas_stop.py")

# metadata local directory structure
log_dir = config['configurations']['atlas-env']['metadata_log_dir']

# service locations
hadoop_conf_dir = os.path.join(
    os.environ["HADOOP_HOME"],
    "conf") if 'HADOOP_HOME' in os.environ else '/etc/hadoop/conf'
Example #33
        ranger_plugin_config['policy.download.auth.users'] = kafka_user
        ranger_plugin_config['tag.download.auth.users'] = kafka_user
        ranger_plugin_config['ambari.service.check.user'] = policy_user

    #For curl command in ranger plugin to get db connector
    jdk_location = config['hostLevelParams']['jdk_location']
    java_share_dir = '/usr/share/java'
    previous_jdbc_jar_name = None

    if stack_supports_ranger_audit_db:
        if xa_audit_db_flavor and xa_audit_db_flavor == 'mysql':
            jdbc_jar_name = default("/hostLevelParams/custom_mysql_jdbc_name",
                                    None)
            previous_jdbc_jar_name = default(
                "/hostLevelParams/previous_custom_mysql_jdbc_name", None)
            audit_jdbc_url = format(
                'jdbc:mysql://{xa_db_host}/{xa_audit_db_name}')
            jdbc_driver = "com.mysql.jdbc.Driver"
        elif xa_audit_db_flavor and xa_audit_db_flavor == 'oracle':
            jdbc_jar_name = default("/hostLevelParams/custom_oracle_jdbc_name",
                                    None)
            previous_jdbc_jar_name = default(
                "/hostLevelParams/previous_custom_oracle_jdbc_name", None)
            colon_count = xa_db_host.count(':')
            if colon_count == 2 or colon_count == 0:
                audit_jdbc_url = format('jdbc:oracle:thin:@{xa_db_host}')
            else:
                audit_jdbc_url = format('jdbc:oracle:thin:@//{xa_db_host}')
            jdbc_driver = "oracle.jdbc.OracleDriver"
        elif xa_audit_db_flavor and xa_audit_db_flavor == 'postgres':
            jdbc_jar_name = default(
                "/hostLevelParams/custom_postgres_jdbc_name", None)
Example #34
 def stop(self, env):
     import params
     env.set_params(params)
     stop_cmd = format("service sidewinder stop")
     Execute(stop_cmd)
     print 'Stop Sidewinder'
Example #35
def get_daemon_cmd(params=None, node_type=None, command=None):
    return format(
        'source {params.druid_conf_dir}/druid-env.sh ; {params.druid_home}/bin/node.sh {node_type} {command}'
    )
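
# A hypothetical call site for the helper above (the node type "broker" and
# params.druid_user are illustrative assumptions, not taken from this snippet):
#
#   Execute(get_daemon_cmd(params, 'broker', 'start'), user=params.druid_user)
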
Example #36
# hadoop default parameters
mapreduce_libs_path = "/usr/lib/hadoop-mapreduce/*"
hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec")
hadoop_bin = stack_select.get_hadoop_dir("sbin")
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_home = stack_select.get_hadoop_dir("home")
hadoop_secure_dn_user = hdfs_user
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_conf_secure_dir = os.path.join(hadoop_conf_dir, "secure")
hadoop_lib_home = stack_select.get_hadoop_dir("lib")

# hadoop parameters for stacks that support rolling_upgrade
if stack_version_formatted and check_stack_feature(
        StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    mapreduce_libs_path = format(
        "{stack_root}/current/hadoop-mapreduce-client/*")

    if not security_enabled:
        hadoop_secure_dn_user = '******'
    else:
        dfs_dn_port = utils.get_port(dfs_dn_addr)
        dfs_dn_http_port = utils.get_port(dfs_dn_http_addr)
        dfs_dn_https_port = utils.get_port(dfs_dn_https_addr)
        # Avoid a situation where the DataNode cannot be started as a non-root user because secure (root-owned) ports are in use
        if dfs_http_policy == "HTTPS_ONLY":
            secure_dn_ports_are_in_use = utils.is_secure_port(
                dfs_dn_port) or utils.is_secure_port(dfs_dn_https_port)
        elif dfs_http_policy == "HTTP_AND_HTTPS":
            secure_dn_ports_are_in_use = utils.is_secure_port(
                dfs_dn_port) or utils.is_secure_port(
                    dfs_dn_http_port) or utils.is_secure_port(
Example #37
def spark_service(name, upgrade_type=None, action=None):
  import params

  if action == 'start':

    effective_version = params.version if upgrade_type is not None else params.stack_version_formatted
    if effective_version:
      effective_version = format_stack_version(effective_version)

    if effective_version and check_stack_feature(StackFeature.SPARK_16PLUS, effective_version):
      # copy spark-hdp-assembly.jar to hdfs
      copy_to_hdfs("spark", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      # create spark history directory
      params.HdfsResource(params.spark_history_dir,
                          type="directory",
                          action="create_on_execute",
                          owner=params.spark_user,
                          group=params.user_group,
                          mode=0777,
                          recursive_chmod=True
                          )
      params.HdfsResource(None, action="execute")

    if params.security_enabled:
      spark_kinit_cmd = format("{kinit_path_local} -kt {spark_kerberos_keytab} {spark_principal}; ")
      Execute(spark_kinit_cmd, user=params.spark_user)

    # Spark 1.3.1.2.3 and higher (shipped with HDP 2.3) no longer depends on Tez,
    # so the Tez tarball only needs to be copied for older stacks.
    if params.stack_version_formatted and check_stack_feature(StackFeature.TEZ_FOR_SPARK, params.stack_version_formatted):
      resource_created = copy_to_hdfs("tez", params.user_group, params.hdfs_user, host_sys_prepped=params.host_sys_prepped)
      if resource_created:
        params.HdfsResource(None, action="execute")

    if name == 'jobhistoryserver':
      historyserver_no_op_test = format(
      'ls {spark_history_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_history_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_history_server_start}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=historyserver_no_op_test)
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise

    elif name == 'sparkthriftserver':
      if params.security_enabled:
        hive_principal = params.hive_kerberos_principal.replace('_HOST', socket.getfqdn().lower())
        hive_kinit_cmd = format("{kinit_path_local} -kt {hive_kerberos_keytab} {hive_principal}; ")
        Execute(hive_kinit_cmd, user=params.hive_user)

      thriftserver_no_op_test = format(
      'ls {spark_thrift_server_pid_file} >/dev/null 2>&1 && ps -p `cat {spark_thrift_server_pid_file}` >/dev/null 2>&1')
      try:
        Execute(format('{spark_thrift_server_start} --properties-file {spark_thrift_server_conf_file} {spark_thrift_cmd_opts_properties}'),
                user=params.hive_user,
                environment={'JAVA_HOME': params.java_home},
                not_if=thriftserver_no_op_test
        )
      except:
        show_logs(params.spark_log_dir, user=params.hive_user)
        raise
  elif action == 'stop':
    if name == 'jobhistoryserver':
      try:
        Execute(format('{spark_history_server_stop}'),
                user=params.spark_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.spark_user)
        raise
      File(params.spark_history_server_pid_file,
        action="delete"
      )

    elif name == 'sparkthriftserver':
      try:
        Execute(format('{spark_thrift_server_stop}'),
                user=params.hive_user,
                environment={'JAVA_HOME': params.java_home}
        )
      except:
        show_logs(params.spark_log_dir, user=params.hive_user)
        raise
      File(params.spark_thrift_server_pid_file,
        action="delete"
      )
Example #38
    def _verify_llap_app_status(self, llap_app_info, llap_app_name,
                                return_immediately_if_stopped, curr_time):
        if llap_app_info is None or 'state' not in llap_app_info:
            Logger.error(
                "Malformed JSON data received for LLAP app. Exiting ....")
            return False

        # counters based on various states.
        live_instances = 0
        desired_instances = 0
        percent_desired_instances_to_be_up = 80  # Used in 'RUNNING_PARTIAL' state.
        if return_immediately_if_stopped and (llap_app_info['state'].upper()
                                              in ('APP_NOT_FOUND',
                                                  'COMPLETE')):
            return False
        if llap_app_info['state'].upper() == 'RUNNING_ALL':
            Logger.info("LLAP app '{0}' in '{1}' state.".format(
                llap_app_name, llap_app_info['state']))
            return True
        elif llap_app_info['state'].upper() == 'RUNNING_PARTIAL':
            # Check how many instances were up.
            if 'liveInstances' in llap_app_info and 'desiredInstances' in llap_app_info:
                live_instances = llap_app_info['liveInstances']
                desired_instances = llap_app_info['desiredInstances']
            else:
                Logger.info(
                  "LLAP app '{0}' is in '{1}' state, but 'instances' information not available in JSON received. " \
                  "Exiting ....".format(llap_app_name, llap_app_info['state']))
                Logger.info(llap_app_info)
                return False
            if desired_instances == 0:
                Logger.info(
                    "LLAP app '{0}' desired instance are set to 0. Exiting ...."
                    .format(llap_app_name))
                return False

            percentInstancesUp = 0
            if live_instances > 0:
                percentInstancesUp = float(
                    live_instances) / desired_instances * 100
            if percentInstancesUp >= percent_desired_instances_to_be_up:
                Logger.info("LLAP app '{0}' in '{1}' state. Live Instances : '{2}'  >= {3}% of Desired Instances : " \
                            "'{4}'.".format(llap_app_name, llap_app_info['state'],
                                            llap_app_info['liveInstances'],
                                            percent_desired_instances_to_be_up,
                                            llap_app_info['desiredInstances']))
                return True
            else:
                Logger.info("LLAP app '{0}' in '{1}' state. Live Instances : '{2}'. Desired Instances : " \
                            "'{3}' after {4} secs.".format(llap_app_name, llap_app_info['state'],
                                                           llap_app_info['liveInstances'],
                                                           llap_app_info['desiredInstances'],
                                                           time.time() - curr_time))
                raise Fail(
                    "App state is RUNNING_PARTIAL. Live Instances : '{0}', Desired Instance : '{1}'"
                    .format(llap_app_info['liveInstances'],
                            llap_app_info['desiredInstances']))
        elif llap_app_info['state'].upper() in [
                'APP_NOT_FOUND', 'LAUNCHING', 'COMPLETE'
        ]:
            status_str = "LLAP app '{0}' current state is {1}.".format(
                llap_app_name, llap_app_info['state'])
            Logger.info(status_str)
            raise Fail(status_str)
        else:  # Covers any unknown that we get.
            Logger.info(
                "LLAP app '{0}' current state is '{1}'. Expected : 'RUNNING'.".
                format(llap_app_name, llap_app_info['state']))
            return False
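
# The RUNNING_PARTIAL branch above boils down to a percentage test against the
# 80% threshold. Condensed into a small helper (illustrative only):
def _enough_instances_up(live, desired, threshold_pct=80):
    """Return True when the share of live instances meets the readiness threshold."""
    if desired == 0:
        return False
    return (float(live) / desired * 100) >= threshold_pct
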
Example #39
java_share_dir = '/usr/share/java'

if has_ranger_admin:
    enable_ranger_nifi = (
        config['configurations']['ranger-nifi-plugin-properties']['ranger-nifi-plugin-enabled'].lower() == 'yes')
    xa_audit_db_password = unicode(
        config['configurations']['admin-properties']['audit_db_password']) if stack_supports_ranger_audit_db else None
    repo_config_password = unicode(config['configurations']['ranger-env']['admin_password'])
    xa_audit_db_flavor = (config['configurations']['admin-properties']['DB_FLAVOR']).lower()
    previous_jdbc_jar_name = None

    if stack_supports_ranger_audit_db:
        if xa_audit_db_flavor == 'mysql':
            jdbc_jar_name = default("/hostLevelParams/custom_mysql_jdbc_name", None)
            previous_jdbc_jar_name = default("/hostLevelParams/previous_custom_mysql_jdbc_name", None)
            audit_jdbc_url = format('jdbc:mysql://{xa_db_host}/{xa_audit_db_name}')
            jdbc_driver = "com.mysql.jdbc.Driver"
        elif xa_audit_db_flavor == 'oracle':
            jdbc_jar_name = default("/hostLevelParams/custom_oracle_jdbc_name", None)
            previous_jdbc_jar_name = default("/hostLevelParams/previous_custom_oracle_jdbc_name", None)
            colon_count = xa_db_host.count(':')
            if colon_count == 2 or colon_count == 0:
                audit_jdbc_url = format('jdbc:oracle:thin:@{xa_db_host}')
            else:
                audit_jdbc_url = format('jdbc:oracle:thin:@//{xa_db_host}')
            jdbc_driver = "oracle.jdbc.OracleDriver"
        elif xa_audit_db_flavor == 'postgres':
            jdbc_jar_name = default("/hostLevelParams/custom_postgres_jdbc_name", None)
            previous_jdbc_jar_name = default("/hostLevelParams/previous_custom_postgres_jdbc_name", None)
            audit_jdbc_url = format('jdbc:postgresql://{xa_db_host}/{xa_audit_db_name}')
            jdbc_driver = "org.postgresql.Driver"
Example #40
    SERVER_ROLE_DIRECTORY_MAP, "HIVE_SERVER_INTERACTIVE")

config = Script.get_config()

stack_root = Script.get_stack_root()
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted_major = format_stack_version(stack_version_unformatted)

if OSCheck.is_windows_family():
    hive_metastore_win_service_name = "metastore"
    hive_client_win_service_name = "hwi"
    hive_server_win_service_name = "hiveserver2"
    webhcat_server_win_service_name = "templeton"
else:
    hive_pid_dir = config['configurations']['hive-env']['hive_pid_dir']
    hive_pid = format("{hive_pid_dir}/hive-server.pid")
    hive_interactive_pid = format("{hive_pid_dir}/hive-interactive.pid")
    hive_metastore_pid = format("{hive_pid_dir}/hive.pid")

    hcat_pid_dir = config['configurations']['hive-env'][
        'hcat_pid_dir']  #hcat_pid_dir
    webhcat_pid_file = format('{hcat_pid_dir}/webhcat.pid')

    process_name = 'mysqld'
    if OSCheck.is_suse_family() or OSCheck.is_ubuntu_family():
        daemon_name = 'mysql'
    else:
        daemon_name = 'mysqld'

    # Security related/required params
    hostname = config['hostname']
Example #41
hadoop_lib_home = stack_select.get_hadoop_dir("lib")

#hadoop params
if stack_version_formatted and check_stack_feature(
        StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    stack_version = None
    upgrade_stack = stack_select._get_upgrade_stack()
    if upgrade_stack is not None and len(
            upgrade_stack) == 2 and upgrade_stack[1] is not None:
        stack_version = upgrade_stack[1]

    # oozie-server or oozie-client, depending on role
    oozie_root = status_params.component_directory

    # using the correct oozie root dir, format the correct location
    oozie_lib_dir = format("{stack_root}/current/{oozie_root}")
    oozie_setup_sh = format(
        "{stack_root}/current/{oozie_root}/bin/oozie-setup.sh")
    oozie_webapps_dir = format(
        "{stack_root}/current/{oozie_root}/oozie-server/webapps")
    oozie_webapps_conf_dir = format(
        "{stack_root}/current/{oozie_root}/oozie-server/conf")
    oozie_libext_dir = format("{stack_root}/current/{oozie_root}/libext")
    oozie_server_dir = format("{stack_root}/current/{oozie_root}/oozie-server")
    oozie_shared_lib = format("{stack_root}/current/{oozie_root}/share")
    oozie_home = format("{stack_root}/current/{oozie_root}")
    oozie_bin_dir = format("{stack_root}/current/{oozie_root}/bin")
    oozie_examples_regex = format("{stack_root}/current/{oozie_root}/doc")

    # set the falcon home for copying JARs; if in an upgrade, then use the version of falcon that
    # matches the version of oozie
Example #42
if has_zk_host:
    if 'zoo.cfg' in config['configurations'] and 'clientPort' in config[
            'configurations']['zoo.cfg']:
        zookeeper_clientPort = config['configurations']['zoo.cfg'][
            'clientPort']
    else:
        zookeeper_clientPort = '2181'
    zookeeper_quorum = (':' + zookeeper_clientPort + ',').join(
        config['clusterHostInfo']['zookeeper_hosts'])
    # last port config
    zookeeper_quorum += ':' + zookeeper_clientPort

#hadoop params

if has_namenode or dfs_type == 'HCFS':
    hadoop_tmp_dir = format("/tmp/hadoop-{hdfs_user}")
    hadoop_conf_dir = conf_select.get_hadoop_conf_dir(
        force_latest_on_upgrade=True)
    task_log4j_properties_location = os.path.join(hadoop_conf_dir,
                                                  "task-log4j.properties")

hadoop_pid_dir_prefix = config['configurations']['hadoop-env'][
    'hadoop_pid_dir_prefix']
hdfs_log_dir_prefix = config['configurations']['hadoop-env'][
    'hdfs_log_dir_prefix']
hbase_tmp_dir = "/tmp/hbase-hbase"
#db params
server_db_name = config['hostLevelParams']['db_name']
db_driver_filename = config['hostLevelParams']['db_driver_filename']
oracle_driver_url = config['hostLevelParams']['oracle_jdbc_url']
mysql_driver_url = config['hostLevelParams']['mysql_jdbc_url']
Example #43
stack_supports_ranger_kerberos = stack_version_formatted and check_stack_feature(
    StackFeature.RANGER_KERBEROS_SUPPORT, stack_version_formatted)

# hadoop default parameters
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
daemon_script = "/usr/lib/hbase/bin/hbase-daemon.sh"
region_mover = "/usr/lib/hbase/bin/region_mover.rb"
region_drainer = "/usr/lib/hbase/bin/draining_servers.rb"
hbase_cmd = "/usr/lib/hbase/bin/hbase"
hbase_max_direct_memory_size = None

# hadoop parameters for stacks supporting rolling_upgrade
if stack_version_formatted and check_stack_feature(
        StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    daemon_script = format(
        '{stack_root}/current/hbase-client/bin/hbase-daemon.sh')
    region_mover = format(
        '{stack_root}/current/hbase-client/bin/region_mover.rb')
    region_drainer = format(
        '{stack_root}/current/hbase-client/bin/draining_servers.rb')
    hbase_cmd = format('{stack_root}/current/hbase-client/bin/hbase')

    hbase_max_direct_memory_size = default(
        'configurations/hbase-env/hbase_max_direct_memory_size', None)

    daemon_script = format(
        "{stack_root}/current/{component_directory}/bin/hbase-daemon.sh")
    region_mover = format(
        "{stack_root}/current/{component_directory}/bin/region_mover.rb")
    region_drainer = format(
        "{stack_root}/current/{component_directory}/bin/draining_servers.rb")
Example #44
    def _llap_start(self, env, cleanup=False):
        import params
        env.set_params(params)

        if params.hive_server_interactive_ha:
            """
        Check llap app state
        """
            Logger.info(
                "HSI HA is enabled. Checking if LLAP is already running ...")
            if params.stack_supports_hive_interactive_ga:
                status = self.check_llap_app_status_in_llap_ga(
                    params.llap_app_name, 2, params.hive_server_interactive_ha)
            else:
                status = self.check_llap_app_status_in_llap_tp(
                    params.llap_app_name, 2, params.hive_server_interactive_ha)

            if status:
                Logger.info("LLAP app '{0}' is already running.".format(
                    params.llap_app_name))
                return True
            else:
                Logger.info(
                    "LLAP app '{0}' is not running. llap will be started.".
                    format(params.llap_app_name))
            pass

        # Call for cleaning up the earlier run(s) LLAP package folders.
        self._cleanup_past_llap_package_dirs()

        Logger.info("Starting LLAP")
        LLAP_PACKAGE_CREATION_PATH = Script.get_tmp_dir()

        unique_name = "llap-slider%s" % datetime.utcnow().strftime(
            '%Y-%m-%d_%H-%M-%S')

        cmd = format(
            "{stack_root}/current/hive-server2-hive2/bin/hive --service llap --slider-am-container-mb {params.slider_am_container_mb} "
            "--size {params.llap_daemon_container_size}m --cache {params.hive_llap_io_mem_size}m --xmx {params.llap_heap_size}m "
            "--loglevel {params.llap_log_level} {params.llap_extra_slider_opts} --output {LLAP_PACKAGE_CREATION_PATH}/{unique_name}"
        )

        # Append params that are supported from Hive llap GA version.
        if params.stack_supports_hive_interactive_ga:
            # Figure out the Slider Anti-affinity to be used.
            # YARN does not support anti-affinity, and therefore Slider implements AA by means of exclusion lists, i.e., it
            # starts containers one by one and excludes the nodes it gets (adding a delay of ~2 sec./machine). When the LLAP
            # container memory size configuration is more than half of YARN node memory, AA is implicit and should be avoided.
            slider_placement = 4
            if long(params.llap_daemon_container_size) > (
                    0.5 * long(params.yarn_nm_mem)):
                slider_placement = 0
                Logger.info(
                    "Setting slider_placement : 0, as llap_daemon_container_size : {0} > 0.5 * "
                    "YARN NodeManager Memory({1})".format(
                        params.llap_daemon_container_size, params.yarn_nm_mem))
            else:
                Logger.info(
                    "Setting slider_placement: 4, as llap_daemon_container_size : {0} <= 0.5 * "
                    "YARN NodeManager Memory({1})".format(
                        params.llap_daemon_container_size, params.yarn_nm_mem))
            cmd += format(
                " --slider-placement {slider_placement} --skiphadoopversion --skiphbasecp --instances {params.num_llap_daemon_running_nodes}"
            )

            # Setup the logger for the ga version only
            cmd += format(" --logger {params.llap_logger}")
        else:
            cmd += format(" --instances {params.num_llap_nodes}")
        if params.security_enabled:
            llap_keytab_splits = params.hive_llap_keytab_file.split("/")
            Logger.debug("llap_keytab_splits : {0}".format(llap_keytab_splits))
            cmd += format(
                " --slider-keytab-dir .slider/keytabs/{params.hive_user}/ --slider-keytab "
                "{llap_keytab_splits[4]} --slider-principal {params.hive_llap_principal}"
            )

        # Add the aux jars if they are specified. If empty, we don't need to add this param.
        if params.hive_aux_jars:
            cmd += format(" --auxjars {params.hive_aux_jars}")

        # Append args.
        llap_java_args = InlineTemplate(
            params.llap_app_java_opts).get_content()
        cmd += format(" --args \" {llap_java_args}\"")
        # Append metaspace size to args.
        if params.java_version > 7 and params.llap_daemon_container_size > 4096:
            if params.llap_daemon_container_size <= 32768:
                metaspaceSize = "256m"
            else:
                metaspaceSize = "1024m"
            cmd = cmd[:-1] + " -XX:MetaspaceSize=" + metaspaceSize + "\""

        run_file_path = None
        try:
            Logger.info(format("LLAP start command: {cmd}"))
            code, output, error = shell.checked_call(cmd,
                                                     user=params.hive_user,
                                                     quiet=True,
                                                     stderr=subprocess.PIPE,
                                                     logoutput=True)

            if code != 0 or output is None:
                raise Fail(
                    "Command failed with either non-zero return code or no output."
                )

            # E.g., output:
            # Prepared llap-slider-05Apr2016/run.sh for running LLAP on Slider
            exp = r"Prepared (.*?run.sh) for running LLAP"
            run_file_path = None
            out_splits = output.split("\n")
            for line in out_splits:
                line = line.strip()
                m = re.match(exp, line, re.I)
                if m and len(m.groups()) == 1:
                    run_file_name = m.group(1)
                    run_file_path = os.path.join(params.hive_user_home_dir,
                                                 run_file_name)
                    break
            if not run_file_path:
                raise Fail("Did not find run.sh file in output: " +
                           str(output))

            Logger.info(format("Run file path: {run_file_path}"))
            Execute(run_file_path, user=params.hive_user, logoutput=True)
            Logger.info("Submitted LLAP app name : {0}".format(
                params.llap_app_name))

            # We need to check the status of the LLAP app to confirm that it launched properly and is in a
            # running state before going ahead with the Hive Interactive Server start.
            if params.stack_supports_hive_interactive_ga:
                status = self.check_llap_app_status_in_llap_ga(
                    params.llap_app_name,
                    params.num_retries_for_checking_llap_status)
            else:
                status = self.check_llap_app_status_in_llap_tp(
                    params.llap_app_name,
                    params.num_retries_for_checking_llap_status)
            if status:
                Logger.info("LLAP app '{0}' deployed successfully.".format(
                    params.llap_app_name))
                return True
            else:
                Logger.error("LLAP app '{0}' deployment unsuccessful.".format(
                    params.llap_app_name))
                return False
        except:
            # Attempt to clean up the packaged application, or potentially rename it with a .bak
            if run_file_path is not None and cleanup:
                parent_dir = os.path.dirname(run_file_path)
                Directory(
                    parent_dir,
                    action="delete",
                    ignore_failures=True,
                )

            # throw the original exception
            raise
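
# A minimal, standalone sketch of the slider-placement decision made above, so the
# threshold can be exercised in isolation. The helper name and the assert values are
# assumptions for illustration; the half-of-NodeManager-memory rule is taken from the
# example itself.
def choose_slider_placement(llap_daemon_container_size_mb, yarn_nm_mem_mb):
    # Placement 0 disables Slider anti-affinity; 4 keeps it enabled.
    if long(llap_daemon_container_size_mb) > 0.5 * long(yarn_nm_mem_mb):
        return 0
    return 4

assert choose_slider_placement(6144, 8192) == 0  # container > half of NM memory
assert choose_slider_placement(2048, 8192) == 4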
Example no. 45
    def status(self, env):
        import params
        env.set_params(params)
        status_cmd = format("service sidewinder status")
        Execute(status_cmd)
        print 'Status of the Master'
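
# A minimal sketch of a PID-file based liveness check, as an alternative to shelling
# out to "service sidewinder status". The helper name and pid-file layout are
# assumptions for illustration, not part of the original script.
import os

def is_process_running(pid_file):
    # True if the pid file exists and the recorded process is still alive.
    if not os.path.isfile(pid_file):
        return False
    try:
        pid = int(open(pid_file).read().strip())
        os.kill(pid, 0)  # signal 0 checks existence/permissions only
        return True
    except (ValueError, OSError):
        return False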
Example no. 46
    flume_principal_name = config['configurations']['flume-env'][
        'flume_principal_name']
    flume_keytab_path = config['configurations']['flume-env'][
        'flume_keytab_path']

stack_version_unformatted = config['clusterLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)

# hadoop default parameters
flume_bin = '/usr/bin/flume-ng'
flume_hive_home = '/usr/lib/hive'
flume_hcat_home = '/usr/lib/hive-hcatalog'

# hadoop parameters for stack supporting rolling upgrade
if stack_version_formatted and check_stack_feature(
        StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    flume_bin = format('{stack_root}/current/flume-server/bin/flume-ng')
    flume_hive_home = format('{stack_root}/current/hive-metastore')
    flume_hcat_home = format('{stack_root}/current/hive-webhcat')

java_home = config['ambariLevelParams']['java_home']
flume_log_dir = config['configurations']['flume-env']['flume_log_dir']
flume_run_dir = config['configurations']['flume-env']['flume_run_dir']
ambari_state_file = format("{flume_run_dir}/ambari-state.txt")

if (('flume-conf' in config['configurations'])
        and ('content' in config['configurations']['flume-conf'])):
    flume_conf_content = config['configurations']['flume-conf']['content']
else:
    flume_conf_content = None

if (('flume-log4j' in config['configurations'])
        and ('content' in config['configurations']['flume-log4j'])):
    flume_log4j_content = config['configurations']['flume-log4j']['content']
else:
    flume_log4j_content = None
Example no. 47
# hadoop default parameters
flume_bin = '/usr/bin/flume-ng'
flume_hive_home = '/usr/lib/hive'
flume_hcat_home = '/usr/lib/hive-hcatalog'

# hadoop parameters for 2.2+
if Script.is_stack_greater_or_equal("2.2"):
  flume_bin = '/usr/hdp/current/flume-server/bin/flume-ng'
  flume_hive_home = '/usr/hdp/current/hive-metastore'
  flume_hcat_home = '/usr/hdp/current/hive-webhcat'

java_home = config['hostLevelParams']['java_home']
flume_log_dir = config['configurations']['flume-env']['flume_log_dir']
flume_run_dir = config['configurations']['flume-env']['flume_run_dir']
ambari_state_file = format("{flume_run_dir}/ambari-state.txt")

if (('flume-conf' in config['configurations']) and('content' in config['configurations']['flume-conf'])):
  flume_conf_content = config['configurations']['flume-conf']['content']
else:
  flume_conf_content = None

if (('flume-log4j' in config['configurations']) and ('content' in config['configurations']['flume-log4j'])):
  flume_log4j_content = config['configurations']['flume-log4j']['content']
else:
  flume_log4j_content = None

targets = default('/commandParams/flume_handler', None)
flume_command_targets = [] if targets is None else targets.split(',')

flume_env_sh_template = config['configurations']['flume-env']['content']
Example no. 48
from resource_management.libraries.functions import default
from resource_management.libraries.functions import format
from resource_management.libraries.functions import conf_select
from resource_management.libraries.functions import hdp_select
from resource_management.libraries.functions import format_jvm_option
from resource_management.libraries.functions.is_empty import is_empty
from resource_management.libraries.functions.version import format_hdp_stack_version
from resource_management.libraries.functions.version import compare_versions
from ambari_commons.os_check import OSCheck
from ambari_commons.constants import AMBARI_SUDO_BINARY


config = Script.get_config()
tmp_dir = Script.get_tmp_dir()

artifact_dir = format("{tmp_dir}/AMBARI-artifacts/")
jdk_name = default("/hostLevelParams/jdk_name", None)
java_home = config['hostLevelParams']['java_home']
java_version = int(config['hostLevelParams']['java_version'])
jdk_location = config['hostLevelParams']['jdk_location']

sudo = AMBARI_SUDO_BINARY

ambari_server_hostname = config['clusterHostInfo']['ambari_server_host'][0]

stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
hdp_stack_version = format_hdp_stack_version(stack_version_unformatted)

restart_type = default("/commandParams/restart_type", "")
version = default("/commandParams/version", None)
# Handle upgrade and downgrade
Example no. 49
stack_name = status_params.stack_name
component_directory = status_params.component_directory

# New Cluster Stack Version that is defined during the RESTART of a Rolling Upgrade
version = default("/commandParams/version", None)

# default parameters
zk_home = "/usr"
zk_bin = "/usr/lib/zookeeper/bin"
zk_cli_shell = "/usr/lib/zookeeper/bin/zkCli.sh"
config_dir = "/etc/zookeeper/conf"
zk_smoke_out = os.path.join(tmp_dir, "zkSmoke.out")

# hadoop parameters for stacks that support rolling_upgrade
if stack_version_formatted and check_stack_feature(StackFeature.ROLLING_UPGRADE, stack_version_formatted):
  zk_home = format("{stack_root}/current/{component_directory}")
  zk_bin = format("{stack_root}/current/{component_directory}/bin")
  zk_cli_shell = format("{stack_root}/current/{component_directory}/bin/zkCli.sh")
  config_dir = status_params.config_dir


zk_user = config['configurations']['zookeeper-env']['zk_user']
hostname = config['agentLevelParams']['hostname']
user_group = config['configurations']['cluster-env']['user_group']
zk_env_sh_template = config['configurations']['zookeeper-env']['content']

zk_log_dir = config['configurations']['zookeeper-env']['zk_log_dir']
zk_data_dir = config['configurations']['zoo.cfg']['dataDir']
zk_pid_dir = status_params.zk_pid_dir
zk_pid_file = status_params.zk_pid_file
zk_server_heapsize_value = str(default('configurations/zookeeper-env/zk_server_heapsize', "1024"))
Example no. 50
# server configurations
config = Script.get_config()
stack_root = Script.get_stack_root()
stack_name = default("/hostLevelParams/stack_name", None)
user_group = config['configurations']['cluster-env']['user_group']

# stack version
stack_version = default("/commandParams/version", None)

hostname = config['hostname']

# status params
status_pid_dir = status_params.superset_pid_dir

superset_home_dir = format("{stack_root}/current/superset")
superset_bin_dir = format("{superset_home_dir}/bin")
superset_log_dir = default("/configurations/superset-env/superset_log_dir",
                           '/var/log/superset')
superset_pid_dir = status_params.superset_pid_dir
superset_config_dir = '/etc/superset/conf'
superset_admin_user = config['configurations']['superset-env'][
    'superset_admin_user']
superset_admin_password = config['configurations']['superset-env'][
    'superset_admin_password']
superset_admin_firstname = config['configurations']['superset-env'][
    'superset_admin_firstname']
superset_admin_lastname = config['configurations']['superset-env'][
    'superset_admin_lastname']
superset_admin_email = config['configurations']['superset-env'][
    'superset_admin_email']
Example no. 51
# E.g., 2.3
stack_version_unformatted = str(config['hostLevelParams']['stack_version'])
hdp_stack_version = format_hdp_stack_version(stack_version_unformatted)

# This is the version whose state is CURRENT. During an RU, this is the source version.
# DO NOT format it since we need the build number too.
upgrade_from_version = default("/hostLevelParams/current_version", None)

# server configurations
# Default value used in HDP 2.3.0.0 and earlier.

knox_data_dir = '/var/lib/knox/data'

# Important, it has to be strictly greater than 2.3.0.0!!!
if stack_name and stack_name.upper() == "HDP":
    Logger.info(format("HDP version to use is {version_formatted}"))
    if Script.is_hdp_stack_greater(version_formatted, "2.3.0.0"):
        # This is the current version. In the case of a Rolling Upgrade, it will be the newer version.
        # In the case of a Downgrade, it will be the version downgrading to.
        # This is always going to be a symlink to /var/lib/knox/data_${version}
        knox_data_dir = format('/usr/hdp/{version}/knox/data')
        Logger.info(
            format(
                "Detected HDP with stack version {version}, will use knox_data_dir = {knox_data_dir}"
            ))

knox_master_secret_path = format('{knox_data_dir}/security/master')
knox_cert_store_path = format('{knox_data_dir}/security/keystores/gateway.jks')
knox_user = default("/configurations/knox-env/knox_user", "knox")

# server configurations
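
# A hedged sketch of the "strictly greater than 2.3.0.0" check described above, reusing
# the version helpers these params modules already import; the wrapper name and the use
# of a plain boolean are assumptions for illustration.
from resource_management.libraries.functions.version import format_hdp_stack_version
from resource_management.libraries.functions.version import compare_versions

def uses_versioned_knox_data_dir(version_str):
    formatted = format_hdp_stack_version(version_str)
    return bool(formatted) and compare_versions(formatted, "2.3.0.0") > 0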
Example no. 52
stack_version_unformatted = config['hostLevelParams']['stack_version']
stack_version_formatted = format_stack_version(stack_version_unformatted)

has_secure_user_auth = False
if stack_version_formatted and \
    check_stack_feature(StackFeature.ACCUMULO_KERBEROS_USER_AUTH, stack_version_formatted):
    has_secure_user_auth = True

# configuration directories
conf_dir = status_params.conf_dir
server_conf_dir = status_params.server_conf_dir

# service locations
hadoop_prefix = stack_select.get_hadoop_dir("home")
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
zookeeper_home = format("{stack_root}/current/zookeeper-client")

# the configuration directory for HDFS/YARN/MapReduce is the hadoop config
# directory, which is symlinked by hadoop-client only
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()

# accumulo local directory structure
log_dir = config['configurations']['accumulo-env']['accumulo_log_dir']
client_script = format("{stack_root}/current/accumulo-client/bin/accumulo")
daemon_script = format("ACCUMULO_CONF_DIR={server_conf_dir} {client_script}")

# user and status
accumulo_user = status_params.accumulo_user
user_group = config['configurations']['cluster-env']['user_group']
pid_dir = status_params.pid_dir
Example no. 53
def kafka(upgrade_type=None):
    import params
    ensure_base_directories()

    kafka_server_config = mutable_config_dict(
        params.config['configurations']['kafka-broker'])
    # This still has an issue of hostnames being alphabetically out-of-order for broker.id in HDP-2.2.
    # Starting in HDP 2.3, Kafka handles the generation of broker.id so Ambari doesn't have to.

    effective_version = params.stack_version_formatted if upgrade_type is None else format_stack_version(
        params.version)
    Logger.info(format("Effective stack version: {effective_version}"))

    if effective_version is not None and effective_version != "" and \
      check_stack_feature(StackFeature.CREATE_KAFKA_BROKER_ID, effective_version):
        if len(params.kafka_hosts
               ) > 0 and params.hostname in params.kafka_hosts:
            brokerid = str(sorted(params.kafka_hosts).index(params.hostname))
            kafka_server_config['broker.id'] = brokerid
            Logger.info(format("Calculating broker.id as {brokerid}"))

    # listeners and advertised.listeners are only added in 2.3.0.0 onwards.
    if effective_version is not None and effective_version != "" and \
        check_stack_feature(StackFeature.KAFKA_LISTENERS, effective_version):
        listeners = kafka_server_config['listeners'].replace(
            "localhost", params.hostname)
        Logger.info(format("Kafka listeners: {listeners}"))

        if params.security_enabled and params.kafka_kerberos_enabled:
            Logger.info("Kafka kerberos security is enabled.")
            if "SASL" not in listeners:
                listeners = listeners.replace("PLAINTEXT", "PLAINTEXTSASL")

            kafka_server_config['listeners'] = listeners
            kafka_server_config['advertised.listeners'] = listeners
            Logger.info(format("Kafka advertised listeners: {listeners}"))
        else:
            kafka_server_config['listeners'] = listeners

            if 'advertised.listeners' in kafka_server_config:
                advertised_listeners = kafka_server_config[
                    'advertised.listeners'].replace("localhost",
                                                    params.hostname)
                kafka_server_config[
                    'advertised.listeners'] = advertised_listeners
                Logger.info(
                    format(
                        "Kafka advertised listeners: {advertised_listeners}"))
    else:
        kafka_server_config['host.name'] = params.hostname

    if params.has_metric_collector:
        kafka_server_config[
            'kafka.timeline.metrics.host'] = params.metric_collector_host
        kafka_server_config[
            'kafka.timeline.metrics.port'] = params.metric_collector_port
        kafka_server_config[
            'kafka.timeline.metrics.protocol'] = params.metric_collector_protocol
        kafka_server_config[
            'kafka.timeline.metrics.truststore.path'] = params.metric_truststore_path
        kafka_server_config[
            'kafka.timeline.metrics.truststore.type'] = params.metric_truststore_type
        kafka_server_config[
            'kafka.timeline.metrics.truststore.password'] = params.metric_truststore_password

    kafka_data_dir = kafka_server_config['log.dirs']
    kafka_data_dirs = filter(None, kafka_data_dir.split(","))
    Directory(
        kafka_data_dirs,
        mode=0755,
        cd_access='a',
        owner=params.kafka_user,
        group=params.user_group,
        create_parents=True,
        recursive_ownership=True,
    )

    PropertiesFile(
        "server.properties",
        dir=params.conf_dir,
        properties=kafka_server_config,
        owner=params.kafka_user,
        group=params.user_group,
    )

    File(format("{conf_dir}/kafka-env.sh"),
         owner=params.kafka_user,
         content=InlineTemplate(params.kafka_env_sh_template))

    if params.log4j_props is not None:
        File(format("{conf_dir}/log4j.properties"),
             mode=0644,
             group=params.user_group,
             owner=params.kafka_user,
             content=params.log4j_props)

    if params.security_enabled and params.kafka_kerberos_enabled:
        TemplateConfig(format("{conf_dir}/kafka_jaas.conf"),
                       owner=params.kafka_user)

        TemplateConfig(format("{conf_dir}/kafka_client_jaas.conf"),
                       owner=params.kafka_user)

    # On some OSes this folder may not exist, so create it before pushing files there.
    Directory(params.limits_conf_dir,
              create_parents=True,
              owner='root',
              group='root')

    File(os.path.join(params.limits_conf_dir, 'kafka.conf'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("kafka.conf.j2"))

    File(os.path.join(params.conf_dir, 'tools-log4j.properties'),
         owner='root',
         group='root',
         mode=0644,
         content=Template("tools-log4j.properties.j2"))

    setup_symlink(params.kafka_managed_pid_dir, params.kafka_pid_dir)
    setup_symlink(params.kafka_managed_log_dir, params.kafka_log_dir)
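
# A minimal sketch of the broker.id derivation above: each broker is assigned the index
# of its own hostname in the alphabetically sorted broker host list. Host names here are
# made up for illustration.
example_kafka_hosts = ['kafka3.example.com', 'kafka1.example.com', 'kafka2.example.com']
example_hostname = 'kafka2.example.com'
example_broker_id = str(sorted(example_kafka_hosts).index(example_hostname))
assert example_broker_id == '1'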
Example no. 54
stack_supports_ranger_audit_db = check_stack_feature(
    StackFeature.RANGER_AUDIT_DB_SUPPORT, version_for_stack_feature_checks)

# hadoop default parameters
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
daemon_script = "/usr/lib/hbase/bin/hbase-daemon.sh"
region_mover = "/usr/lib/hbase/bin/region_mover.rb"
region_drainer = "/usr/lib/hbase/bin/draining_servers.rb"
hbase_cmd = "/usr/lib/hbase/bin/hbase"
hbase_max_direct_memory_size = None

# hadoop parameters for stacks supporting rolling_upgrade
if stack_version_formatted and check_stack_feature(
        StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    daemon_script = format(
        '{stack_root}/current/hbase-client/bin/hbase-daemon.sh')
    region_mover = format(
        '{stack_root}/current/hbase-client/bin/region_mover.rb')
    region_drainer = format(
        '{stack_root}/current/hbase-client/bin/draining_servers.rb')
    hbase_cmd = format('{stack_root}/current/hbase-client/bin/hbase')

    hbase_max_direct_memory_size = default(
        'configurations/hbase-env/hbase_max_direct_memory_size', None)

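    # Note: the assignments below override the hbase-client paths set above,
    # pointing at the component-specific directory instead.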
    daemon_script = format(
        "{stack_root}/current/{component_directory}/bin/hbase-daemon.sh")
    region_mover = format(
        "{stack_root}/current/{component_directory}/bin/region_mover.rb")
    region_drainer = format(
        "{stack_root}/current/{component_directory}/bin/draining_servers.rb")
Example no. 55
if has_kafka_host:
    if 'port' in config['configurations']['kafka-broker']:
        kafka_broker_port = config['configurations']['kafka-broker']['port']
    else:
        kafka_broker_port = '6667'
    kafka_brokers = (':' + kafka_broker_port + ',').join(
        config['clusterHostInfo']['kafka_broker_hosts'])
    kafka_brokers += ':' + kafka_broker_port

metron_apps_hdfs_dir = config['configurations']['metron-env'][
    'metron_apps_hdfs_dir']

# the double "format" is not an error - we are pulling in a jinja-templated param. This is a bit of a hack, but works
# well enough until we find a better way via Ambari
metron_apps_indexed_hdfs_dir = format(
    format(config['configurations']['metron-env']
           ['metron_apps_indexed_hdfs_dir']))
metron_topic_retention = config['configurations']['metron-env'][
    'metron_topic_retention']

local_grok_patterns_dir = format("{metron_home}/patterns")
hdfs_grok_patterns_dir = format("{metron_apps_hdfs_dir}/patterns")

# for create_hdfs_directory
security_enabled = config['configurations']['cluster-env']['security_enabled']
hdfs_user_keytab = config['configurations']['hadoop-env']['hdfs_user_keytab']
hdfs_user = config['configurations']['hadoop-env']['hdfs_user']
hdfs_principal_name = config['configurations']['hadoop-env'][
    'hdfs_principal_name']
smokeuser_principal = config['configurations']['cluster-env'][
    'smokeuser_principal_name']
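
# A minimal sketch of the broker-list construction above, with made-up host names and
# the default port; the variable names are only for illustration.
example_broker_hosts = ['kafka1.example.com', 'kafka2.example.com']
example_broker_port = '6667'
example_brokers = (':' + example_broker_port + ',').join(example_broker_hosts)
example_brokers += ':' + example_broker_port
assert example_brokers == 'kafka1.example.com:6667,kafka2.example.com:6667'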
Example no. 56
def druid(upgrade_type=None, nodeType=None):
    import params
    ensure_base_directories()

    # Environment Variables
    File(format("{params.druid_conf_dir}/druid-env.sh"),
         owner=params.druid_user,
         content=InlineTemplate(params.druid_env_sh_template))

    # common config
    druid_common_config = mutable_config_dict(
        params.config['configurations']['druid-common'])
    # User cannot override below configs
    druid_common_config['druid.host'] = params.hostname
    druid_common_config[
        'druid.extensions.directory'] = params.druid_extensions_dir
    druid_common_config[
        'druid.extensions.hadoopDependenciesDir'] = params.druid_hadoop_dependencies_dir
    druid_common_config[
        'druid.selectors.indexing.serviceName'] = params.config[
            'configurations']['druid-overlord']['druid.service']
    druid_common_config['druid.selectors.coordinator.serviceName'] = \
      params.config['configurations']['druid-coordinator']['druid.service']

    # Delete the connector user and password when Derby is the metadata storage type, otherwise Derby will fail.
    if 'derby' == druid_common_config['druid.metadata.storage.type']:
        del druid_common_config['druid.metadata.storage.connector.user']
        del druid_common_config['druid.metadata.storage.connector.password']

    druid_env_config = mutable_config_dict(
        params.config['configurations']['druid-env'])

    PropertiesFile(
        "common.runtime.properties",
        dir=params.druid_common_conf_dir,
        properties=druid_common_config,
        owner=params.druid_user,
        group=params.user_group,
    )
    Logger.info("Created common.runtime.properties")

    File(format("{params.druid_common_conf_dir}/druid-log4j.xml"),
         mode=0644,
         owner=params.druid_user,
         group=params.user_group,
         content=InlineTemplate(params.log4j_props))
    Logger.info("Created log4j file")

    File("/etc/logrotate.d/druid",
         mode=0644,
         owner='root',
         group='root',
         content=InlineTemplate(params.logrotate_props))

    Logger.info("Created log rotate file")

    # Write Hadoop Configs if configured
    if 'core-site' in params.config['configurations']:
        XmlConfig(
            "core-site.xml",
            conf_dir=params.druid_common_conf_dir,
            configurations=params.config['configurations']['core-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['core-site'],
            owner=params.druid_user,
            group=params.user_group)

    if 'mapred-site' in params.config['configurations']:
        XmlConfig(
            "mapred-site.xml",
            conf_dir=params.druid_common_conf_dir,
            configurations=params.config['configurations']['mapred-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['mapred-site'],
            owner=params.druid_user,
            group=params.user_group)

    if 'yarn-site' in params.config['configurations']:
        XmlConfig(
            "yarn-site.xml",
            conf_dir=params.druid_common_conf_dir,
            configurations=params.config['configurations']['yarn-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['yarn-site'],
            owner=params.druid_user,
            group=params.user_group)

    if 'hdfs-site' in params.config['configurations']:
        XmlConfig(
            "hdfs-site.xml",
            conf_dir=params.druid_common_conf_dir,
            configurations=params.config['configurations']['hdfs-site'],
            configuration_attributes=params.config['configuration_attributes']
            ['hdfs-site'],
            owner=params.druid_user,
            group=params.user_group)

    # node specific configs
    for node_type in [
            'coordinator', 'overlord', 'historical', 'broker', 'middleManager',
            'router'
    ]:
        node_config_dir = format('{params.druid_conf_dir}/{node_type}')
        node_type_lowercase = node_type.lower()

        # Write runtime.properties file
        node_config = mutable_config_dict(params.config['configurations'][
            format('druid-{node_type_lowercase}')])
        PropertiesFile(
            "runtime.properties",
            dir=node_config_dir,
            properties=node_config,
            owner=params.druid_user,
            group=params.user_group,
        )
        Logger.info(
            format("Created druid-{node_type_lowercase} runtime.properties"))

        # Write jvm configs
        File(
            format('{node_config_dir}/jvm.config'),
            owner=params.druid_user,
            group=params.user_group,
            content=InlineTemplate(
                "-server \n-Xms{{node_heap_memory}}m \n-Xmx{{node_heap_memory}}m \n-XX:MaxDirectMemorySize={{node_direct_memory}}m \n-Dlog4j.configurationFile={{log4j_config_file}} \n-Dlog4j.debug \n{{node_jvm_opts}}",
                node_heap_memory=druid_env_config[format(
                    'druid.{node_type_lowercase}.jvm.heap.memory')],
                log4j_config_file=format(
                    "{params.druid_common_conf_dir}/druid-log4j.xml"),
                node_direct_memory=druid_env_config[format(
                    'druid.{node_type_lowercase}.jvm.direct.memory')],
                node_jvm_opts=druid_env_config[format(
                    'druid.{node_type_lowercase}.jvm.opts')]))
        Logger.info(format("Created druid-{node_type_lowercase} jvm.config"))

    # All druid nodes have dependency on hdfs_client
    ensure_hadoop_directories()
    download_database_connector_if_needed()
    # Pull all required dependencies
    pulldeps()
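
# mutable_config_dict() is used above but not shown in this example. A plausible minimal
# sketch, assuming it only produces a plain, mutable copy of the read-only configuration
# mapping delivered by Ambari:
def mutable_config_dict(config_dict):
    mutable = {}
    for key, value in config_dict.items():
        mutable[key] = value
    return mutable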
Example no. 57
    def recommendDruidConfigurations(self, configurations, clusterData,
                                     services, hosts):

        # druid is not in list of services to be installed
        if 'druid-common' not in services['configurations']:
            return

        componentsListList = [
            service["components"] for service in services["services"]
        ]
        componentsList = [
            item["StackServiceComponents"] for sublist in componentsListList
            for item in sublist
        ]
        servicesList = [
            service["StackServices"]["service_name"]
            for service in services["services"]
        ]
        putCommonProperty = self.putProperty(configurations, "druid-common",
                                             services)

        putCommonProperty('druid.zk.service.host',
                          self.getZKHostPortString(services))
        self.recommendDruidMaxMemoryLimitConfigurations(
            configurations, clusterData, services, hosts)

        # recommending the metadata storage uri
        database_name = services['configurations']["druid-common"][
            "properties"]["database_name"]
        metastore_hostname = services['configurations']["druid-common"][
            "properties"]["metastore_hostname"]
        database_type = services['configurations']["druid-common"][
            "properties"]["druid.metadata.storage.type"]
        metadata_storage_port = "1527"
        mysql_module_name = "mysql-metadata-storage"
        postgres_module_name = "postgresql-metadata-storage"
        extensions_load_list = services['configurations']['druid-common'][
            'properties']['druid.extensions.loadList']
        putDruidCommonProperty = self.putProperty(configurations,
                                                  "druid-common", services)

        extensions_load_list = self.removeFromList(extensions_load_list,
                                                   mysql_module_name)
        extensions_load_list = self.removeFromList(extensions_load_list,
                                                   postgres_module_name)

        if database_type == 'mysql':
            metadata_storage_port = "3306"
            extensions_load_list = self.addToList(extensions_load_list,
                                                  mysql_module_name)

        if database_type == 'postgresql':
            extensions_load_list = self.addToList(extensions_load_list,
                                                  postgres_module_name)
            metadata_storage_port = "5432"

        putDruidCommonProperty('druid.metadata.storage.connector.port',
                               metadata_storage_port)
        putDruidCommonProperty(
            'druid.metadata.storage.connector.connectURI',
            self.getMetadataConnectionString(database_type).format(
                metastore_hostname, database_name, metadata_storage_port))
        # HDFS is installed
        if "HDFS" in servicesList and "hdfs-site" in services["configurations"]:
            # recommend HDFS as default deep storage
            extensions_load_list = self.addToList(extensions_load_list,
                                                  "druid-hdfs-storage")
            putCommonProperty("druid.storage.type", "hdfs")
            putCommonProperty("druid.storage.storageDirectory",
                              "/user/druid/data")
            # configure indexer logs configs
            putCommonProperty("druid.indexer.logs.type", "hdfs")
            putCommonProperty("druid.indexer.logs.directory",
                              "/user/druid/logs")

        if "KAFKA" in servicesList:
            extensions_load_list = self.addToList(
                extensions_load_list, "druid-kafka-indexing-service")

        if 'AMBARI_METRICS' in servicesList:
            extensions_load_list = self.addToList(extensions_load_list,
                                                  "ambari-metrics-emitter")

        putCommonProperty('druid.extensions.loadList', extensions_load_list)

        # JVM Configs go to env properties
        putEnvProperty = self.putProperty(configurations, "druid-env",
                                          services)

        # processing thread pool Config
        for component in ['DRUID_HISTORICAL', 'DRUID_BROKER']:
            component_hosts = self.getHostsWithComponent(
                "DRUID", component, services, hosts)
            nodeType = self.DRUID_COMPONENT_NODE_TYPE_MAP[component]
            putComponentProperty = self.putProperty(configurations,
                                                    format("druid-{nodeType}"),
                                                    services)
            if (component_hosts is not None and len(component_hosts) > 0):
                totalAvailableCpu = self.getMinCpu(component_hosts)
                processingThreads = 1
                if totalAvailableCpu > 1:
                    processingThreads = totalAvailableCpu - 1
                putComponentProperty('druid.processing.numThreads',
                                     processingThreads)
                putComponentProperty(
                    'druid.server.http.numThreads',
                    max(10, (totalAvailableCpu * 17) / 16 + 2) + 30)

        # superset is in list of services to be installed
        if 'druid-superset' in services['configurations']:
            # Recommendations for Superset
            superset_database_type = services['configurations'][
                "druid-superset"]["properties"]["SUPERSET_DATABASE_TYPE"]
            putSupersetProperty = self.putProperty(configurations,
                                                   "druid-superset", services)

            if superset_database_type == "mysql":
                putSupersetProperty("SUPERSET_DATABASE_PORT", "3306")
            elif superset_database_type == "postgresql":
                putSupersetProperty("SUPERSET_DATABASE_PORT", "5432")
Example no. 58
def getPid(params=None, nodeType=None):
    return format('{params.druid_pid_dir}/{nodeType}.pid')
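
# Example usage (hypothetical values): with params.druid_pid_dir = '/var/run/druid' and
# nodeType = 'broker', getPid() resolves to '/var/run/druid/broker.pid'.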
Example no. 59
# hadoop default parameters
mapreduce_libs_path = "/usr/lib/hadoop-mapreduce/*"
hadoop_libexec_dir = stack_select.get_hadoop_dir("libexec")
hadoop_bin = stack_select.get_hadoop_dir("sbin")
hadoop_bin_dir = stack_select.get_hadoop_dir("bin")
hadoop_home = stack_select.get_hadoop_dir("home")
hadoop_secure_dn_user = hdfs_user
hadoop_conf_dir = conf_select.get_hadoop_conf_dir()
hadoop_conf_secure_dir = os.path.join(hadoop_conf_dir, "secure")
hadoop_lib_home = stack_select.get_hadoop_dir("lib")

# hadoop parameters for stacks that support rolling_upgrade
if stack_version_formatted and check_stack_feature(
        StackFeature.ROLLING_UPGRADE, stack_version_formatted):
    mapreduce_libs_path = format(
        "{stack_root}/current/hadoop-mapreduce-client/*")

    if not security_enabled:
        hadoop_secure_dn_user = '******'
    else:
        dfs_dn_port = utils.get_port(dfs_dn_addr)
        dfs_dn_http_port = utils.get_port(dfs_dn_http_addr)
        dfs_dn_https_port = utils.get_port(dfs_dn_https_addr)
        # We try to avoid inability to start datanode as a plain user due to usage of root-owned ports
        if dfs_http_policy == "HTTPS_ONLY":
            secure_dn_ports_are_in_use = utils.is_secure_port(
                dfs_dn_port) or utils.is_secure_port(dfs_dn_https_port)
        elif dfs_http_policy == "HTTP_AND_HTTPS":
            secure_dn_ports_are_in_use = utils.is_secure_port(
                dfs_dn_port) or utils.is_secure_port(
                    dfs_dn_http_port) or utils.is_secure_port(
                        dfs_dn_https_port)
Example no. 60
    def prepare_libext_directory(upgrade_type=None):
        """
    Performs the following actions on libext:
      - creates <stack-root>/current/oozie/libext and recursively
      - set 777 permissions on it and its parents.
      - downloads JDBC driver JAR if needed
      - copies Falcon JAR for the Oozie WAR if needed
    """
        import params

        # some stack versions don't need the lzo compression libraries
        target_version_needs_compression_libraries = params.version and check_stack_feature(
            StackFeature.LZO, params.version)

        # ensure the directory exists
        Directory(params.oozie_libext_dir, mode=0777)

        # get all hadooplzo* JAR files
        # <stack-selector-tool> set hadoop-client has not run yet, therefore we cannot use
        # <stack-root>/current/hadoop-client ; we must use params.version directly
        # however, this only works when upgrading beyond 2.2.0.0; don't do this
        # for downgrade to 2.2.0.0 since hadoop-lzo will not be present
        # This can also be called during a Downgrade.
        # When a version is Installed, it is responsible for downloading the hadoop-lzo packages
        # if lzo is enabled.
        if params.lzo_enabled and (
                params.upgrade_direction == Direction.UPGRADE
                or target_version_needs_compression_libraries):
            hadoop_lzo_pattern = 'hadoop-lzo*.jar'
            hadoop_client_new_lib_dir = format(
                "{stack_root}/{version}/hadoop/lib")

            # use glob.glob (not iglob) so the emptiness check below works on a list
            files = glob.glob(
                os.path.join(hadoop_client_new_lib_dir, hadoop_lzo_pattern))
            if not files:
                raise Fail("There are no files at {0} matching {1}".format(
                    hadoop_client_new_lib_dir, hadoop_lzo_pattern))

            # copy files into libext
            files_copied = False
            for file in files:
                if os.path.isfile(file):
                    Logger.info("Copying {0} to {1}".format(
                        str(file), params.oozie_libext_dir))
                    shutil.copy2(file, params.oozie_libext_dir)
                    files_copied = True

            if not files_copied:
                raise Fail("There are no files at {0} matching {1}".format(
                    hadoop_client_new_lib_dir, hadoop_lzo_pattern))

        # something like <stack-root>/current/oozie-server/libext/ext-2.2.zip
        oozie_ext_zip_target_path = os.path.join(params.oozie_libext_dir,
                                                 params.ext_js_file)

        # Copy ext ZIP to libext dir
        # Default to /usr/share/$TARGETSTACK-oozie/ext-2.2.zip as the first path
        source_ext_zip_paths = oozie.get_oozie_ext_zip_source_paths(
            upgrade_type, params)

        found_at_least_one_oozie_ext_file = False

        # Copy the first oozie ext-2.2.zip file that is found.
        # This uses a list to handle the cases when migrating from some versions of BigInsights to HDP.
        if source_ext_zip_paths is not None:
            for source_ext_zip_path in source_ext_zip_paths:
                if os.path.isfile(source_ext_zip_path):
                    found_at_least_one_oozie_ext_file = True
                    Logger.info("Copying {0} to {1}".format(
                        source_ext_zip_path, params.oozie_libext_dir))
                    Execute(
                        ("cp", source_ext_zip_path, params.oozie_libext_dir),
                        sudo=True)
                    Execute(("chown", format("{oozie_user}:{user_group}"),
                             oozie_ext_zip_target_path),
                            sudo=True)
                    File(oozie_ext_zip_target_path, mode=0644)
                    break

        if not found_at_least_one_oozie_ext_file:
            raise Fail(
                "Unable to find any Oozie source extension files from the following paths {0}"
                .format(source_ext_zip_paths))

        # Redownload jdbc driver to a new current location
        oozie.download_database_library_if_needed()

        # get the upgrade version in the event that it's needed
        upgrade_stack = stack_select._get_upgrade_stack()
        if upgrade_stack is None or len(
                upgrade_stack) < 2 or upgrade_stack[1] is None:
            raise Fail(
                "Unable to determine the stack that is being upgraded to or downgraded to."
            )

        stack_version = upgrade_stack[1]

        # copy the Falcon JAR if needed; falcon has not upgraded yet, so we must
        # use the versioned falcon directory
        if params.has_falcon_host:
            versioned_falcon_jar_directory = "{0}/{1}/falcon/oozie/ext/falcon-oozie-el-extension-*.jar".format(
                params.stack_root, stack_version)
            Logger.info("Copying {0} to {1}".format(
                versioned_falcon_jar_directory, params.oozie_libext_dir))

            Execute(
                format(
                    '{sudo} cp {versioned_falcon_jar_directory} {oozie_libext_dir}'
                ))
            Execute(
                format(
                    '{sudo} chown {oozie_user}:{user_group} {oozie_libext_dir}/falcon-oozie-el-extension-*.jar'
                ))
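
# A minimal, standard-library sketch of the hadoop-lzo jar copy performed above; the
# function name and return value are assumptions for illustration.
import glob
import os
import shutil

def copy_matching_jars(src_dir, pattern, dest_dir):
    # Returns the list of copied jar paths; an empty list means nothing matched.
    copied = []
    for path in glob.glob(os.path.join(src_dir, pattern)):
        if os.path.isfile(path):
            shutil.copy2(path, dest_dir)
            copied.append(path)
    return copied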