Example #1
0
  def rebalancehdfs(self, env):
    """Run the HDFS balancer under the HDFS user account and publish progress.

    The balancer threshold is read from the JSON document in
    ``params.name_node_params``. The command ``hdfs balancer -threshold <t>``
    is run through ``cmd /C`` via ``run_os_command_impersonated`` with the
    user name, password and domain derived from ``params.hdfs_user``. The
    captured stdout is then parsed line by line and every recognised progress
    record is pushed through ``self.put_structured_out``.

    :param env: execution environment; ``params`` is registered on it with
                ``env.set_params``.
    :raises Fail: when the balancer returns a non-zero exit code.
    """
    from ambari_commons.os_windows import UserHelper, run_os_command_impersonated
    import params
    env.set_params(params)

    hdfs_username, hdfs_domain = UserHelper.parse_user_name(params.hdfs_user, ".")

    name_node_parameters = json.loads(params.name_node_params)
    threshold = name_node_parameters['threshold']
    _print("Starting balancer with threshold = %s\n" % threshold)

    def calculateCompletePercent(first, current):
      # Completed share = 1 - (bytes still to move / bytes to move initially).
      try:
        division_result = current.bytesLeftToMove / first.bytesLeftToMove
      except ZeroDivisionError:
        # The first report already had nothing left to move; treat the run as
        # complete instead of crashing the whole command.
        _print("Division by zero. Bytes Left To Move = %s. Return 1.0\n" % first.bytesLeftToMove)
        return 1.0
      return 1.0 - division_result

    def startRebalancingProcess(threshold):
      # Builds the argument list; it is joined into one string before running.
      rebalanceCommand = 'hdfs balancer -threshold %s' % threshold
      return ['cmd', '/C', rebalanceCommand]

    command = startRebalancingProcess(threshold)

    _print("Executing command %s\n" % command)

    parser = hdfs_rebalance.HdfsParser()
    returncode, stdout, err = run_os_command_impersonated(
      ' '.join(command), hdfs_username, Script.get_password(params.hdfs_user), hdfs_domain)

    for line in stdout.split('\n'):
      _print('[balancer] %s %s' % (str(datetime.now()), line))
      pl = parser.parseLine(line)
      if pl:
        res = pl.toJson()
        res['completePercent'] = calculateCompletePercent(parser.initialLine, pl)

        self.put_structured_out(res)
      # NOTE(review): the spelling 'PROCESS_FINISED' presumably matches the
      # state value set by HdfsParser (defined elsewhere); do not "correct" it
      # here alone.
      elif parser.state == 'PROCESS_FINISED':
        _print('[balancer] %s %s' % (str(datetime.now()), 'Process is finished'))
        self.put_structured_out({'completePercent': 1})
        break

    # A missing exit code (None) is not treated as a failure.
    if returncode is not None and returncode != 0:
      raise Fail('Hdfs rebalance process exited with error. See the log output')
Example #2
0
    def rebalancehdfs(self, env):
        """Run the HDFS balancer as the HDFS user and stream its progress.

        The balancer threshold is read from the JSON document in
        ``params.name_node_params``. When ``params.security_enabled`` is set, a
        dedicated Kerberos credentials cache is prepared (running kinit if
        klist reports no usable ticket) and exported to the balancer through
        ``KRB5CCNAME``. Each stdout line of the balancer is parsed and every
        recognised progress record is pushed through
        ``self.put_structured_out``. The credentials cache file is removed
        afterwards, whether or not the balancer command succeeded.

        :param env: execution environment; ``params`` is registered on it with
                    ``env.set_params``.
        """
        import params
        env.set_params(params)

        name_node_parameters = json.loads(params.name_node_params)
        threshold = name_node_parameters['threshold']
        _print("Starting balancer with threshold = %s\n" % threshold)

        rebalance_env = {'PATH': params.hadoop_bin_dir}

        # NOTE(review): ``format`` below is called with a single template
        # string, so it is presumably the project helper that resolves
        # placeholders from the caller's local variables and ``params`` rather
        # than the builtin. If so, local names such as ``ccache_file_path`` and
        # ``threshold`` must not be renamed - confirm.
        if params.security_enabled:
            # Create the kerberos credentials cache (ccache) file and set it in the environment to use
            # when executing HDFS rebalance command. Use the md5 hash of the combination of the principal and keytab file
            # to generate a (relatively) unique cache filename so that we can use it as needed.
            # TODO: params.tmp_dir=/var/lib/ambari-agent/tmp. However hdfs user doesn't have access to this path.
            # TODO: Hence using /tmp
            ccache_file_name = "hdfs_rebalance_cc_" + _md5(
                format(
                    "{hdfs_principal_name}|{hdfs_user_keytab}")).hexdigest()
            ccache_file_path = os.path.join(tempfile.gettempdir(),
                                            ccache_file_name)
            rebalance_env['KRB5CCNAME'] = ccache_file_path

            # If there are no tickets in the cache or they are expired, perform a kinit, else use what
            # is in the cache
            klist_cmd = format("{klist_path_local} -s {ccache_file_path}")
            kinit_cmd = format(
                "{kinit_path_local} -c {ccache_file_path} -kt {hdfs_user_keytab} {hdfs_principal_name}"
            )
            if shell.call(klist_cmd, user=params.hdfs_user)[0] != 0:
                Execute(kinit_cmd, user=params.hdfs_user)

        def calculateCompletePercent(first, current):
            # Completed share = 1 - (bytes still to move / bytes to move initially).
            # avoid division by zero
            try:
                division_result = current.bytesLeftToMove / first.bytesLeftToMove
            except ZeroDivisionError:
                Logger.warning(
                    "Division by zero. Bytes Left To Move = {0}. Return 1.0".
                    format(first.bytesLeftToMove))
                return 1.0
            return 1.0 - division_result

        def startRebalancingProcess(threshold, rebalance_env):
            # Builds the balancer command wrapped to run as the HDFS user with
            # the prepared environment.
            rebalanceCommand = format(
                'hdfs --config {hadoop_conf_dir} balancer -threshold {threshold}'
            )
            return as_user(rebalanceCommand,
                           params.hdfs_user,
                           env=rebalance_env)

        command = startRebalancingProcess(threshold, rebalance_env)

        # NOTE(review): ``basedir`` is computed but never passed to Execute in
        # this block; presumably it was meant as the working directory for the
        # emulator script below - confirm before removing or wiring it in.
        basedir = os.path.join(env.config.basedir, 'scripts')
        if threshold == 'DEBUG':  #FIXME TODO remove this on PROD
            basedir = os.path.join(env.config.basedir, 'scripts',
                                   'balancer-emulator')
            command = ['ambari-python-wrap', 'hdfs-command.py']

        _print("Executing command %s\n" % command)

        parser = hdfs_rebalance.HdfsParser()

        def handle_new_line(line, is_stderr):
            # Callback for each output line; stderr lines are ignored.
            if is_stderr:
                return

            _print('[balancer] %s' % (line))
            pl = parser.parseLine(line)
            if pl:
                res = pl.toJson()
                res['completePercent'] = calculateCompletePercent(
                    parser.initialLine, pl)

                self.put_structured_out(res)
            # NOTE(review): the spelling 'PROCESS_FINISED' presumably matches
            # the state value set by HdfsParser (defined elsewhere); do not
            # "correct" it here alone.
            elif parser.state == 'PROCESS_FINISED':
                _print('[balancer] %s' % ('Process is finished'))
                self.put_structured_out({'completePercent': 1})

        try:
            Execute(
                command,
                on_new_line=handle_new_line,
                logoutput=False,
            )
        finally:
            # Clean up even when the balancer fails, so that the credentials
            # cache does not linger in the temp directory.
            if params.security_enabled:
                # Delete the kerberos credentials cache (ccache) file
                File(
                    ccache_file_path,
                    action="delete",
                )