def rebalancehdfs(self, env):
    """Run the HDFS balancer (Windows variant) and publish its progress.

    The balancer threshold is read from the JSON document in
    ``params.name_node_params``. The command is run to completion through
    ``run_os_command_impersonated`` with the credentials of
    ``params.hdfs_user``; its captured stdout is then parsed line by line and
    every recognised progress line is published via ``put_structured_out``.

    :param env: execution environment; ``params`` is registered on it.
    :raises Fail: when the balancer process returns a non-zero exit code.
    """
    from ambari_commons.os_windows import UserHelper, run_os_command_impersonated
    import params

    env.set_params(params)

    hdfs_username, hdfs_domain = UserHelper.parse_user_name(params.hdfs_user, ".")

    name_node_parameters = json.loads(params.name_node_params)
    threshold = name_node_parameters['threshold']
    _print("Starting balancer with threshold = %s\n" % threshold)

    def calculateCompletePercent(first, current):
        # Fraction of the initially reported work that is already done.
        # A zero initial value means there is nothing to move, so report
        # the run as complete instead of failing on the division.
        try:
            division_result = current.bytesLeftToMove / first.bytesLeftToMove
        except ZeroDivisionError:
            _print("Division by zero. Bytes Left To Move = %s. Return 1.0\n"
                   % first.bytesLeftToMove)
            return 1.0
        return 1.0 - division_result

    def startRebalancingProcess(threshold):
        # Build the argv list; it is joined into a single command line below.
        rebalanceCommand = 'hdfs balancer -threshold %s' % threshold
        return ['cmd', '/C', rebalanceCommand]

    command = startRebalancingProcess(threshold)
    _print("Executing command %s\n" % command)

    parser = hdfs_rebalance.HdfsParser()
    returncode, stdout, err = run_os_command_impersonated(
        ' '.join(command),
        hdfs_username,
        Script.get_password(params.hdfs_user),
        hdfs_domain)

    for line in stdout.split('\n'):
        _print('[balancer] %s %s' % (str(datetime.now()), line))
        pl = parser.parseLine(line)
        if pl:
            res = pl.toJson()
            res['completePercent'] = calculateCompletePercent(parser.initialLine, pl)
            self.put_structured_out(res)
        elif parser.state == 'PROCESS_FINISED':
            # The state literal must match the value set by HdfsParser exactly.
            _print('[balancer] %s %s' % (str(datetime.now()), 'Process is finished'))
            self.put_structured_out({'completePercent': 1})
            break

    # A missing return code is not treated as a failure.
    if returncode is not None and returncode != 0:
        raise Fail('Hdfs rebalance process exited with error. See the log output')
def rebalancehdfs(self, env):
    """Run the HDFS balancer as the HDFS user and publish its progress.

    The balancer threshold is read from the JSON document in
    ``params.name_node_params``. When security is enabled, a Kerberos
    credentials cache is prepared first and removed again once the balancer
    command has finished, whether it succeeded or raised. Balancer stdout is
    parsed line by line and every recognised progress line is published via
    ``put_structured_out``.

    NOTE(review): ``format`` here is presumably the project helper that
    resolves ``{name}`` placeholders from ``params`` and the caller's local
    variables (not ``str.format``); if so, the locals referenced in the
    templates (``threshold``, ``ccache_file_path``) must keep their names.

    :param env: execution environment; ``params`` is registered on it.
    """
    import params

    env.set_params(params)

    name_node_parameters = json.loads(params.name_node_params)
    threshold = name_node_parameters['threshold']
    _print("Starting balancer with threshold = %s\n" % threshold)

    rebalance_env = {'PATH': params.hadoop_bin_dir}

    if params.security_enabled:
        # Create the kerberos credentials cache (ccache) file and set it in the environment to use
        # when executing HDFS rebalance command. Use the md5 hash of the combination of the principal
        # and keytab file to generate a (relatively) unique cache filename so that we can use it as
        # needed.
        # TODO: params.tmp_dir=/var/lib/ambari-agent/tmp. However hdfs user doesn't have access to
        # TODO: this path. Hence using /tmp
        ccache_file_name = "hdfs_rebalance_cc_" + _md5(
            format("{hdfs_principal_name}|{hdfs_user_keytab}")).hexdigest()
        ccache_file_path = os.path.join(tempfile.gettempdir(), ccache_file_name)
        rebalance_env['KRB5CCNAME'] = ccache_file_path

        # If there are no tickets in the cache or they are expired, perform a kinit, else use what
        # is in the cache
        klist_cmd = format("{klist_path_local} -s {ccache_file_path}")
        kinit_cmd = format(
            "{kinit_path_local} -c {ccache_file_path} -kt {hdfs_user_keytab} {hdfs_principal_name}")
        if shell.call(klist_cmd, user=params.hdfs_user)[0] != 0:
            Execute(kinit_cmd, user=params.hdfs_user)

    def calculateCompletePercent(first, current):
        # Fraction of the initially reported work that is already done.
        # avoid division by zero
        try:
            division_result = current.bytesLeftToMove / first.bytesLeftToMove
        except ZeroDivisionError:
            Logger.warning(
                "Division by zero. Bytes Left To Move = {0}. Return 1.0".format(
                    first.bytesLeftToMove))
            return 1.0
        return 1.0 - division_result

    def startRebalancingProcess(threshold, rebalance_env):
        # `threshold` must stay a local of this function: the template below
        # refers to it by name.
        rebalanceCommand = format(
            'hdfs --config {hadoop_conf_dir} balancer -threshold {threshold}')
        return as_user(rebalanceCommand, params.hdfs_user, env=rebalance_env)

    command = startRebalancingProcess(threshold, rebalance_env)

    # NOTE(review): basedir is computed but never passed to Execute below;
    # it looks like it was meant to be the working directory - confirm.
    basedir = os.path.join(env.config.basedir, 'scripts')
    if threshold == 'DEBUG':  # FIXME TODO remove this on PROD
        basedir = os.path.join(env.config.basedir, 'scripts', 'balancer-emulator')
        command = ['ambari-python-wrap', 'hdfs-command.py']

    _print("Executing command %s\n" % command)

    parser = hdfs_rebalance.HdfsParser()

    def handle_new_line(line, is_stderr):
        # Only stdout carries balancer progress; stderr lines are ignored.
        if is_stderr:
            return

        _print('[balancer] %s' % (line))
        pl = parser.parseLine(line)
        if pl:
            res = pl.toJson()
            res['completePercent'] = calculateCompletePercent(parser.initialLine, pl)
            self.put_structured_out(res)
        elif parser.state == 'PROCESS_FINISED':
            # The state literal must match the value set by HdfsParser exactly.
            _print('[balancer] %s' % ('Process is finished'))
            self.put_structured_out({'completePercent': 1})
            return

    try:
        Execute(
            command,
            on_new_line=handle_new_line,
            logoutput=False,
        )
    finally:
        # Clean up even when the balancer fails, so the credentials cache is
        # not left behind in the temp directory.
        if params.security_enabled:
            # Delete the kerberos credentials cache (ccache) file
            File(
                ccache_file_path,
                action="delete",
            )