def __init__(self, cluster, service, component, globalConfig, config):
  """Set up a live-status reporter for one component.

  Records which cluster/service/component this instance reports on, then
  wires up the helpers used while building reports: a
  StackVersionsFileHandler rooted at the agent prefix directory and an
  ActualConfigHandler for applied configuration tags.
  """
  self.cluster, self.service, self.component = cluster, service, component
  self.globalConfig = globalConfig
  # The agent prefix directory is where stack-version files live.
  agent_prefix_dir = config.get('agent', 'prefix')
  self.versionsHandler = StackVersionsFileHandler(agent_prefix_dir)
  self.actualConfigHandler = ActualConfigHandler(config)
def __init__(self, cluster, service, component, globalConfig, config, configTags):
  """Set up a live-status reporter for one component.

  Stores the component's identity plus the configuration tags the server
  expects, and creates the ActualConfigHandler used to read back the tags
  actually applied on this host.
  """
  self.cluster, self.service, self.component = cluster, service, component
  self.globalConfig = globalConfig
  self.configTags = configTags
  self.actualConfigHandler = ActualConfigHandler(config, configTags)
def __init__(self, cluster, service, component, globalConfig, config, configTags):
  """Set up a live-status reporter for one component.

  Records the component's identity and configuration tags, and creates the
  two helpers reports depend on: a StackVersionsFileHandler rooted at the
  agent prefix directory and an ActualConfigHandler for applied tags.
  """
  self.cluster, self.service, self.component = cluster, service, component
  self.globalConfig = globalConfig
  # Stack-version files are kept under the agent prefix directory.
  agent_prefix_dir = config.get('agent', 'prefix')
  self.versionsHandler = StackVersionsFileHandler(agent_prefix_dir)
  self.configTags = configTags
  self.actualConfigHandler = ActualConfigHandler(config, configTags)
class LiveStatus:
  """Builds the live-status report for a single component of a service."""

  # Populated externally with known services/components; empty by default.
  SERVICES = []
  CLIENT_COMPONENTS = []
  COMPONENTS = []

  # Component states as reported to the Ambari server.
  LIVE_STATUS = "STARTED"
  DEAD_STATUS = "INSTALLED"

  def __init__(self, cluster, service, component, globalConfig, config, configTags):
    """Record the component identity and create the applied-config reader."""
    self.logger = logging.getLogger()
    self.cluster = cluster
    self.service = service
    self.component = component
    self.globalConfig = globalConfig
    self.configTags = configTags
    self.actualConfigHandler = ActualConfigHandler(config, configTags)

  def build(self, component_status):
    """
    :param component_status: component status to include into report
    :return: populated livestatus dict
    """
    report = {
      "componentName": self.component,
      "msg": "",
      "status": component_status,
      "clusterName": self.cluster,
      "serviceName": self.service,
      "stackVersion": ""  # TODO: populate ?
    }
    # Attach the configuration tags actually applied on this host, if any.
    applied_tags = self.actualConfigHandler.read_actual_component(self.component)
    if applied_tags is not None:
      report['configurationTags'] = applied_tags
    # Lazy %-args: message rendered only when DEBUG is enabled.
    self.logger.debug("The live status for component %s of service %s is %s",
                      self.component, self.service, report)
    return report
class LiveStatus:
  """Builds the live-status report for a single component of a service."""

  # Component states as reported to the Ambari server.
  LIVE_STATUS = "STARTED"
  DEAD_STATUS = "INSTALLED"

  def __init__(self, cluster, service, component, globalConfig, config, configTags):
    """Record the component identity and create the applied-config reader."""
    self.cluster = cluster
    self.service = service
    self.component = component
    self.globalConfig = globalConfig
    self.configTags = configTags
    self.actualConfigHandler = ActualConfigHandler(config, configTags)

  def build(self, component_status):
    """
    :param component_status: component status to include into report
    :return: populated livestatus dict
    """
    # NOTE: a previous version declared ``global LIVE_STATUS, DEAD_STATUS``
    # here.  Those names are class attributes, not module globals, so the
    # declaration had no effect and has been removed.
    livestatus = {"componentName": self.component,
                  "msg": "",
                  "status": component_status,
                  "clusterName": self.cluster,
                  "serviceName": self.service,
                  "stackVersion": ""  # TODO: populate ?
                  }

    # Attach the configuration tags actually applied on this host, if any.
    active_config = self.actualConfigHandler.read_actual_component(self.component)
    if active_config is not None:
      livestatus['configurationTags'] = active_config

    # Lazy %-args (matches the other LiveStatus variant in this file):
    # the message is only rendered when DEBUG logging is enabled.
    logger.debug("The live status for component %s of service %s is %s",
                 self.component, self.service, livestatus)
    return livestatus
def execute_command(self, command):
  '''
  Executes commands of type EXECUTION_COMMAND.

  Publishes an IN_PROGRESS report, runs the command through the custom
  service orchestrator (retrying on failure while the server-supplied retry
  budget lasts), then publishes the final report and updates the recovery
  manager and the actual-config handler as appropriate.
  '''
  clusterName = command['clusterName']
  commandId = command['commandId']
  isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
  isAutoExecuteCommand = command['commandType'] == self.AUTO_EXECUTION_COMMAND
  message = "Executing command with id = {commandId}, taskId = {taskId} for role = {role} of " \
            "cluster {cluster}.".format(
            commandId = str(commandId), taskId = str(command['taskId']),
            role=command['role'], cluster=clusterName)
  logger.info(message)

  taskId = command['taskId']
  # Preparing 'IN_PROGRESS' report
  in_progress_status = self.commandStatuses.generate_report_template(command)
  # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
  # server. The prefix is defined in agent-config.ini
  if not isAutoExecuteCommand:
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
      'structuredOut': self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
  else:
    # Auto-execute commands get an 'auto_' file prefix so their logs do not
    # collide with operator-initiated runs of the same taskId.
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
      'structuredOut': self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
  self.commandStatuses.put_command_status(command, in_progress_status)

  # Retry bookkeeping: retryDuration is a time budget in seconds supplied by
  # the server; the loop below always gets at least one attempt.
  numAttempts = 0
  retryDuration = 0  # even with 0 allow one attempt
  retryAble = False
  delay = 1
  log_command_output = True
  if 'commandParams' in command and 'log_output' in command['commandParams'] and "false" == command['commandParams']['log_output']:
    log_command_output = False

  if 'commandParams' in command:
    if 'max_duration_for_retries' in command['commandParams']:
      retryDuration = int(command['commandParams']['max_duration_for_retries'])
    if 'command_retry_enabled' in command['commandParams']:
      retryAble = command['commandParams']['command_retry_enabled'] == "true"
  if isAutoExecuteCommand:
    # Auto-execute (recovery) commands are never retried here.
    retryAble = False

  logger.info("Command execution metadata - taskId = {taskId}, retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}, log_output = {log_command_output}"
              .format(taskId=taskId, retryAble=retryAble, retryDuration=retryDuration, log_command_output=log_command_output))
  while retryDuration >= 0:
    numAttempts += 1
    start = 0
    if retryAble:
      start = int(time.time())
    # running command
    commandresult = self.customServiceOrchestrator.runCommand(command,
      in_progress_status['tmpout'], in_progress_status['tmperr'],
      override_output_files=numAttempts == 1, retry=numAttempts > 1)
    # When retries are disabled, start=0/end=1 makes the elapsed time 1s so
    # the budget (0) is exhausted after the single attempt.
    end = 1
    if retryAble:
      end = int(time.time())
    retryDuration -= (end - start)

    # dumping results
    if isCommandBackground:
      # Background commands report via a separate callback path; nothing
      # more to do here.
      logger.info("Command is background command, quit retrying. Exit code: {exitCode}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                  .format(cid=taskId, exitCode=commandresult['exitcode'], retryAble=retryAble, retryDuration=retryDuration, delay=delay))
      return
    else:
      if commandresult['exitcode'] == 0:
        status = self.COMPLETED_STATUS
      else:
        status = self.FAILED_STATUS
        # A negative exit code equal to -SIGTERM/-SIGKILL means the process
        # was killed (cancellation) — do not retry.
        if (commandresult['exitcode'] == -signal.SIGTERM) or (commandresult['exitcode'] == -signal.SIGKILL):
          logger.info('Command {cid} was canceled!'.format(cid=taskId))
          break

    if status != self.COMPLETED_STATUS and retryAble and retryDuration > 0:
      delay = self.get_retry_delay(delay)
      if delay > retryDuration:
        delay = retryDuration
      retryDuration -= delay  # allow one last attempt
      commandresult['stderr'] += "\n\nCommand failed. Retrying command execution ...\n\n"
      logger.info("Retrying command id {cid} after a wait of {delay}".format(
          cid=taskId, delay=delay))
      time.sleep(delay)
      continue
    else:
      logger.info("Quit retrying for command id {cid}. Status: {status}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                  .format(cid=taskId, status=status, retryAble=retryAble, retryDuration=retryDuration, delay=delay))
      break

  # final result to stdout
  commandresult['stdout'] += '\n\nCommand completed successfully!\n' if status == self.COMPLETED_STATUS else '\n\nCommand failed after ' + str(numAttempts) + ' tries\n'
  logger.info('Command {cid} completed successfully!'.format(cid=taskId) if status == self.COMPLETED_STATUS else 'Command {cid} failed after {attempts} tries'.format(cid=taskId, attempts=numAttempts))

  roleResult = self.commandStatuses.generate_report_template(command)
  roleResult.update({
    'stdout': commandresult['stdout'],
    'stderr': commandresult['stderr'],
    'exitCode': commandresult['exitcode'],
    'status': status,
  })

  # Optionally echo the command's stdout/stderr into the agent log, gated by
  # the agent config and the per-command log_output flag read above.
  if self.config.has_option("logging","log_command_executes") \
      and int(self.config.get("logging", "log_command_executes")) == 1 \
      and log_command_output:
    if roleResult['stdout'] != '':
      logger.info("Begin command output log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
      self.log_command_output(roleResult['stdout'], str(command['taskId']))
      logger.info("End command output log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
    if roleResult['stderr'] != '':
      logger.info("Begin command stderr log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
      self.log_command_output(roleResult['stderr'], str(command['taskId']))
      logger.info("End command stderr log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])

  # The server expects the literal string 'None' rather than empty output.
  if roleResult['stdout'] == '':
    roleResult['stdout'] = 'None'
  if roleResult['stderr'] == '':
    roleResult['stderr'] = 'None'

  # let ambari know name of custom command
  if command['hostLevelParams'].has_key('custom_command'):
    roleResult['customCommand'] = command['hostLevelParams']['custom_command']

  if 'structuredOut' in commandresult:
    roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
  else:
    roleResult['structuredOut'] = ''

  # let recovery manager know the current state
  if status == self.COMPLETED_STATUS:
    if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
        and self.controller.recovery_manager.configured_for_recovery(command['role']):
      if command['roleCommand'] == self.ROLE_COMMAND_START:
        self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
        self.controller.recovery_manager.update_config_staleness(command['role'], False)
        logger.info("After EXECUTION_COMMAND (START), with taskId=" + str(command['taskId']) +
                    ", current state of " + command['role'] + " to " +
                    self.controller.recovery_manager.get_current_status(command['role']))
      elif command['roleCommand'] == self.ROLE_COMMAND_STOP or command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
        self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.DEAD_STATUS)
        logger.info("After EXECUTION_COMMAND (STOP/INSTALL), with taskId=" + str(command['taskId']) +
                    ", current state of " + command['role'] + " to " +
                    self.controller.recovery_manager.get_current_status(command['role']))
      elif command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND:
        # Only RESTART among custom commands changes the recovery state.
        if command['hostLevelParams'].has_key('custom_command') and \
            command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART:
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
          self.controller.recovery_manager.update_config_staleness(command['role'], False)
          logger.info("After EXECUTION_COMMAND (RESTART), current state of " + command['role'] + " to " +
                      self.controller.recovery_manager.get_current_status(command['role']))
      pass

    # let ambari know that configuration tags were applied
    configHandler = ActualConfigHandler(self.config, self.configTags)
    if command.has_key('configurationTags'):
      configHandler.write_actual(command['configurationTags'])
      roleResult['configurationTags'] = command['configurationTags']
    component = {'serviceName': command['serviceName'], 'componentName': command['role']}
    # Persist per-component tags for START, INSTALL of client components,
    # and RESTART/START custom commands.
    if 'roleCommand' in command and \
        (command['roleCommand'] == self.ROLE_COMMAND_START or
        (command['roleCommand'] == self.ROLE_COMMAND_INSTALL and component in LiveStatus.CLIENT_COMPONENTS) or
        (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and
        'custom_command' in command['hostLevelParams'] and
        command['hostLevelParams']['custom_command'] in (self.CUSTOM_COMMAND_RESTART, self.CUSTOM_COMMAND_START))):
      configHandler.write_actual_component(command['role'], command['configurationTags'])
      if 'clientsToUpdateConfigs' in command['hostLevelParams'] and command['hostLevelParams']['clientsToUpdateConfigs']:
        configHandler.write_client_components(command['serviceName'], command['configurationTags'],
                                              command['hostLevelParams']['clientsToUpdateConfigs'])
      roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])
  elif status == self.FAILED_STATUS:
    if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
        and self.controller.recovery_manager.configured_for_recovery(command['role']):
      if command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
        self.controller.recovery_manager.update_current_status(command['role'], self.controller.recovery_manager.INSTALL_FAILED)
        logger.info("After EXECUTION_COMMAND (INSTALL), with taskId=" + str(command['taskId']) +
                    ", current state of " + command['role'] + " to " +
                    self.controller.recovery_manager.get_current_status(command['role']))

  self.commandStatuses.put_command_status(command, roleResult)
def executeCommand(self, command):
  """Execute one server command via puppet and return its report.

  Legacy (puppet-based) execution path: runs the command, then returns a
  single-element list with the role result dict for the server.
  """
  clusterName = command['clusterName']
  commandId = command['commandId']
  # NOTE(review): hostname, params, clusterHostInfo and configurations are
  # extracted but never used in this method — presumably kept for parity
  # with an earlier signature; confirm before removing.
  hostname = command['hostname']
  params = command['hostLevelParams']
  clusterHostInfo = command['clusterHostInfo']
  roleCommand = command['roleCommand']
  serviceName = command['serviceName']
  configurations = command['configurations']
  result = []

  logger.info("Executing command with id = " + str(commandId) +\
              " for role = " + command['role'] + " of " +\
              "cluster " + clusterName)
  logger.debug(pprint.pformat(command))

  taskId = command['taskId']
  # Preparing 'IN_PROGRESS' report
  self.commandInProgress = {
    'role': command['role'],
    'actionId': commandId,
    'taskId': taskId,
    'clusterName': clusterName,
    'serviceName': serviceName,
    'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
    'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
    'roleCommand': roleCommand
  }
  # running command
  # NOTE(review): if commandType is not EXECUTION_COMMAND, commandresult is
  # never assigned and the code below raises UnboundLocalError — presumably
  # callers only route EXECUTION_COMMANDs here; confirm.
  if command['commandType'] == ActionQueue.EXECUTION_COMMAND:
    if command['roleCommand'] == ActionQueue.UPGRADE_STATUS:
      commandresult = self.upgradeExecutor.perform_stack_upgrade(command,
        self.commandInProgress['tmpout'], self.commandInProgress['tmperr'])
    else:
      commandresult = self.puppetExecutor.runCommand(command,
        self.commandInProgress['tmpout'], self.commandInProgress['tmperr'])
  # dumping results
  self.commandInProgress = None
  status = "COMPLETED"
  if commandresult['exitcode'] != 0:
    status = "FAILED"

  # assume some puppet plumbing to run these commands
  roleResult = {'role': command['role'],
                'actionId': commandId,
                'taskId': command['taskId'],
                'stdout': commandresult['stdout'],
                'clusterName': clusterName,
                'stderr': commandresult['stderr'],
                'exitCode': commandresult['exitcode'],
                'serviceName': serviceName,
                'status': status,
                'roleCommand': roleCommand}
  # The server expects the literal string 'None' rather than empty output.
  if roleResult['stdout'] == '':
    roleResult['stdout'] = 'None'
  if roleResult['stderr'] == '':
    roleResult['stderr'] = 'None'

  # let ambari know that configuration tags were applied
  if status == 'COMPLETED':
    configHandler = ActualConfigHandler(self.config)
    if command.has_key('configurationTags'):
      configHandler.write_actual(command['configurationTags'])
      roleResult['configurationTags'] = command['configurationTags']
    if command.has_key('roleCommand') and command['roleCommand'] == 'START':
      configHandler.copy_to_component(command['role'])
      roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])
  result.append(roleResult)
  return result
def execute_command(self, command):
  '''
  Executes commands of type EXECUTION_COMMAND.

  Publishes an IN_PROGRESS report, runs the command through the custom
  service orchestrator, then publishes the final role result.  Background
  commands return early: their results arrive via a separate callback path.
  '''
  clusterName = command['clusterName']
  commandId = command['commandId']
  isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND

  message = "Executing command with id = {commandId} for role = {role} of " \
            "cluster {cluster}.".format(
            commandId = str(commandId), role=command['role'], cluster=clusterName)
  logger.info(message)
  logger.debug(pprint.pformat(command))

  taskId = command['taskId']
  # Preparing 'IN_PROGRESS' report
  in_progress_status = self.commandStatuses.generate_report_template(command)
  # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
  # server. The prefix is defined in agent-config.ini
  in_progress_status.update({
    'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
    'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
    'structuredOut': self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
    'status': self.IN_PROGRESS_STATUS
  })
  self.commandStatuses.put_command_status(command, in_progress_status)

  # running command
  commandresult = self.customServiceOrchestrator.runCommand(command,
    in_progress_status['tmpout'], in_progress_status['tmperr'])

  # dumping results
  if isCommandBackground:
    return
  else:
    status = self.COMPLETED_STATUS if commandresult['exitcode'] == 0 else self.FAILED_STATUS

  roleResult = self.commandStatuses.generate_report_template(command)
  roleResult.update({
    'stdout': commandresult['stdout'],
    'stderr': commandresult['stderr'],
    'exitCode': commandresult['exitcode'],
    'status': status,
  })
  # The server expects the literal string 'None' rather than empty output.
  if roleResult['stdout'] == '':
    roleResult['stdout'] = 'None'
  if roleResult['stderr'] == '':
    roleResult['stderr'] = 'None'

  # let ambari know name of custom command
  if command['hostLevelParams'].has_key('custom_command'):
    roleResult['customCommand'] = command['hostLevelParams']['custom_command']

  if 'structuredOut' in commandresult:
    roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
  else:
    roleResult['structuredOut'] = ''

  # let ambari know that configuration tags were applied
  if status == self.COMPLETED_STATUS:
    configHandler = ActualConfigHandler(self.config, self.configTags)
    #update
    # Server-requested refresh: re-tag the listed configs for this component
    # and force a RESTART so the server drops its stale_config marker.
    if command.has_key('forceRefreshConfigTags') and len(command['forceRefreshConfigTags']) > 0:
      forceRefreshConfigTags = command['forceRefreshConfigTags']
      logger.info("Got refresh additional component tags command")
      for configTag in forceRefreshConfigTags:
        configHandler.update_component_tag(command['role'], configTag,
                                           command['configurationTags'][configTag])
      roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART  # force restart for component to evict stale_config on server side
      command['configurationTags'] = configHandler.read_actual_component(command['role'])

    if command.has_key('configurationTags'):
      configHandler.write_actual(command['configurationTags'])
      roleResult['configurationTags'] = command['configurationTags']

    component = {'serviceName': command['serviceName'], 'componentName': command['role']}
    # Persist per-component tags for START, INSTALL of client components,
    # and RESTART custom commands.
    if command.has_key('roleCommand') and \
        (command['roleCommand'] == self.ROLE_COMMAND_START or \
        (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
        and component in LiveStatus.CLIENT_COMPONENTS) or \
        (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
        command['hostLevelParams'].has_key('custom_command') and \
        command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
      configHandler.write_actual_component(command['role'], command['configurationTags'])
      if command['hostLevelParams'].has_key('clientsToUpdateConfigs') and \
          command['hostLevelParams']['clientsToUpdateConfigs']:
        configHandler.write_client_components(command['serviceName'], command['configurationTags'],
                                              command['hostLevelParams']['clientsToUpdateConfigs'])
      roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])

  self.commandStatuses.put_command_status(command, roleResult)
class LiveStatus:
  """Builds the live-status report for a single component of a service."""

  # Services the agent knows how to report status for.
  SERVICES = [
    "HDFS", "MAPREDUCE", "GANGLIA", "HBASE", "NAGIOS", "ZOOKEEPER", "OOZIE",
    "HCATALOG", "KERBEROS", "TEMPLETON", "HIVE", "WEBHCAT", "YARN",
    "MAPREDUCE2", "FLUME", "TEZ", "FALCON", "STORM"
  ]

  # Client-only components: they have no running process, so their status is
  # always DEAD_STATUS (see build()).
  CLIENT_COMPONENTS = [
    {"serviceName" : "HBASE", "componentName" : "HBASE_CLIENT"},
    {"serviceName" : "HDFS", "componentName" : "HDFS_CLIENT"},
    {"serviceName" : "MAPREDUCE", "componentName" : "MAPREDUCE_CLIENT"},
    {"serviceName" : "ZOOKEEPER", "componentName" : "ZOOKEEPER_CLIENT"},
    {"serviceName" : "OOZIE", "componentName" : "OOZIE_CLIENT"},
    {"serviceName" : "HCATALOG", "componentName" : "HCAT"},
    {"serviceName" : "HIVE", "componentName" : "HIVE_CLIENT"},
    {"serviceName" : "YARN", "componentName" : "YARN_CLIENT"},
    {"serviceName" : "MAPREDUCE2", "componentName" : "MAPREDUCE2_CLIENT"},
    {"serviceName" : "PIG", "componentName" : "PIG"},
    {"serviceName" : "SQOOP", "componentName" : "SQOOP"},
    {"serviceName" : "TEZ", "componentName" : "TEZ_CLIENT"},
    {"serviceName" : "FALCON", "componentName" : "FALCON_CLIENT"}
  ]

  # Server (daemon) components whose liveness is checked via their pid files.
  COMPONENTS = [
    {"serviceName" : "HDFS", "componentName" : "DATANODE"},
    {"serviceName" : "HDFS", "componentName" : "NAMENODE"},
    {"serviceName" : "HDFS", "componentName" : "SECONDARY_NAMENODE"},
    {"serviceName" : "HDFS", "componentName" : "JOURNALNODE"},
    {"serviceName" : "HDFS", "componentName" : "ZKFC"},
    {"serviceName" : "MAPREDUCE", "componentName" : "JOBTRACKER"},
    {"serviceName" : "MAPREDUCE", "componentName" : "TASKTRACKER"},
    {"serviceName" : "GANGLIA", "componentName" : "GANGLIA_SERVER"},
    {"serviceName" : "GANGLIA", "componentName" : "GANGLIA_MONITOR"},
    {"serviceName" : "HBASE", "componentName" : "HBASE_MASTER"},
    {"serviceName" : "HBASE", "componentName" : "HBASE_REGIONSERVER"},
    {"serviceName" : "NAGIOS", "componentName" : "NAGIOS_SERVER"},
    {"serviceName" : "FLUME", "componentName" : "FLUME_SERVER"},
    {"serviceName" : "ZOOKEEPER", "componentName" : "ZOOKEEPER_SERVER"},
    {"serviceName" : "OOZIE", "componentName" : "OOZIE_SERVER"},
    {"serviceName" : "HCATALOG", "componentName" : "HCATALOG_SERVER"},
    {"serviceName" : "KERBEROS", "componentName" : "KERBEROS_SERVER"},
    {"serviceName" : "HIVE", "componentName" : "HIVE_SERVER"},
    {"serviceName" : "HIVE", "componentName" : "HIVE_METASTORE"},
    {"serviceName" : "HIVE", "componentName" : "MYSQL_SERVER"},
    {"serviceName" : "WEBHCAT", "componentName" : "WEBHCAT_SERVER"},
    {"serviceName" : "YARN", "componentName" : "RESOURCEMANAGER"},
    {"serviceName" : "YARN", "componentName" : "NODEMANAGER"},
    {"serviceName" : "YARN", "componentName" : "APP_TIMELINE_SERVER"},
    {"serviceName" : "MAPREDUCE2", "componentName" : "HISTORYSERVER"},
    {"serviceName" : "FALCON", "componentName" : "FALCON_SERVER"},
    {"serviceName" : "STORM", "componentName" : "NIMBUS"},
    {"serviceName" : "STORM", "componentName" : "STORM_REST_API"},
    {"serviceName" : "STORM", "componentName" : "SUPERVISOR"},
    {"serviceName" : "STORM", "componentName" : "STORM_UI_SERVER"},
    {"serviceName" : "STORM", "componentName" : "DRPC_SERVER"}
  ]

  # Component states as reported to the Ambari server.
  LIVE_STATUS = "STARTED"
  DEAD_STATUS = "INSTALLED"

  def __init__(self, cluster, service, component, globalConfig, config, configTags):
    # Identity of the component being reported on.
    self.cluster = cluster
    self.service = service
    self.component = component
    self.globalConfig = globalConfig
    # Stack-version files live under the agent prefix directory.
    versionsFileDir = config.get('agent', 'prefix')
    self.versionsHandler = StackVersionsFileHandler(versionsFileDir)
    self.configTags = configTags
    self.actualConfigHandler = ActualConfigHandler(config, configTags)

  def belongsToService(self, component):
    #TODO: Should also check belonging of server to cluster
    return component['serviceName'] == self.service

  def build(self, forsed_component_status = None):
    """
    Build and return the livestatus dict for this component.

    If forsed_component_status is explicitly defined, then StatusCheck
    methods are not used. This feature has been added to support custom
    (ver 2.0) services.
    """
    # NOTE(review): this ``global`` declaration has no effect — the names are
    # class attributes accessed via self below, not module globals.
    global SERVICES, CLIENT_COMPONENTS, COMPONENTS, LIVE_STATUS, DEAD_STATUS
    # NOTE(review): this initial value is dead — livestatus is unconditionally
    # reassigned below.
    livestatus = None
    component = {"serviceName" : self.service, "componentName" : self.component}
    if forsed_component_status: # If already determined
      status = forsed_component_status # Nothing to do
    elif component in self.CLIENT_COMPONENTS:
      status = self.DEAD_STATUS # CLIENT components can't have status STARTED
    elif component in self.COMPONENTS:
      # Daemon component: decide liveness from the pid-file check.
      statusCheck = StatusCheck(AmbariConfig.servicesToPidNames,
                                AmbariConfig.pidPathesVars, self.globalConfig,
                                AmbariConfig.servicesToLinuxUser)
      serviceStatus = statusCheck.getStatus(self.component)
      if serviceStatus is None:
        logger.warn("There is no service to pid mapping for " + self.component)
      status = self.LIVE_STATUS if serviceStatus else self.DEAD_STATUS
    # NOTE(review): if none of the branches above ran (unknown component and
    # no forced status), ``status`` is unbound and the next statement raises
    # UnboundLocalError — presumably callers only pass known components;
    # confirm.
    livestatus ={"componentName" : self.component,
                 "msg" : "",
                 "status" : status,
                 "clusterName" : self.cluster,
                 "serviceName" : self.service,
                 "stackVersion": self.versionsHandler.
                 read_stack_version(self.component)
                 }
    # Attach the configuration tags actually applied on this host, if any.
    active_config = self.actualConfigHandler.read_actual_component(self.component)
    if not active_config is None:
      livestatus['configurationTags'] = active_config

    logger.debug("The live status for component " + str(self.component) +\
                 " of service " + str(self.service) + " is " + str(livestatus))
    return livestatus
def execute_command(self, command):
  '''
  Executes commands of type EXECUTION_COMMAND.

  Publishes an IN_PROGRESS report, runs the command through the custom
  service orchestrator, then publishes the final role result and records
  applied configuration tags on success.
  '''
  clusterName = command['clusterName']
  commandId = command['commandId']

  message = "Executing command with id = {commandId} for role = {role} of " \
            "cluster {cluster}.".format(
            commandId = str(commandId), role=command['role'], cluster=clusterName)
  logger.info(message)
  logger.debug(pprint.pformat(command))

  taskId = command['taskId']
  # Preparing 'IN_PROGRESS' report
  in_progress_status = self.commandStatuses.generate_report_template(command)
  in_progress_status.update({
    'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
    'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
    'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
    'status': self.IN_PROGRESS_STATUS
  })
  self.commandStatuses.put_command_status(command, in_progress_status)

  # running command
  commandresult = self.customServiceOrchestrator.runCommand(command,
    in_progress_status['tmpout'], in_progress_status['tmperr'])

  # dumping results
  status = self.COMPLETED_STATUS
  if commandresult['exitcode'] != 0:
    status = self.FAILED_STATUS
  roleResult = self.commandStatuses.generate_report_template(command)
  roleResult.update({
    'stdout': commandresult['stdout'],
    'stderr': commandresult['stderr'],
    'exitCode': commandresult['exitcode'],
    'status': status,
  })
  # The server expects the literal string 'None' rather than empty output.
  if roleResult['stdout'] == '':
    roleResult['stdout'] = 'None'
  if roleResult['stderr'] == '':
    roleResult['stderr'] = 'None'
  # let ambari know name of custom command
  if command['hostLevelParams'].has_key('custom_command'):
    roleResult['customCommand'] = command['hostLevelParams']['custom_command']

  if 'structuredOut' in commandresult:
    roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
  else:
    roleResult['structuredOut'] = ''

  # let ambari know that configuration tags were applied
  if status == self.COMPLETED_STATUS:
    configHandler = ActualConfigHandler(self.config, self.configTags)
    if command.has_key('configurationTags'):
      configHandler.write_actual(command['configurationTags'])
      roleResult['configurationTags'] = command['configurationTags']
    component = {'serviceName':command['serviceName'],'componentName':command['role']}
    # Persist per-component tags for START, INSTALL of client components,
    # and RESTART custom commands.
    if command.has_key('roleCommand') and \
        (command['roleCommand'] == self.ROLE_COMMAND_START or \
        (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
        and component in LiveStatus.CLIENT_COMPONENTS) or \
        (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
        command['hostLevelParams'].has_key('custom_command') and \
        command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
      configHandler.write_actual_component(command['role'], command['configurationTags'])
      configHandler.write_client_components(command['serviceName'], command['configurationTags'])
      roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])

  self.commandStatuses.put_command_status(command, roleResult)
def execute_command(self, command):
  '''
  Executes commands of type EXECUTION_COMMAND.

  Publishes an IN_PROGRESS report, runs the command through the custom
  service orchestrator (retrying on failure while the server-supplied retry
  budget lasts), then publishes the final report and updates the recovery
  manager and the actual-config handler as appropriate.
  '''
  clusterName = command['clusterName']
  commandId = command['commandId']
  isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
  isAutoExecuteCommand = command['commandType'] == self.AUTO_EXECUTION_COMMAND
  message = "Executing command with id = {commandId} for role = {role} of " \
            "cluster {cluster}.".format(
            commandId = str(commandId), role=command['role'], cluster=clusterName)
  logger.info(message)

  taskId = command['taskId']
  # Preparing 'IN_PROGRESS' report
  in_progress_status = self.commandStatuses.generate_report_template(command)
  # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
  # server. The prefix is defined in agent-config.ini
  if not isAutoExecuteCommand:
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
      'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
  else:
    # Auto-execute commands get an 'auto_' file prefix so their logs do not
    # collide with operator-initiated runs of the same taskId.
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
      'structuredOut' : self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
  self.commandStatuses.put_command_status(command, in_progress_status)

  # Retry bookkeeping: retryDuration is a time budget in seconds supplied by
  # the server; the loop below always gets at least one attempt.
  numAttempts = 0
  retryDuration = 0  # even with 0 allow one attempt
  retryAble = False
  delay = 1
  if 'commandParams' in command:
    if 'max_duration_for_retries' in command['commandParams']:
      retryDuration = int(command['commandParams']['max_duration_for_retries'])
    if 'command_retry_enabled' in command['commandParams']:
      retryAble = command['commandParams']['command_retry_enabled'] == "true"
  if isAutoExecuteCommand:
    # Auto-execute (recovery) commands are never retried here.
    retryAble = False

  logger.debug("Command execution metadata - retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}".
               format(retryAble=retryAble, retryDuration=retryDuration))
  while retryDuration >= 0:
    numAttempts += 1
    start = 0
    if retryAble:
      start = int(time.time())
    # running command
    commandresult = self.customServiceOrchestrator.runCommand(command,
      in_progress_status['tmpout'], in_progress_status['tmperr'],
      override_output_files=numAttempts == 1, retry=numAttempts > 1)
    # When retries are disabled, start=0/end=1 makes the elapsed time 1s so
    # the budget (0) is exhausted after the single attempt.
    end = 1
    if retryAble:
      end = int(time.time())
    retryDuration -= (end - start)

    # dumping results
    if isCommandBackground:
      # Background commands report via a separate callback path.
      return
    else:
      if commandresult['exitcode'] == 0:
        status = self.COMPLETED_STATUS
      else:
        status = self.FAILED_STATUS

    if status != self.COMPLETED_STATUS and retryAble == True and retryDuration > 0:
      delay = self.get_retry_delay(delay)
      if delay > retryDuration:
        delay = retryDuration
      retryDuration -= delay  # allow one last attempt
      logger.info("Retrying command id {cid} after a wait of {delay}".format(
          cid=taskId, delay=delay))
      time.sleep(delay)
      continue
    else:
      break

  roleResult = self.commandStatuses.generate_report_template(command)
  roleResult.update({
    'stdout': commandresult['stdout'],
    'stderr': commandresult['stderr'],
    'exitCode': commandresult['exitcode'],
    'status': status,
  })
  # The server expects the literal string 'None' rather than empty output.
  if roleResult['stdout'] == '':
    roleResult['stdout'] = 'None'
  if roleResult['stderr'] == '':
    roleResult['stderr'] = 'None'

  # let ambari know name of custom command
  if command['hostLevelParams'].has_key('custom_command'):
    roleResult['customCommand'] = command['hostLevelParams']['custom_command']

  if 'structuredOut' in commandresult:
    roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
  else:
    roleResult['structuredOut'] = ''

  # let recovery manager know the current state
  if status == self.COMPLETED_STATUS:
    if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
        and self.controller.recovery_manager.configured_for_recovery(command['role']):
      if command['roleCommand'] == self.ROLE_COMMAND_START:
        self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
        self.controller.recovery_manager.update_config_staleness(command['role'], False)
        logger.info("After EXECUTION_COMMAND (START), current state of " + command['role'] + " to " +
                    self.controller.recovery_manager.get_current_status(command['role']) )
      elif command['roleCommand'] == self.ROLE_COMMAND_STOP or command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
        self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.DEAD_STATUS)
        logger.info("After EXECUTION_COMMAND (STOP/INSTALL), current state of " + command['role'] + " to " +
                    self.controller.recovery_manager.get_current_status(command['role']) )
      elif command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND:
        # Only RESTART among custom commands changes the recovery state.
        if command['hostLevelParams'].has_key('custom_command') and \
            command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART:
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
          self.controller.recovery_manager.update_config_staleness(command['role'], False)
          logger.info("After EXECUTION_COMMAND (RESTART), current state of " + command['role'] + " to " +
                      self.controller.recovery_manager.get_current_status(command['role']) )
      pass

    # let ambari know that configuration tags were applied
    configHandler = ActualConfigHandler(self.config, self.configTags)
    #update
    # Server-requested refresh: re-tag the listed configs for this component
    # and force a RESTART so the server drops its stale_config marker.
    if command.has_key('forceRefreshConfigTags') and len(command['forceRefreshConfigTags']) > 0 :
      forceRefreshConfigTags = command['forceRefreshConfigTags']
      logger.info("Got refresh additional component tags command")
      for configTag in forceRefreshConfigTags :
        configHandler.update_component_tag(command['role'], configTag,
                                           command['configurationTags'][configTag])
      roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART  # force restart for component to evict stale_config on server side
      command['configurationTags'] = configHandler.read_actual_component(command['role'])

    if command.has_key('configurationTags'):
      configHandler.write_actual(command['configurationTags'])
      roleResult['configurationTags'] = command['configurationTags']
    component = {'serviceName':command['serviceName'],'componentName':command['role']}
    # Persist per-component tags for START, INSTALL of client components,
    # and RESTART custom commands.
    if command.has_key('roleCommand') and \
        (command['roleCommand'] == self.ROLE_COMMAND_START or \
        (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
        and component in LiveStatus.CLIENT_COMPONENTS) or \
        (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
        command['hostLevelParams'].has_key('custom_command') and \
        command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
      configHandler.write_actual_component(command['role'], command['configurationTags'])
      if command['hostLevelParams'].has_key('clientsToUpdateConfigs') and \
          command['hostLevelParams']['clientsToUpdateConfigs']:
        configHandler.write_client_components(command['serviceName'], command['configurationTags'],
                                              command['hostLevelParams']['clientsToUpdateConfigs'])
      roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])

  self.commandStatuses.put_command_status(command, roleResult)
class LiveStatus:
  """Builds a live-status report for one component of a service.

  SERVICES / CLIENT_COMPONENTS / COMPONENTS are registries populated
  elsewhere (empty by default here); CLIENT components are never
  reported as STARTED.
  """

  SERVICES = []
  CLIENT_COMPONENTS = []
  COMPONENTS = []

  LIVE_STATUS = "STARTED"
  DEAD_STATUS = "INSTALLED"

  def __init__(self, cluster, service, component, globalConfig, config, configTags):
    """
    :param cluster: cluster name the component belongs to
    :param service: service name, e.g. "HDFS"
    :param component: component name, e.g. "DATANODE"
    :param globalConfig: global configuration, handed to StatusCheck
    :param config: agent config; 'agent'/'prefix' locates the stack-versions file
    :param configTags: configuration tags handed to ActualConfigHandler
    """
    self.cluster = cluster
    self.service = service
    self.component = component
    self.globalConfig = globalConfig
    versionsFileDir = config.get('agent', 'prefix')
    self.versionsHandler = StackVersionsFileHandler(versionsFileDir)
    self.configTags = configTags
    self.actualConfigHandler = ActualConfigHandler(config, configTags)

  def belongsToService(self, component):
    """Return True if the component descriptor belongs to this service."""
    # TODO: Should also check belonging of server to cluster
    return component['serviceName'] == self.service

  def build(self, forsed_component_status = None):
    """Assemble and return the live-status dict for this component.

    If forsed_component_status is explicitly defined, then StatusCheck
    methods are not used. This feature has been added to support custom
    (ver 2.0) services.
    """
    component = {"serviceName" : self.service, "componentName" : self.component}
    if forsed_component_status: # If already determined
      status = forsed_component_status  # Nothing to do
    elif component in self.CLIENT_COMPONENTS:
      status = self.DEAD_STATUS # CLIENT components can't have status STARTED
    elif component in self.COMPONENTS:
      statusCheck = StatusCheck(AmbariConfig.servicesToPidNames,
                                AmbariConfig.pidPathVars, self.globalConfig,
                                AmbariConfig.servicesToLinuxUser)
      serviceStatus = statusCheck.getStatus(self.component)
      if serviceStatus is None:
        logger.warn("There is no service to pid mapping for " + self.component)
      status = self.LIVE_STATUS if serviceStatus else self.DEAD_STATUS
    else:
      # Fix: previously 'status' stayed unbound here and the dict literal
      # below raised NameError for components absent from both registries.
      # Fall back to the non-running state instead.
      logger.warn("Component " + str(self.component) + " of service " +
                  str(self.service) + " is not in the known component lists")
      status = self.DEAD_STATUS
    livestatus = {"componentName" : self.component,
                  "msg" : "",
                  "status" : status,
                  "clusterName" : self.cluster,
                  "serviceName" : self.service,
                  "stackVersion": self.versionsHandler.read_stack_version(self.component)
                  }
    active_config = self.actualConfigHandler.read_actual_component(self.component)
    if active_config is not None:
      livestatus['configurationTags'] = active_config
    logger.debug("The live status for component " + str(self.component) +
                 " of service " + str(self.service) + " is " + str(livestatus))
    return livestatus
class LiveStatus:
  """Computes the live status of a single component of a service."""

  SERVICES = []
  CLIENT_COMPONENTS = []
  COMPONENTS = []

  LIVE_STATUS = "STARTED"
  DEAD_STATUS = "INSTALLED"

  def __init__(self, cluster, service, component, globalConfig, config, configTags):
    self.cluster = cluster
    self.service = service
    self.component = component
    self.globalConfig = globalConfig
    versions_dir = config.get('agent', 'prefix')
    self.versionsHandler = StackVersionsFileHandler(versions_dir)
    self.configTags = configTags
    self.actualConfigHandler = ActualConfigHandler(config, configTags)

  def belongsToService(self, component):
    """True when the given component descriptor is part of this service."""
    # TODO: Should also check belonging of server to cluster
    return self.service == component['serviceName']

  def build(self, forced_component_status = None):
    """Build the live-status dict for this component.

    When forced_component_status is supplied the StatusCheck machinery is
    bypassed entirely (added to support custom ver-2.0 services).
    """
    global SERVICES, CLIENT_COMPONENTS, COMPONENTS, LIVE_STATUS, DEAD_STATUS
    descriptor = {"serviceName": self.service, "componentName": self.component}
    if forced_component_status:
      # Status was already determined by the caller.
      status = forced_component_status
    elif descriptor in self.CLIENT_COMPONENTS:
      # CLIENT components are never reported as STARTED.
      status = self.DEAD_STATUS
    elif descriptor in self.COMPONENTS:
      checker = StatusCheck(AmbariConfig.servicesToPidNames,
                            AmbariConfig.pidPathVars,
                            self.globalConfig,
                            AmbariConfig.servicesToLinuxUser)
      pid_status = checker.getStatus(self.component)
      if pid_status is None:
        logger.warn("There is no service to pid mapping for " + self.component)
      status = self.LIVE_STATUS if pid_status else self.DEAD_STATUS
    livestatus = {
      "componentName": self.component,
      "msg": "",
      "status": status,
      "clusterName": self.cluster,
      "serviceName": self.service,
      "stackVersion": self.versionsHandler.read_stack_version(self.component)
    }
    active_config = self.actualConfigHandler.read_actual_component(self.component)
    if active_config is not None:
      livestatus['configurationTags'] = active_config
    logger.debug("The live status for component " + str(self.component) +
                 " of service " + str(self.service) + " is " + str(livestatus))
    return livestatus
def execute_command(self, command):
  '''
  Executes commands of type EXECUTION_COMMAND.

  Publishes an IN_PROGRESS report, runs the command through the custom
  service orchestrator (retrying for up to max_duration_for_retries
  seconds when retries are enabled), then publishes the final
  COMPLETED/FAILED report.  Background commands return early without a
  final report.  On success the recovery manager and the actual-config
  handler are updated as side effects.
  '''
  clusterName = command['clusterName']
  commandId = command['commandId']
  isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
  isAutoExecuteCommand = command['commandType'] == self.AUTO_EXECUTION_COMMAND
  message = "Executing command with id = {commandId}, taskId = {taskId} for role = {role} of " \
            "cluster {cluster}.".format(
            commandId = str(commandId), taskId = str(command['taskId']),
            role=command['role'], cluster=clusterName)
  logger.info(message)

  taskId = command['taskId']
  # Preparing 'IN_PROGRESS' report
  in_progress_status = self.commandStatuses.generate_report_template(command)
  # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
  # server. The prefix is defined in agent-config.ini
  if not isAutoExecuteCommand:
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
      'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
  else:
    # Auto-execute (recovery) commands use a distinct 'auto_' file prefix
    # so their logs don't collide with operator-triggered commands.
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
      'structuredOut' : self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
  self.commandStatuses.put_command_status(command, in_progress_status)

  numAttempts = 0
  retryDuration = 0  # even with 0 allow one attempt
  retryAble = False
  delay = 1
  if 'commandParams' in command:
    if 'max_duration_for_retries' in command['commandParams']:
      retryDuration = int(command['commandParams']['max_duration_for_retries'])
    if 'command_retry_enabled' in command['commandParams']:
      retryAble = command['commandParams']['command_retry_enabled'] == "true"
  if isAutoExecuteCommand:
    # Recovery-initiated commands are never retried here; the recovery
    # manager drives its own re-execution.
    retryAble = False

  logger.debug("Command execution metadata - retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}".
               format(retryAble=retryAble, retryDuration=retryDuration))
  while retryDuration >= 0:
    numAttempts += 1
    start = 0
    if retryAble:
      start = int(time.time())
    # running command
    commandresult = self.customServiceOrchestrator.runCommand(command,
      in_progress_status['tmpout'], in_progress_status['tmperr'],
      override_output_files=numAttempts == 1, retry=numAttempts > 1)
    # When retries are disabled, (end - start) is 1 - 0 = 1, which drives
    # retryDuration (initially 0) below zero and ends the loop after one pass.
    end = 1
    if retryAble:
      end = int(time.time())
    retryDuration -= (end - start)

    # dumping results
    if isCommandBackground:
      # Background commands report asynchronously; no final status here.
      return
    else:
      if commandresult['exitcode'] == 0:
        status = self.COMPLETED_STATUS
      else:
        status = self.FAILED_STATUS

    if status != self.COMPLETED_STATUS and retryAble and retryDuration > 0:
      delay = self.get_retry_delay(delay)
      if delay > retryDuration:
        delay = retryDuration
      retryDuration -= delay  # allow one last attempt
      logger.info("Retrying command id {cid} after a wait of {delay}".format(cid=taskId, delay=delay))
      time.sleep(delay)
      continue
    else:
      break

  roleResult = self.commandStatuses.generate_report_template(command)
  roleResult.update({
    'stdout': commandresult['stdout'],
    'stderr': commandresult['stderr'],
    'exitCode': commandresult['exitcode'],
    'status': status,
  })

  # Optional verbose echo of command output into the agent log,
  # controlled by the [logging] log_command_executes config flag.
  if self.config.has_option("logging", "log_command_executes") and int(self.config.get("logging", "log_command_executes")) == 1:
    if roleResult['stdout'] != '':
      logger.info("Begin command output log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
      logger.info(roleResult['stdout'])
      logger.info("End command output log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
    if roleResult['stderr'] != '':
      logger.info("Begin command stderr log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
      logger.info(roleResult['stderr'])
      logger.info("End command stderr log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])

  # Empty output is reported as the literal string 'None' (server-side
  # protocol expectation — TODO confirm against server parsing).
  if roleResult['stdout'] == '':
    roleResult['stdout'] = 'None'
  if roleResult['stderr'] == '':
    roleResult['stderr'] = 'None'

  # let ambari know name of custom command
  if command['hostLevelParams'].has_key('custom_command'):
    roleResult['customCommand'] = command['hostLevelParams']['custom_command']

  if 'structuredOut' in commandresult:
    roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
  else:
    roleResult['structuredOut'] = ''

  # let recovery manager know the current state
  if status == self.COMPLETED_STATUS:
    if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
        and self.controller.recovery_manager.configured_for_recovery(command['role']):
      if command['roleCommand'] == self.ROLE_COMMAND_START:
        self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
        self.controller.recovery_manager.update_config_staleness(command['role'], False)
        logger.info("After EXECUTION_COMMAND (START), with taskId=" + str(command['taskId']) +
                    ", current state of " + command['role'] + " to " +
                    self.controller.recovery_manager.get_current_status(command['role']))
      elif command['roleCommand'] == self.ROLE_COMMAND_STOP or command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
        self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.DEAD_STATUS)
        logger.info("After EXECUTION_COMMAND (STOP/INSTALL), with taskId=" + str(command['taskId']) +
                    ", current state of " + command['role'] + " to " +
                    self.controller.recovery_manager.get_current_status(command['role']))
      elif command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND:
        if command['hostLevelParams'].has_key('custom_command') and \
            command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART:
          # RESTART leaves the component running and clears config staleness.
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
          self.controller.recovery_manager.update_config_staleness(command['role'], False)
          logger.info("After EXECUTION_COMMAND (RESTART), current state of " + command['role'] + " to " +
                      self.controller.recovery_manager.get_current_status(command['role']))
      pass

    # let ambari know that configuration tags were applied
    configHandler = ActualConfigHandler(self.config, self.configTags)

    #update
    if command.has_key('forceRefreshConfigTags') and len(command['forceRefreshConfigTags']) > 0:
      forceRefreshConfigTags = command['forceRefreshConfigTags']
      logger.info("Got refresh additional component tags command")

      for configTag in forceRefreshConfigTags:
        configHandler.update_component_tag(command['role'], configTag, command['configurationTags'][configTag])

      roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART  # force restart for component to evict stale_config on server side
      command['configurationTags'] = configHandler.read_actual_component(command['role'])

    if command.has_key('configurationTags'):
      configHandler.write_actual(command['configurationTags'])
      roleResult['configurationTags'] = command['configurationTags']

    if 'roleCommand' in command and \
        (command['roleCommand'] == self.ROLE_COMMAND_START or
         (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and
          'custom_command' in command['hostLevelParams'] and
          command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
      # Record the per-component applied tags on START/RESTART, and push
      # them to any client components the server asked to refresh.
      configHandler.write_actual_component(command['role'], command['configurationTags'])
      if 'clientsToUpdateConfigs' in command['hostLevelParams'] and command['hostLevelParams']['clientsToUpdateConfigs']:
        configHandler.write_client_components(command['serviceName'], command['configurationTags'],
                                              command['hostLevelParams']['clientsToUpdateConfigs'])
      roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])

  self.commandStatuses.put_command_status(command, roleResult)
def execute_command(self, command):
  '''
  Executes commands of type EXECUTION_COMMAND.

  Publishes an IN_PROGRESS report, runs the command once through the
  custom service orchestrator, then publishes the final COMPLETED/FAILED
  report.  Background commands return early with no final report.  On
  success the applied configuration tags are persisted via
  ActualConfigHandler.
  '''
  clusterName = command['clusterName']
  commandId = command['commandId']
  isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
  message = "Executing command with id = {commandId} for role = {role} of " \
            "cluster {cluster}.".format(
            commandId = str(commandId), role=command['role'], cluster=clusterName)
  logger.info(message)

  taskId = command['taskId']
  # Preparing 'IN_PROGRESS' report
  in_progress_status = self.commandStatuses.generate_report_template(command)
  # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
  # server. The prefix is defined in agent-config.ini
  in_progress_status.update({
    'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
    'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
    'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
    'status': self.IN_PROGRESS_STATUS
  })
  self.commandStatuses.put_command_status(command, in_progress_status)

  # running command
  commandresult = self.customServiceOrchestrator.runCommand(command,
    in_progress_status['tmpout'], in_progress_status['tmperr'])

  # dumping results
  if isCommandBackground:
    # Background commands report asynchronously; no final status here.
    return
  else:
    status = self.COMPLETED_STATUS if commandresult['exitcode'] == 0 else self.FAILED_STATUS
  roleResult = self.commandStatuses.generate_report_template(command)
  roleResult.update({
    'stdout': commandresult['stdout'],
    'stderr': commandresult['stderr'],
    'exitCode': commandresult['exitcode'],
    'status': status,
  })
  # Empty output is reported as the literal string 'None' (server-side
  # protocol expectation — TODO confirm against server parsing).
  if roleResult['stdout'] == '':
    roleResult['stdout'] = 'None'
  if roleResult['stderr'] == '':
    roleResult['stderr'] = 'None'

  # let ambari know name of custom command
  if command['hostLevelParams'].has_key('custom_command'):
    roleResult['customCommand'] = command['hostLevelParams']['custom_command']

  if 'structuredOut' in commandresult:
    roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
  else:
    roleResult['structuredOut'] = ''

  # let ambari know that configuration tags were applied
  if status == self.COMPLETED_STATUS:
    configHandler = ActualConfigHandler(self.config, self.configTags)
    #update
    if command.has_key('forceRefreshConfigTags') and len(command['forceRefreshConfigTags']) > 0:
      forceRefreshConfigTags = command['forceRefreshConfigTags']
      logger.info("Got refresh additional component tags command")
      for configTag in forceRefreshConfigTags:
        configHandler.update_component_tag(command['role'], configTag, command['configurationTags'][configTag])
      roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART  # force restart for component to evict stale_config on server side
      command['configurationTags'] = configHandler.read_actual_component(command['role'])
    if command.has_key('configurationTags'):
      configHandler.write_actual(command['configurationTags'])
      roleResult['configurationTags'] = command['configurationTags']
    component = {'serviceName':command['serviceName'],'componentName':command['role']}
    # Per-component tags are recorded on START, on INSTALL of a client
    # component, and on the RESTART custom command.
    if command.has_key('roleCommand') and \
      (command['roleCommand'] == self.ROLE_COMMAND_START or \
      (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
      and component in LiveStatus.CLIENT_COMPONENTS) or \
      (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
      command['hostLevelParams'].has_key('custom_command') and \
      command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
      configHandler.write_actual_component(command['role'], command['configurationTags'])
      if command['hostLevelParams'].has_key('clientsToUpdateConfigs') and \
        command['hostLevelParams']['clientsToUpdateConfigs']:
        configHandler.write_client_components(command['serviceName'], command['configurationTags'],
                                              command['hostLevelParams']['clientsToUpdateConfigs'])
      roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])

  self.commandStatuses.put_command_status(command, roleResult)
def executeCommand(self, command):
  """Execute one command and return a one-element list with its report.

  Runs the command through the upgrade executor (for UPGRADE_STATUS) or
  the puppet executor, tracking progress in self.commandInProgress, and
  returns [roleResult] describing the outcome.  On success the applied
  configuration tags are persisted via ActualConfigHandler.
  """
  clusterName = command['clusterName']
  commandId = command['commandId']
  hostname = command['hostname']
  params = command['hostLevelParams']
  clusterHostInfo = command['clusterHostInfo']
  roleCommand = command['roleCommand']
  serviceName = command['serviceName']
  configurations = command['configurations']
  result = []

  logger.info("Executing command with id = " + str(commandId) +\
              " for role = " + command['role'] + " of " +\
              "cluster " + clusterName)
  logger.debug(pprint.pformat(command))

  taskId = command['taskId']
  # Preparing 'IN_PROGRESS' report
  self.commandInProgress = {
    'role': command['role'],
    'actionId': commandId,
    'taskId': taskId,
    'clusterName': clusterName,
    'serviceName': serviceName,
    'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
    'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
    'roleCommand': roleCommand
  }
  # running command
  if command['commandType'] == ActionQueue.EXECUTION_COMMAND:
    if command['roleCommand'] == ActionQueue.UPGRADE_STATUS:
      commandresult = self.upgradeExecutor.perform_stack_upgrade(command,
        self.commandInProgress['tmpout'], self.commandInProgress['tmperr'])
    else:
      commandresult = self.puppetExecutor.runCommand(command,
        self.commandInProgress['tmpout'], self.commandInProgress['tmperr'])
  # dumping results
  # Clear the in-progress marker before building the final report.
  self.commandInProgress = None
  status = "COMPLETED"
  if commandresult['exitcode'] != 0:
    status = "FAILED"
  # assume some puppet plumbing to run these commands
  roleResult = {'role': command['role'],
                'actionId': commandId,
                'taskId': command['taskId'],
                'stdout': commandresult['stdout'],
                'clusterName': clusterName,
                'stderr': commandresult['stderr'],
                'exitCode': commandresult['exitcode'],
                'serviceName': serviceName,
                'status': status,
                'roleCommand': roleCommand}
  # Empty output is reported as the literal string 'None' (server-side
  # protocol expectation — TODO confirm against server parsing).
  if roleResult['stdout'] == '':
    roleResult['stdout'] = 'None'
  if roleResult['stderr'] == '':
    roleResult['stderr'] = 'None'
  # let ambari know that configuration tags were applied
  if status == 'COMPLETED':
    configHandler = ActualConfigHandler(self.config)
    if command.has_key('configurationTags'):
      configHandler.write_actual(command['configurationTags'])
      roleResult['configurationTags'] = command['configurationTags']
    if command.has_key('roleCommand') and command['roleCommand'] == 'START':
      # On START, copy the applied tags into the per-component record and
      # report what was actually recorded.
      configHandler.copy_to_component(command['role'])
      roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])
  result.append(roleResult)
  return result
def execute_command(self, command):
  '''
  Executes commands of type EXECUTION_COMMAND.

  Publishes an IN_PROGRESS report, runs the command through the custom
  service orchestrator retrying up to command_retry_max_attempt_count
  times when retries are enabled, then publishes the final
  COMPLETED/FAILED report.  Background commands return early without a
  final report.
  '''
  clusterName = command['clusterName']
  commandId = command['commandId']
  isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
  isAutoExecuteCommand = command['commandType'] == self.AUTO_EXECUTION_COMMAND
  message = "Executing command with id = {commandId} for role = {role} of " \
            "cluster {cluster}.".format(
            commandId = str(commandId), role=command['role'], cluster=clusterName)
  logger.info(message)

  taskId = command['taskId']
  # Preparing 'IN_PROGRESS' report
  in_progress_status = self.commandStatuses.generate_report_template(command)
  # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
  # server. The prefix is defined in agent-config.ini
  if not isAutoExecuteCommand:
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
      'structuredOut': self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
  else:
    # Auto-execute (recovery) commands use a distinct 'auto_' file prefix
    # so their logs don't collide with operator-triggered commands.
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
      'structuredOut': self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
  self.commandStatuses.put_command_status(command, in_progress_status)

  numAttempts = 0
  maxAttempts = 1
  retryAble = False
  delay = 1
  if 'commandParams' in command:
    if 'command_retry_max_attempt_count' in command['commandParams']:
      maxAttempts = int(command['commandParams']['command_retry_max_attempt_count'])
    if 'command_retry_enabled' in command['commandParams']:
      retryAble = command['commandParams']['command_retry_enabled'] == "true"

  logger.debug(
    "Command execution metadata - retry enabled = {retryAble}, max attempt count = {maxAttemptCount}"
    .format(retryAble=retryAble, maxAttemptCount=maxAttempts))
  while numAttempts < maxAttempts:
    numAttempts += 1
    # running command
    commandresult = self.customServiceOrchestrator.runCommand(command,
      in_progress_status['tmpout'], in_progress_status['tmperr'],
      override_output_files=numAttempts == 1, retry=numAttempts > 1)

    # dumping results
    if isCommandBackground:
      # Background commands report asynchronously; no final status here.
      return
    else:
      status = self.COMPLETED_STATUS if commandresult['exitcode'] == 0 else self.FAILED_STATUS
    if status != self.COMPLETED_STATUS and retryAble == True and maxAttempts > numAttempts:
      delay = self.get_retry_delay(delay)
      logger.info("Retrying command id {cid} after a wait of {delay}".format(cid=taskId, delay=delay))
      time.sleep(delay)
      continue
    else:
      break

  roleResult = self.commandStatuses.generate_report_template(command)
  roleResult.update({
    'stdout': commandresult['stdout'],
    'stderr': commandresult['stderr'],
    'exitCode': commandresult['exitcode'],
    'status': status,
  })
  # Empty output is reported as the literal string 'None' (server-side
  # protocol expectation — TODO confirm against server parsing).
  if roleResult['stdout'] == '':
    roleResult['stdout'] = 'None'
  if roleResult['stderr'] == '':
    roleResult['stderr'] = 'None'

  # let ambari know name of custom command
  if command['hostLevelParams'].has_key('custom_command'):
    roleResult['customCommand'] = command['hostLevelParams']['custom_command']

  if 'structuredOut' in commandresult:
    roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
  else:
    roleResult['structuredOut'] = ''

  # let ambari know that configuration tags were applied
  if status == self.COMPLETED_STATUS:
    configHandler = ActualConfigHandler(self.config, self.configTags)
    #update
    if command.has_key('forceRefreshConfigTags') and len(command['forceRefreshConfigTags']) > 0:
      forceRefreshConfigTags = command['forceRefreshConfigTags']
      logger.info("Got refresh additional component tags command")
      for configTag in forceRefreshConfigTags:
        configHandler.update_component_tag(command['role'], configTag,
                                           command['configurationTags'][configTag])
      roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART  # force restart for component to evict stale_config on server side
      command['configurationTags'] = configHandler.read_actual_component(command['role'])
    if command.has_key('configurationTags'):
      configHandler.write_actual(command['configurationTags'])
      roleResult['configurationTags'] = command['configurationTags']
    component = {
      'serviceName': command['serviceName'],
      'componentName': command['role']
    }
    # Per-component tags are recorded on START, on INSTALL of a client
    # component, and on the RESTART custom command.
    if command.has_key('roleCommand') and \
      (command['roleCommand'] == self.ROLE_COMMAND_START or \
      (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
      and component in LiveStatus.CLIENT_COMPONENTS) or \
      (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
      command['hostLevelParams'].has_key('custom_command') and \
      command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
      configHandler.write_actual_component(command['role'], command['configurationTags'])
      if command['hostLevelParams'].has_key('clientsToUpdateConfigs') and \
        command['hostLevelParams']['clientsToUpdateConfigs']:
        configHandler.write_client_components(command['serviceName'],
                                              command['configurationTags'],
                                              command['hostLevelParams']['clientsToUpdateConfigs'])
      roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])

  self.commandStatuses.put_command_status(command, roleResult)
def execute_command(self, command):
  '''
  Executes commands of type EXECUTION_COMMAND.

  Publishes an IN_PROGRESS report, runs the command through a
  thread-local PuppetExecutor, then publishes the final COMPLETED/FAILED
  report.  On success the applied configuration tags are persisted via
  ActualConfigHandler.
  '''
  clusterName = command['clusterName']
  commandId = command['commandId']

  logger.info("Executing command with id = " + str(commandId) +\
              " for role = " + command['role'] + " of " +\
              "cluster " + clusterName)
  logger.debug(pprint.pformat(command))

  taskId = command['taskId']
  # Preparing 'IN_PROGRESS' report
  in_progress_status = self.commandStatuses.generate_report_template(command)
  in_progress_status.update({
    'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
    'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
    'status': self.IN_PROGRESS_STATUS
  })
  self.commandStatuses.put_command_status(command, in_progress_status)
  # TODO: Add CustomServiceOrchestrator call somewhere here
  # running command
  # Create a new instance of executor for the current thread
  puppetExecutor = PuppetExecutor.PuppetExecutor(
    self.config.get('puppet', 'puppetmodules'),
    self.config.get('puppet', 'puppet_home'),
    self.config.get('puppet', 'facter_home'),
    self.config.get('agent', 'prefix'), self.config)
  commandresult = puppetExecutor.runCommand(command,
    in_progress_status['tmpout'], in_progress_status['tmperr'])

  # dumping results
  status = self.COMPLETED_STATUS
  if commandresult['exitcode'] != 0:
    status = self.FAILED_STATUS
  roleResult = self.commandStatuses.generate_report_template(command)
  # assume some puppet plumbing to run these commands
  roleResult.update({
    'stdout': commandresult['stdout'],
    'stderr': commandresult['stderr'],
    'exitCode': commandresult['exitcode'],
    'status': status,
  })
  # Empty output is reported as the literal string 'None' (server-side
  # protocol expectation — TODO confirm against server parsing).
  if roleResult['stdout'] == '':
    roleResult['stdout'] = 'None'
  if roleResult['stderr'] == '':
    roleResult['stderr'] = 'None'
  # let ambari know that configuration tags were applied
  if status == self.COMPLETED_STATUS:
    configHandler = ActualConfigHandler(self.config)
    if command.has_key('configurationTags'):
      configHandler.write_actual(command['configurationTags'])
      roleResult['configurationTags'] = command['configurationTags']
    component = {
      'serviceName': command['serviceName'],
      'componentName': command['role']
    }
    # Per-component tags are copied on START, and on INSTALL of a client
    # component.
    if command.has_key('roleCommand') and \
      (command['roleCommand'] == self.ROLE_COMMAND_START or \
      (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
      and component in LiveStatus.CLIENT_COMPONENTS)):
      configHandler.copy_to_component(command['role'])
      roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])
  self.commandStatuses.put_command_status(command, roleResult)
class LiveStatus:
  """Computes the live status of one component of a service.

  Carries hard-coded registries of known services, client components and
  server components; CLIENT components are never reported as STARTED.
  """

  # Known service names.
  SERVICES = [
    "HDFS", "MAPREDUCE", "GANGLIA", "HBASE", "NAGIOS", "ZOOKEEPER",
    "OOZIE", "HCATALOG", "KERBEROS", "TEMPLETON", "HIVE", "WEBHCAT",
    "YARN", "MAPREDUCE2", "FLUME"
  ]

  # Client-side components: installed on hosts but never long-running.
  CLIENT_COMPONENTS = [{
    "serviceName": "HBASE", "componentName": "HBASE_CLIENT"
  }, {
    "serviceName": "HDFS", "componentName": "HDFS_CLIENT"
  }, {
    "serviceName": "MAPREDUCE", "componentName": "MAPREDUCE_CLIENT"
  }, {
    "serviceName": "ZOOKEEPER", "componentName": "ZOOKEEPER_CLIENT"
  }, {
    "serviceName": "OOZIE", "componentName": "OOZIE_CLIENT"
  }, {
    "serviceName": "HCATALOG", "componentName": "HCAT"
  }, {
    "serviceName": "HIVE", "componentName": "HIVE_CLIENT"
  }, {
    "serviceName": "YARN", "componentName": "YARN_CLIENT"
  }, {
    "serviceName": "MAPREDUCE2", "componentName": "MAPREDUCE2_CLIENT"
  }, {
    "serviceName": "PIG", "componentName": "PIG"
  }, {
    "serviceName": "SQOOP", "componentName": "SQOOP"
  }]

  # Server-side (daemon) components whose liveness is checked via pid files.
  COMPONENTS = [
    {"serviceName": "HDFS", "componentName": "DATANODE"},
    {"serviceName": "HDFS", "componentName": "NAMENODE"},
    {"serviceName": "HDFS", "componentName": "SECONDARY_NAMENODE"},
    {"serviceName": "HDFS", "componentName": "JOURNALNODE"},
    {"serviceName": "HDFS", "componentName": "ZKFC"},
    {"serviceName": "MAPREDUCE", "componentName": "JOBTRACKER"},
    {"serviceName": "MAPREDUCE", "componentName": "TASKTRACKER"},
    {"serviceName": "GANGLIA", "componentName": "GANGLIA_SERVER"},
    {"serviceName": "GANGLIA", "componentName": "GANGLIA_MONITOR"},
    {"serviceName": "HBASE", "componentName": "HBASE_MASTER"},
    {"serviceName": "HBASE", "componentName": "HBASE_REGIONSERVER"},
    {"serviceName": "NAGIOS", "componentName": "NAGIOS_SERVER"},
    {"serviceName": "FLUME", "componentName": "FLUME_SERVER"},
    {"serviceName": "ZOOKEEPER", "componentName": "ZOOKEEPER_SERVER"},
    {"serviceName": "OOZIE", "componentName": "OOZIE_SERVER"},
    {"serviceName": "HCATALOG", "componentName": "HCATALOG_SERVER"},
    {"serviceName": "KERBEROS", "componentName": "KERBEROS_SERVER"},
    {"serviceName": "HIVE", "componentName": "HIVE_SERVER"},
    {"serviceName": "HIVE", "componentName": "HIVE_METASTORE"},
    {"serviceName": "HIVE", "componentName": "MYSQL_SERVER"},
    {"serviceName": "WEBHCAT", "componentName": "WEBHCAT_SERVER"},
    {"serviceName": "YARN", "componentName": "RESOURCEMANAGER"},
    {"serviceName": "YARN", "componentName": "NODEMANAGER"},
    {"serviceName": "MAPREDUCE2", "componentName": "HISTORYSERVER"},
  ]

  LIVE_STATUS = "STARTED"
  DEAD_STATUS = "INSTALLED"

  def __init__(self, cluster, service, component, globalConfig, config):
    """
    :param cluster: cluster name the component belongs to
    :param service: service name (one of SERVICES)
    :param component: component name
    :param globalConfig: global configuration, handed to StatusCheck
    :param config: agent config; 'agent'/'prefix' locates the stack-versions file
    """
    self.cluster = cluster
    self.service = service
    self.component = component
    self.globalConfig = globalConfig
    versionsFileDir = config.get('agent', 'prefix')
    self.versionsHandler = StackVersionsFileHandler(versionsFileDir)
    self.actualConfigHandler = ActualConfigHandler(config)

  def belongsToService(self, component):
    """Return True if the component descriptor belongs to this service."""
    #TODO: Should also check belonging of server to cluster
    return component['serviceName'] == self.service

  # Live status was stripped from heartbeat after revision e1718dd
  def build(self):
    """Build the live-status dict for this component.

    Returns None when the component is not in either registry.
    """
    # NOTE(review): 'global' here names class attributes, not module
    # globals — it is a no-op left as-is to avoid any behavior change.
    global SERVICES, CLIENT_COMPONENTS, COMPONENTS, LIVE_STATUS, DEAD_STATUS
    statusCheck = StatusCheck(AmbariConfig.servicesToPidNames,
                              AmbariConfig.pidPathesVars, self.globalConfig,
                              AmbariConfig.servicesToLinuxUser)
    livestatus = None
    component = {"serviceName": self.service, "componentName": self.component}
    if component in self.COMPONENTS + self.CLIENT_COMPONENTS:
      # CLIENT components can't have status STARTED
      if component in self.CLIENT_COMPONENTS:
        status = self.DEAD_STATUS
      else:
        serviceStatus = statusCheck.getStatus(self.component)
        if serviceStatus is None:
          logger.warn("There is no service to pid mapping for " + self.component)
        status = self.LIVE_STATUS if serviceStatus else self.DEAD_STATUS
      livestatus = {"componentName": self.component,
                    "msg": "",
                    "status": status,
                    "clusterName": self.cluster,
                    "serviceName": self.service,
                    "stackVersion": self.versionsHandler.read_stack_version(self.component)
                    }
      active_config = self.actualConfigHandler.read_actual_component(self.component)
      if not active_config is None:
        livestatus['configurationTags'] = active_config
    logger.debug("The live status for component " + str(self.component) +\
                 " of service " + str(self.service) + " is " + str(livestatus))
    return livestatus
  def execute_command(self, command):
    '''
    Executes commands of type EXECUTION_COMMAND.

    Runs the command either through the legacy Puppet executor (format V1)
    or the custom service orchestrator, publishing an IN_PROGRESS report
    before execution and a COMPLETED/FAILED report afterwards. On success,
    the applied configuration tags are persisted via ActualConfigHandler.
    '''
    clusterName = command['clusterName']
    commandId = command['commandId']
    # Decides between the legacy puppet path and the orchestrator path below.
    command_format = self.determine_command_format_version(command)
    message = "Executing command with id = {commandId} for role = {role} of " \
              "cluster {cluster}. Command format={command_format}".format(
              commandId = str(commandId), role=command['role'],
              cluster=clusterName, command_format=command_format)
    logger.info(message)
    logger.debug(pprint.pformat(command))
    taskId = command['taskId']
    # Preparing 'IN_PROGRESS' report; tmpout/tmperr/structuredOut are the
    # per-task files the executor writes its output into.
    in_progress_status = self.commandStatuses.generate_report_template(command)
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
      'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
    self.commandStatuses.put_command_status(command, in_progress_status)
    # running command
    if command_format == self.COMMAND_FORMAT_V1:
      # Create a new instance of executor for the current thread
      puppetExecutor = PuppetExecutor.PuppetExecutor(
        self.config.get('puppet', 'puppetmodules'),
        self.config.get('puppet', 'puppet_home'),
        self.config.get('puppet', 'facter_home'),
        self.config.get('agent', 'prefix'), self.config)
      commandresult = puppetExecutor.runCommand(command,
        in_progress_status['tmpout'], in_progress_status['tmperr'])
    else:
      commandresult = self.customServiceOrchestrator.runCommand(command,
        in_progress_status['tmpout'], in_progress_status['tmperr'])
    # dumping results: non-zero exit code means the command failed
    status = self.COMPLETED_STATUS
    if commandresult['exitcode'] != 0:
      status = self.FAILED_STATUS
    roleResult = self.commandStatuses.generate_report_template(command)
    # assume some puppet plumbing to run these commands
    roleResult.update({
      'stdout': commandresult['stdout'],
      'stderr': commandresult['stderr'],
      'exitCode': commandresult['exitcode'],
      'status': status,
    })
    # Empty streams are reported as the literal string 'None' — presumably a
    # server-side expectation; TODO confirm against the heartbeat consumer.
    if roleResult['stdout'] == '':
      roleResult['stdout'] = 'None'
    if roleResult['stderr'] == '':
      roleResult['stderr'] = 'None'
    # let ambari know name of custom command
    # NOTE: dict.has_key() is Python-2-only, consistent with this file's era.
    if command['hostLevelParams'].has_key('custom_command'):
      roleResult['customCommand'] = command['hostLevelParams']['custom_command']
    if 'structuredOut' in commandresult:
      roleResult['structuredOut'] = str(commandresult['structuredOut'])
    else:
      roleResult['structuredOut'] = ''
    # let ambari know that configuration tags were applied
    if status == self.COMPLETED_STATUS:
      configHandler = ActualConfigHandler(self.config, self.configTags)
      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']
      component = {'serviceName':command['serviceName'],'componentName':command['role']}
      # Per-component tags are persisted for START, client INSTALL, and the
      # RESTART custom command — the cases after which active config changes.
      if command.has_key('roleCommand') and \
          (command['roleCommand'] == self.ROLE_COMMAND_START or \
          (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
          and component in LiveStatus.CLIENT_COMPONENTS) or \
          (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
          command['hostLevelParams'].has_key('custom_command') and \
          command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
        configHandler.write_actual_component(command['role'], command['configurationTags'])
        configHandler.write_client_components(command['serviceName'], command['configurationTags'])
        # Report back what was actually written, not what was requested.
        roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])
    self.commandStatuses.put_command_status(command, roleResult)
  def execute_command(self, command):
    '''
    Executes commands of type EXECUTION_COMMAND.

    Runs the command through the custom service orchestrator, retrying up to
    ``max_duration_for_retries`` seconds when retries are enabled. Publishes
    an IN_PROGRESS report first and a final COMPLETED/FAILED report at the
    end; cancelled-and-rescheduled commands send no final failure report.
    '''
    clusterId = command['clusterId']
    commandId = command['commandId']
    isCommandBackground = command[
      'commandType'] == self.BACKGROUND_EXECUTION_COMMAND
    isAutoExecuteCommand = command[
      'commandType'] == self.AUTO_EXECUTION_COMMAND
    message = "Executing command with id = {commandId}, taskId = {taskId} for role = {role} of " \
              "cluster_id {cluster}.".format(
              commandId = str(commandId), taskId = str(command['taskId']),
              role=command['role'], cluster=clusterId)
    logger.info(message)

    taskId = command['taskId']
    # Preparing 'IN_PROGRESS' report
    in_progress_status = self.commandStatuses.generate_report_template(
      command)
    # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
    # server. The prefix is defined in agent-config.ini
    if not isAutoExecuteCommand:
      in_progress_status.update({
        'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
        'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
        'structuredOut': self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
        'status': self.IN_PROGRESS_STATUS
      })
    else:
      # Auto-execute (recovery) commands log to separate 'auto_*' files.
      in_progress_status.update({
        'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
        'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
        'structuredOut': self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
        'status': self.IN_PROGRESS_STATUS
      })
    self.commandStatuses.put_command_status(command, in_progress_status)

    numAttempts = 0
    retryDuration = 0  # even with 0 allow one attempt
    retryAble = False
    delay = 1
    log_command_output = True
    # commandParams.log_output == "false" suppresses echoing the command's
    # stdout/stderr into the agent log below.
    if 'commandParams' in command and 'log_output' in command[
        'commandParams'] and "false" == command['commandParams'][
        'log_output']:
      log_command_output = False

    if 'commandParams' in command:
      if 'max_duration_for_retries' in command['commandParams']:
        retryDuration = int(
          command['commandParams']['max_duration_for_retries'])
      if 'command_retry_enabled' in command['commandParams']:
        retryAble = command['commandParams'][
          'command_retry_enabled'] == "true"
    if isAutoExecuteCommand:
      # Recovery commands are never retried here.
      retryAble = False

    logger.info(
      "Command execution metadata - taskId = {taskId}, retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}, log_output = {log_command_output}"
      .format(taskId=taskId, retryAble=retryAble, retryDuration=retryDuration,
              log_command_output=log_command_output))
    command_canceled = False
    # Retry loop: retryDuration is a time budget in seconds, decremented by
    # each attempt's wall-clock duration and each sleep between attempts.
    while retryDuration >= 0:
      numAttempts += 1
      start = 0
      if retryAble:
        start = int(time.time())
      # running command
      commandresult = self.customServiceOrchestrator.runCommand(
        command, in_progress_status['tmpout'], in_progress_status['tmperr'],
        override_output_files=numAttempts == 1, retry=numAttempts > 1)
      end = 1
      if retryAble:
        end = int(time.time())
      retryDuration -= (end - start)

      # dumping results
      if isCommandBackground:
        # Background commands report via a separate mechanism; no final
        # report is produced here.
        logger.info(
          "Command is background command, quit retrying. Exit code: {exitCode}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
          .format(cid=taskId, exitCode=commandresult['exitcode'],
                  retryAble=retryAble, retryDuration=retryDuration,
                  delay=delay))
        return
      else:
        if commandresult['exitcode'] == 0:
          status = self.COMPLETED_STATUS
        else:
          status = self.FAILED_STATUS
          # A SIGTERM/SIGKILL exit code means the command was cancelled.
          if (commandresult['exitcode'] == -signal.SIGTERM) or (commandresult['exitcode'] == -signal.SIGKILL):
            logger.info(
              'Command with taskId = {cid} was canceled!'.format(
                cid=taskId))
            command_canceled = True
            break

      if status != self.COMPLETED_STATUS and retryAble and retryDuration > 0:
        delay = self.get_retry_delay(delay)
        if delay > retryDuration:
          delay = retryDuration
        retryDuration -= delay  # allow one last attempt
        commandresult[
          'stderr'] += "\n\nCommand failed. Retrying command execution ...\n\n"
        logger.info(
          "Retrying command with taskId = {cid} after a wait of {delay}"
          .format(cid=taskId, delay=delay))
        if 'agentLevelParams' not in command:
          command['agentLevelParams'] = {}
        command['agentLevelParams']['commandBeingRetried'] = "true"
        time.sleep(delay)
        continue
      else:
        logger.info(
          "Quit retrying for command with taskId = {cid}. Status: {status}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
          .format(cid=taskId, status=status, retryAble=retryAble,
                  retryDuration=retryDuration, delay=delay))
        break

    # do not fail task which was rescheduled from server
    if command_canceled:
      with self.lock:
        with self.commandQueue.mutex:
          for com in self.commandQueue.queue:
            if com['taskId'] == command['taskId']:
              logger.info(
                'Command with taskId = {cid} was rescheduled by server. '
                'Fail report on cancelled command won\'t be sent with heartbeat.'
                .format(cid=taskId))
              return

    # final result to stdout
    commandresult[
      'stdout'] += '\n\nCommand completed successfully!\n' if status == self.COMPLETED_STATUS else '\n\nCommand failed after ' + str(
      numAttempts) + ' tries\n'
    logger.info(
      'Command with taskId = {cid} completed successfully!'.format(
        cid=taskId) if status == self.COMPLETED_STATUS else
      'Command with taskId = {cid} failed after {attempts} tries'.format(
        cid=taskId, attempts=numAttempts))

    roleResult = self.commandStatuses.generate_report_template(command)
    roleResult.update({
      'stdout': commandresult['stdout'],
      'stderr': commandresult['stderr'],
      'exitCode': commandresult['exitcode'],
      'status': status,
    })

    # Optionally echo the command's full output into the agent log.
    if self.config.has_option("logging","log_command_executes") \
        and int(self.config.get("logging", "log_command_executes")) == 1 \
        and log_command_output:
      if roleResult['stdout'] != '':
        logger.info("Begin command output log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
        self.log_command_output(roleResult['stdout'], str(command['taskId']))
        logger.info("End command output log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
      if roleResult['stderr'] != '':
        logger.info("Begin command stderr log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
        self.log_command_output(roleResult['stderr'], str(command['taskId']))
        logger.info("End command stderr log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])

    # Empty streams are reported as the literal string 'None' — presumably a
    # server-side expectation; TODO confirm against the heartbeat consumer.
    if roleResult['stdout'] == '':
      roleResult['stdout'] = 'None'
    if roleResult['stderr'] == '':
      roleResult['stderr'] = 'None'

    # let ambari know name of custom command
    if 'commandParams' in command and command['commandParams'].has_key(
        'custom_command'):
      roleResult['customCommand'] = command['commandParams'][
        'custom_command']

    if 'structuredOut' in commandresult:
      roleResult['structuredOut'] = str(
        json.dumps(commandresult['structuredOut']))
    else:
      roleResult['structuredOut'] = ''

    # let recovery manager know the current state
    if status == self.COMPLETED_STATUS:
      # let ambari know that configuration tags were applied
      configHandler = ActualConfigHandler(self.config, self.configTags)
      """
      #update
      if 'commandParams' in command:
        command_params = command['commandParams']
        if command_params and command_params.has_key('forceRefreshConfigTags') and len(command_params['forceRefreshConfigTags']) > 0 :
          forceRefreshConfigTags = command_params['forceRefreshConfigTags'].split(',')
          logger.info("Got refresh additional component tags command")
          for configTag in forceRefreshConfigTags :
            configHandler.update_component_tag(command['role'], configTag, command['configurationTags'][configTag])
          roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART # force restart for component to evict stale_config on server side
          command['configurationTags'] = configHandler.read_actual_component(command['role'])
      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']
      component = {'serviceName':command['serviceName'],'componentName':command['role']}
      if 'roleCommand' in command and \
          (command['roleCommand'] == self.ROLE_COMMAND_START or
          (command['roleCommand'] == self.ROLE_COMMAND_INSTALL and component in LiveStatus.CLIENT_COMPONENTS) or
          (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and
          'custom_command' in command['hostLevelParams'] and
          command['hostLevelParams']['custom_command'] in (self.CUSTOM_COMMAND_RESTART, self.CUSTOM_COMMAND_START, self.CUSTOM_COMMAND_RECONFIGURE))):
        configHandler.write_actual_component(command['role'],
          command['configurationTags'])
        if 'clientsToUpdateConfigs' in command['hostLevelParams'] and command['hostLevelParams']['clientsToUpdateConfigs']:
          configHandler.write_client_components(command['serviceName'],
            command['configurationTags'],
            command['hostLevelParams']['clientsToUpdateConfigs'])
        roleResult['configurationTags'] = configHandler.read_actual_component(
          command['role'])
      """

    self.recovery_manager.process_execution_command_result(command, status)
    self.commandStatuses.put_command_status(command, roleResult)

    # Trigger an immediate status check unless the command carries a
    # placeholder cluster id ('-1' or 'null').
    cluster_id = str(command['clusterId'])
    if cluster_id != '-1' and cluster_id != 'null':
      service_name = command['serviceName']
      if service_name != 'null':
        component_name = command['role']
        # NOTE(review): passes the raw `clusterId` while the guard above uses
        # the stringified `cluster_id` — looks inconsistent; verify which form
        # check_component_status expects.
        self.component_status_executor.check_component_status(
          clusterId, service_name, component_name, "STATUS", report=True)
  def execute_command(self, command):
    '''
    Executes commands of type EXECUTION_COMMAND.

    Runs the command through the custom service orchestrator with a
    time-budgeted retry loop, then reports the final status, updates the
    recovery manager's view of the component state, and persists applied
    configuration tags on success.
    '''
    clusterName = command['clusterName']
    commandId = command['commandId']
    isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
    isAutoExecuteCommand = command['commandType'] == self.AUTO_EXECUTION_COMMAND
    message = "Executing command with id = {commandId}, taskId = {taskId} for role = {role} of " \
              "cluster {cluster}.".format(
              commandId = str(commandId), taskId = str(command['taskId']),
              role=command['role'], cluster=clusterName)
    logger.info(message)

    taskId = command['taskId']
    # Preparing 'IN_PROGRESS' report
    in_progress_status = self.commandStatuses.generate_report_template(command)
    # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
    # server. The prefix is defined in agent-config.ini
    if not isAutoExecuteCommand:
      in_progress_status.update({
        'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
        'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
        'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
        'status': self.IN_PROGRESS_STATUS
      })
    else:
      # Auto-execute (recovery) commands log to separate 'auto_*' files.
      in_progress_status.update({
        'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
        'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
        'structuredOut' : self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
        'status': self.IN_PROGRESS_STATUS
      })
    self.commandStatuses.put_command_status(command, in_progress_status)

    numAttempts = 0
    retryDuration = 0  # even with 0 allow one attempt
    retryAble = False
    delay = 1
    log_command_output = True
    # commandParams.log_output == "false" suppresses echoing the command's
    # stdout/stderr into the agent log below.
    if 'commandParams' in command and 'log_output' in command['commandParams'] and "false" == command['commandParams']['log_output']:
      log_command_output = False

    if 'commandParams' in command:
      if 'max_duration_for_retries' in command['commandParams']:
        retryDuration = int(command['commandParams']['max_duration_for_retries'])
      if 'command_retry_enabled' in command['commandParams']:
        retryAble = command['commandParams']['command_retry_enabled'] == "true"
    if isAutoExecuteCommand:
      # Recovery commands are never retried here.
      retryAble = False

    logger.info("Command execution metadata - taskId = {taskId}, retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}, log_output = {log_command_output}".
                format(taskId=taskId, retryAble=retryAble, retryDuration=retryDuration, log_command_output=log_command_output))
    # Retry loop: retryDuration is a time budget in seconds, decremented by
    # each attempt's wall-clock duration and each sleep between attempts.
    while retryDuration >= 0:
      numAttempts += 1
      start = 0
      if retryAble:
        start = int(time.time())
      # running command
      commandresult = self.customServiceOrchestrator.runCommand(command,
        in_progress_status['tmpout'], in_progress_status['tmperr'],
        override_output_files=numAttempts == 1, retry=numAttempts > 1)
      end = 1
      if retryAble:
        end = int(time.time())
      retryDuration -= (end - start)

      # dumping results
      if isCommandBackground:
        # Background commands report via a separate mechanism; no final
        # report is produced here.
        logger.info("Command is background command, quit retrying. Exit code: {exitCode}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                    .format(cid=taskId, exitCode=commandresult['exitcode'], retryAble=retryAble, retryDuration=retryDuration, delay=delay))
        return
      else:
        if commandresult['exitcode'] == 0:
          status = self.COMPLETED_STATUS
        else:
          status = self.FAILED_STATUS
          # A SIGTERM/SIGKILL exit code means the command was cancelled.
          if (commandresult['exitcode'] == -signal.SIGTERM) or (commandresult['exitcode'] == -signal.SIGKILL):
            logger.info('Command {cid} was canceled!'.format(cid=taskId))
            break

      if status != self.COMPLETED_STATUS and retryAble and retryDuration > 0:
        delay = self.get_retry_delay(delay)
        if delay > retryDuration:
          delay = retryDuration
        retryDuration -= delay  # allow one last attempt
        commandresult['stderr'] += "\n\nCommand failed. Retrying command execution ...\n\n"
        logger.info("Retrying command id {cid} after a wait of {delay}".format(cid=taskId, delay=delay))
        time.sleep(delay)
        continue
      else:
        logger.info("Quit retrying for command id {cid}. Status: {status}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                    .format(cid=taskId, status=status, retryAble=retryAble, retryDuration=retryDuration, delay=delay))
        break

    # final result to stdout
    commandresult['stdout'] += '\n\nCommand completed successfully!\n' if status == self.COMPLETED_STATUS else '\n\nCommand failed after ' + str(numAttempts) + ' tries\n'
    logger.info('Command {cid} completed successfully!'.format(cid=taskId) if status == self.COMPLETED_STATUS else 'Command {cid} failed after {attempts} tries'.format(cid=taskId, attempts=numAttempts))

    roleResult = self.commandStatuses.generate_report_template(command)
    roleResult.update({
      'stdout': commandresult['stdout'],
      'stderr': commandresult['stderr'],
      'exitCode': commandresult['exitcode'],
      'status': status,
    })

    # Optionally echo the command's full output into the agent log.
    if self.config.has_option("logging","log_command_executes") \
        and int(self.config.get("logging", "log_command_executes")) == 1 \
        and log_command_output:
      if roleResult['stdout'] != '':
        logger.info("Begin command output log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
        self.log_command_output(roleResult['stdout'], str(command['taskId']))
        logger.info("End command output log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
      if roleResult['stderr'] != '':
        logger.info("Begin command stderr log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])
        self.log_command_output(roleResult['stderr'], str(command['taskId']))
        logger.info("End command stderr log for command with id = " + str(command['taskId']) + ", role = " + command['role'] + ", roleCommand = " + command['roleCommand'])

    # Empty streams are reported as the literal string 'None' — presumably a
    # server-side expectation; TODO confirm against the heartbeat consumer.
    if roleResult['stdout'] == '':
      roleResult['stdout'] = 'None'
    if roleResult['stderr'] == '':
      roleResult['stderr'] = 'None'

    # let ambari know name of custom command
    # NOTE: dict.has_key() is Python-2-only, consistent with this file's era.
    if command['hostLevelParams'].has_key('custom_command'):
      roleResult['customCommand'] = command['hostLevelParams']['custom_command']

    if 'structuredOut' in commandresult:
      roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
    else:
      roleResult['structuredOut'] = ''

    # let recovery manager know the current state
    if status == self.COMPLETED_STATUS:
      if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
          and self.controller.recovery_manager.configured_for_recovery(command['role']):
        if command['roleCommand'] == self.ROLE_COMMAND_START:
          # Successful START => component is live; config no longer stale.
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
          self.controller.recovery_manager.update_config_staleness(command['role'], False)
          logger.info("After EXECUTION_COMMAND (START), with taskId=" + str(command['taskId']) +
                      ", current state of " + command['role'] + " to " +
                      self.controller.recovery_manager.get_current_status(command['role']) )
        elif command['roleCommand'] == self.ROLE_COMMAND_STOP or command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
          # STOP/INSTALL => component is installed but not running.
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.DEAD_STATUS)
          logger.info("After EXECUTION_COMMAND (STOP/INSTALL), with taskId=" + str(command['taskId']) +
                      ", current state of " + command['role'] + " to " +
                      self.controller.recovery_manager.get_current_status(command['role']) )
        elif command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND:
          if command['hostLevelParams'].has_key('custom_command') and \
              command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART:
            # Successful RESTART behaves like START for recovery purposes.
            self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
            self.controller.recovery_manager.update_config_staleness(command['role'], False)
            logger.info("After EXECUTION_COMMAND (RESTART), current state of " + command['role'] + " to " +
                        self.controller.recovery_manager.get_current_status(command['role']) )
      pass

      # let ambari know that configuration tags were applied
      configHandler = ActualConfigHandler(self.config, self.configTags)
      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']
      component = {'serviceName':command['serviceName'],'componentName':command['role']}
      # Per-component tags are persisted for START, client INSTALL, and the
      # RESTART custom command — the cases after which active config changes.
      if 'roleCommand' in command and \
          (command['roleCommand'] == self.ROLE_COMMAND_START or
          (command['roleCommand'] == self.ROLE_COMMAND_INSTALL and component in LiveStatus.CLIENT_COMPONENTS) or
          (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and
          'custom_command' in command['hostLevelParams'] and
          command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
        configHandler.write_actual_component(command['role'], command['configurationTags'])
        if 'clientsToUpdateConfigs' in command['hostLevelParams'] and command['hostLevelParams']['clientsToUpdateConfigs']:
          configHandler.write_client_components(command['serviceName'], command['configurationTags'],
                                                command['hostLevelParams']['clientsToUpdateConfigs'])
        # Report back what was actually written, not what was requested.
        roleResult['configurationTags'] = configHandler.read_actual_component(
          command['role'])
    elif status == self.FAILED_STATUS:
      if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
          and self.controller.recovery_manager.configured_for_recovery(command['role']):
        if command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
          # Failed INSTALL goes to a dedicated recovery state.
          self.controller.recovery_manager.update_current_status(command['role'],
            self.controller.recovery_manager.INSTALL_FAILED)
          logger.info("After EXECUTION_COMMAND (INSTALL), with taskId=" + str(command['taskId']) +
                      ", current state of " + command['role'] + " to " +
                      self.controller.recovery_manager.get_current_status(command['role']))

    self.commandStatuses.put_command_status(command, roleResult)