Example #1
 def __init__(self, cluster, service, component, globalConfig, config):
     self.cluster = cluster
     self.service = service
     self.component = component
     self.globalConfig = globalConfig
     versionsFileDir = config.get('agent', 'prefix')
     self.versionsHandler = StackVersionsFileHandler(versionsFileDir)
     self.actualConfigHandler = ActualConfigHandler(config)
Example #2
 def __init__(self, cluster, service, component, globalConfig, config,
              configTags):
     self.cluster = cluster
     self.service = service
     self.component = component
     self.globalConfig = globalConfig
     self.configTags = configTags
     self.actualConfigHandler = ActualConfigHandler(config, configTags)
Example #3
 def __init__(self, cluster, service, component, globalConfig, config,
              configTags):
   self.cluster = cluster
   self.service = service
   self.component = component
   self.globalConfig = globalConfig
   versionsFileDir = config.get('agent', 'prefix')
   self.versionsHandler = StackVersionsFileHandler(versionsFileDir)
   self.configTags = configTags
   self.actualConfigHandler = ActualConfigHandler(config, configTags)
Example #4
class LiveStatus:

    SERVICES = []
    CLIENT_COMPONENTS = []
    COMPONENTS = []

    LIVE_STATUS = "STARTED"
    DEAD_STATUS = "INSTALLED"

    def __init__(self, cluster, service, component, globalConfig, config,
                 configTags):
        self.logger = logging.getLogger()
        self.cluster = cluster
        self.service = service
        self.component = component
        self.globalConfig = globalConfig
        self.configTags = configTags
        self.actualConfigHandler = ActualConfigHandler(config, configTags)

    def build(self, component_status):
        """
    :param component_status: component status to include into report
    :return: populated livestatus dict
    """

        livestatus = {
            "componentName": self.component,
            "msg": "",
            "status": component_status,
            "clusterName": self.cluster,
            "serviceName": self.service,
            "stackVersion": ""  # TODO: populate ?
        }

        active_config = self.actualConfigHandler.read_actual_component(
            self.component)
        if active_config is not None:
            livestatus['configurationTags'] = active_config

        self.logger.debug(
            "The live status for component %s of service %s is %s",
            self.component, self.service, livestatus)
        return livestatus
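
The dict assembled by build() above can be made concrete with a small, self-contained sketch. The build_report helper, the stub handler, and the sample tag values below are illustrative assumptions, not part of the Ambari agent code in these examples:

class StubActualConfigHandler:
    """Stand-in for ActualConfigHandler; pretends one tag set was applied."""
    def read_actual_component(self, component):
        return {"hdfs-site": {"tag": "version1"}}  # illustrative value

def build_report(cluster, service, component, component_status, handler):
    # Mirrors the field names used in Example #4.
    livestatus = {
        "componentName": component,
        "msg": "",
        "status": component_status,
        "clusterName": cluster,
        "serviceName": service,
        "stackVersion": "",
    }
    active_config = handler.read_actual_component(component)
    if active_config is not None:
        livestatus["configurationTags"] = active_config
    return livestatus

print(build_report("c1", "HDFS", "DATANODE", "STARTED", StubActualConfigHandler()))
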
Example #5
class LiveStatus:
    LIVE_STATUS = "STARTED"
    DEAD_STATUS = "INSTALLED"

    def __init__(self, cluster, service, component, globalConfig, config,
                 configTags):
        self.cluster = cluster
        self.service = service
        self.component = component
        self.globalConfig = globalConfig
        self.configTags = configTags
        self.actualConfigHandler = ActualConfigHandler(config, configTags)

    def build(self, component_status):
        """
    :param component_status: component status to include into report
    :return: populated livestatus dict
    """
        global LIVE_STATUS, DEAD_STATUS

        livestatus = {
            "componentName": self.component,
            "msg": "",
            "status": component_status,
            "clusterName": self.cluster,
            "serviceName": self.service,
            "stackVersion": ""  # TODO: populate ?
        }

        active_config = self.actualConfigHandler.read_actual_component(
            self.component)
        if active_config is not None:
            livestatus['configurationTags'] = active_config

        logger.debug("The live status for component " + str(self.component) +
                     " of service " + str(self.service) + " is " +
                     str(livestatus))
        return livestatus
Example #6
    def execute_command(self, command):
        '''
    Executes commands of type EXECUTION_COMMAND
    '''
        clusterName = command['clusterName']
        commandId = command['commandId']
        isCommandBackground = command[
            'commandType'] == self.BACKGROUND_EXECUTION_COMMAND
        isAutoExecuteCommand = command[
            'commandType'] == self.AUTO_EXECUTION_COMMAND
        message = "Executing command with id = {commandId}, taskId = {taskId} for role = {role} of " \
                  "cluster {cluster}.".format(
                  commandId = str(commandId), taskId = str(command['taskId']),
                  role=command['role'], cluster=clusterName)
        logger.info(message)

        taskId = command['taskId']
        # Preparing 'IN_PROGRESS' report
        in_progress_status = self.commandStatuses.generate_report_template(
            command)
        # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
        # server. The prefix is defined in agent-config.ini
        if not isAutoExecuteCommand:
            in_progress_status.update({
                'tmpout':
                self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
                'tmperr':
                self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
                'structuredOut':
                self.tmpdir + os.sep + 'structured-out-' + str(taskId) +
                '.json',
                'status':
                self.IN_PROGRESS_STATUS
            })
        else:
            in_progress_status.update({
                'tmpout':
                self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
                'tmperr':
                self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
                'structuredOut':
                self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) +
                '.json',
                'status':
                self.IN_PROGRESS_STATUS
            })

        self.commandStatuses.put_command_status(command, in_progress_status)

        numAttempts = 0
        retryDuration = 0  # even with 0 allow one attempt
        retryAble = False
        delay = 1
        log_command_output = True
        if 'commandParams' in command and 'log_output' in command[
                'commandParams'] and "false" == command['commandParams'][
                    'log_output']:
            log_command_output = False

        if 'commandParams' in command:
            if 'max_duration_for_retries' in command['commandParams']:
                retryDuration = int(
                    command['commandParams']['max_duration_for_retries'])
            if 'command_retry_enabled' in command['commandParams']:
                retryAble = command['commandParams'][
                    'command_retry_enabled'] == "true"
        if isAutoExecuteCommand:
            retryAble = False

        logger.info(
            "Command execution metadata - taskId = {taskId}, retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}, log_output = {log_command_output}"
            .format(taskId=taskId,
                    retryAble=retryAble,
                    retryDuration=retryDuration,
                    log_command_output=log_command_output))
        while retryDuration >= 0:
            numAttempts += 1
            start = 0
            if retryAble:
                start = int(time.time())
            # running command
            commandresult = self.customServiceOrchestrator.runCommand(
                command,
                in_progress_status['tmpout'],
                in_progress_status['tmperr'],
                override_output_files=numAttempts == 1,
                retry=numAttempts > 1)
            end = 1
            if retryAble:
                end = int(time.time())
            retryDuration -= (end - start)

            # dumping results
            if isCommandBackground:
                logger.info(
                    "Command is background command, quit retrying. Exit code: {exitCode}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                    .format(cid=taskId,
                            exitCode=commandresult['exitcode'],
                            retryAble=retryAble,
                            retryDuration=retryDuration,
                            delay=delay))
                return
            else:
                if commandresult['exitcode'] == 0:
                    status = self.COMPLETED_STATUS
                else:
                    status = self.FAILED_STATUS
                    if (commandresult['exitcode']
                            == -signal.SIGTERM) or (commandresult['exitcode']
                                                    == -signal.SIGKILL):
                        logger.info(
                            'Command {cid} was canceled!'.format(cid=taskId))
                        break

            if status != self.COMPLETED_STATUS and retryAble and retryDuration > 0:
                delay = self.get_retry_delay(delay)
                if delay > retryDuration:
                    delay = retryDuration
                retryDuration -= delay  # allow one last attempt
                commandresult[
                    'stderr'] += "\n\nCommand failed. Retrying command execution ...\n\n"
                logger.info(
                    "Retrying command id {cid} after a wait of {delay}".format(
                        cid=taskId, delay=delay))
                time.sleep(delay)
                continue
            else:
                logger.info(
                    "Quit retrying for command id {cid}. Status: {status}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                    .format(cid=taskId,
                            status=status,
                            retryAble=retryAble,
                            retryDuration=retryDuration,
                            delay=delay))
                break

        # final result to stdout
        commandresult[
            'stdout'] += '\n\nCommand completed successfully!\n' if status == self.COMPLETED_STATUS else '\n\nCommand failed after ' + str(
                numAttempts) + ' tries\n'
        logger.info('Command {cid} completed successfully!'.format(
            cid=taskId) if status == self.COMPLETED_STATUS else
                    'Command {cid} failed after {attempts} tries'.
                    format(cid=taskId, attempts=numAttempts))

        roleResult = self.commandStatuses.generate_report_template(command)
        roleResult.update({
            'stdout': commandresult['stdout'],
            'stderr': commandresult['stderr'],
            'exitCode': commandresult['exitcode'],
            'status': status,
        })

        if self.config.has_option("logging","log_command_executes") \
            and int(self.config.get("logging", "log_command_executes")) == 1 \
            and log_command_output:

            if roleResult['stdout'] != '':
                logger.info("Begin command output log for command with id = " +
                            str(command['taskId']) + ", role = " +
                            command['role'] + ", roleCommand = " +
                            command['roleCommand'])
                self.log_command_output(roleResult['stdout'],
                                        str(command['taskId']))
                logger.info("End command output log for command with id = " +
                            str(command['taskId']) + ", role = " +
                            command['role'] + ", roleCommand = " +
                            command['roleCommand'])

            if roleResult['stderr'] != '':
                logger.info("Begin command stderr log for command with id = " +
                            str(command['taskId']) + ", role = " +
                            command['role'] + ", roleCommand = " +
                            command['roleCommand'])
                self.log_command_output(roleResult['stderr'],
                                        str(command['taskId']))
                logger.info("End command stderr log for command with id = " +
                            str(command['taskId']) + ", role = " +
                            command['role'] + ", roleCommand = " +
                            command['roleCommand'])

        if roleResult['stdout'] == '':
            roleResult['stdout'] = 'None'
        if roleResult['stderr'] == '':
            roleResult['stderr'] = 'None'

        # let ambari know name of custom command
        if command['hostLevelParams'].has_key('custom_command'):
            roleResult['customCommand'] = command['hostLevelParams'][
                'custom_command']

        if 'structuredOut' in commandresult:
            roleResult['structuredOut'] = str(
                json.dumps(commandresult['structuredOut']))
        else:
            roleResult['structuredOut'] = ''

        # let recovery manager know the current state
        if status == self.COMPLETED_STATUS:
            if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
                and self.controller.recovery_manager.configured_for_recovery(command['role']):
                if command['roleCommand'] == self.ROLE_COMMAND_START:
                    self.controller.recovery_manager.update_current_status(
                        command['role'], LiveStatus.LIVE_STATUS)
                    self.controller.recovery_manager.update_config_staleness(
                        command['role'], False)
                    logger.info(
                        "After EXECUTION_COMMAND (START), with taskId=" +
                        str(command['taskId']) + ", current state of " +
                        command['role'] + " to " +
                        self.controller.recovery_manager.get_current_status(
                            command['role']))
                elif command[
                        'roleCommand'] == self.ROLE_COMMAND_STOP or command[
                            'roleCommand'] == self.ROLE_COMMAND_INSTALL:
                    self.controller.recovery_manager.update_current_status(
                        command['role'], LiveStatus.DEAD_STATUS)
                    logger.info(
                        "After EXECUTION_COMMAND (STOP/INSTALL), with taskId="
                        + str(command['taskId']) + ", current state of " +
                        command['role'] + " to " +
                        self.controller.recovery_manager.get_current_status(
                            command['role']))
                elif command[
                        'roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND:
                    if command['hostLevelParams'].has_key('custom_command') and \
                            command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART:
                        self.controller.recovery_manager.update_current_status(
                            command['role'], LiveStatus.LIVE_STATUS)
                        self.controller.recovery_manager.update_config_staleness(
                            command['role'], False)
                        logger.info(
                            "After EXECUTION_COMMAND (RESTART), current state of "
                            + command['role'] + " to " +
                            self.controller.recovery_manager.
                            get_current_status(command['role']))
            pass

            # let ambari know that configuration tags were applied
            configHandler = ActualConfigHandler(self.config, self.configTags)

            if command.has_key('configurationTags'):
                configHandler.write_actual(command['configurationTags'])
                roleResult['configurationTags'] = command['configurationTags']
            component = {
                'serviceName': command['serviceName'],
                'componentName': command['role']
            }
            if 'roleCommand' in command and \
                (command['roleCommand'] == self.ROLE_COMMAND_START or
                   (command['roleCommand'] == self.ROLE_COMMAND_INSTALL and component in LiveStatus.CLIENT_COMPONENTS) or
                     (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and
                        'custom_command' in command['hostLevelParams'] and
                            command['hostLevelParams']['custom_command'] in (self.CUSTOM_COMMAND_RESTART, self.CUSTOM_COMMAND_START))):
                configHandler.write_actual_component(
                    command['role'], command['configurationTags'])
                if 'clientsToUpdateConfigs' in command[
                        'hostLevelParams'] and command['hostLevelParams'][
                            'clientsToUpdateConfigs']:
                    configHandler.write_client_components(
                        command['serviceName'], command['configurationTags'],
                        command['hostLevelParams']['clientsToUpdateConfigs'])
                roleResult[
                    'configurationTags'] = configHandler.read_actual_component(
                        command['role'])
        elif status == self.FAILED_STATUS:
            if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
                    and self.controller.recovery_manager.configured_for_recovery(command['role']):
                if command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
                    self.controller.recovery_manager.update_current_status(
                        command['role'],
                        self.controller.recovery_manager.INSTALL_FAILED)
                    logger.info(
                        "After EXECUTION_COMMAND (INSTALL), with taskId=" +
                        str(command['taskId']) + ", current state of " +
                        command['role'] + " to " +
                        self.controller.recovery_manager.get_current_status(
                            command['role']))

        self.commandStatuses.put_command_status(command, roleResult)
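
The retry loop in the example above charges both attempt runtime and backoff sleeps against max_duration_for_retries. The standalone simulation below sketches that accounting under stated assumptions (a doubling get_retry_delay, a fake two-second attempt, and no real sleeping); it is not the agent's implementation.

import random

def get_retry_delay(last_delay):
    # assumed doubling backoff; the real get_retry_delay is not shown above
    return last_delay * 2

def run_once():
    return random.choice([0, 1])  # pretend exit code of a single attempt

def execute_with_retry(retry_duration, retry_able=True):
    num_attempts = 0
    delay = 1
    status = "FAILED"
    while retry_duration >= 0:
        num_attempts += 1
        elapsed = 2 if retry_able else 1  # pretend each attempt takes 2 seconds
        exitcode = run_once()
        retry_duration -= elapsed
        status = "COMPLETED" if exitcode == 0 else "FAILED"
        if status != "COMPLETED" and retry_able and retry_duration > 0:
            delay = min(get_retry_delay(delay), retry_duration)
            retry_duration -= delay  # allow one last attempt
            continue  # the real loop calls time.sleep(delay) here
        break
    return status, num_attempts

print(execute_with_retry(10))
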
Example #7
  def executeCommand(self, command):
    clusterName = command['clusterName']
    commandId = command['commandId']
    hostname = command['hostname']
    params = command['hostLevelParams']
    clusterHostInfo = command['clusterHostInfo']
    roleCommand = command['roleCommand']
    serviceName = command['serviceName']
    configurations = command['configurations']
    result = []

    logger.info("Executing command with id = " + str(commandId) +\
                " for role = " + command['role'] + " of " +\
                "cluster " + clusterName)
    logger.debug(pprint.pformat(command))

    taskId = command['taskId']
    # Preparing 'IN_PROGRESS' report
    self.commandInProgress = {
      'role': command['role'],
      'actionId': commandId,
      'taskId': taskId,
      'clusterName': clusterName,
      'serviceName': serviceName,
      'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
      'roleCommand': roleCommand
    }
    # running command
    if command['commandType'] == ActionQueue.EXECUTION_COMMAND:
      if command['roleCommand'] == ActionQueue.UPGRADE_STATUS:
        commandresult = self.upgradeExecutor.perform_stack_upgrade(command, self.commandInProgress['tmpout'],
          self.commandInProgress['tmperr'])
      else:
        commandresult = self.puppetExecutor.runCommand(command, self.commandInProgress['tmpout'],
          self.commandInProgress['tmperr'])
      # dumping results
    self.commandInProgress = None
    status = "COMPLETED"
    if commandresult['exitcode'] != 0:
      status = "FAILED"

    # assume some puppet plumbing to run these commands
    roleResult = {'role': command['role'],
                  'actionId': commandId,
                  'taskId': command['taskId'],
                  'stdout': commandresult['stdout'],
                  'clusterName': clusterName,
                  'stderr': commandresult['stderr'],
                  'exitCode': commandresult['exitcode'],
                  'serviceName': serviceName,
                  'status': status,
                  'roleCommand': roleCommand}
    if roleResult['stdout'] == '':
      roleResult['stdout'] = 'None'
    if roleResult['stderr'] == '':
      roleResult['stderr'] = 'None'

    # let ambari know that configuration tags were applied
    if status == 'COMPLETED':
      configHandler = ActualConfigHandler(self.config)
      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']

      if command.has_key('roleCommand') and command['roleCommand'] == 'START':
        configHandler.copy_to_component(command['role'])
        roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])

    result.append(roleResult)
    return result
Example #8
    def execute_command(self, command):
        '''
    Executes commands of type EXECUTION_COMMAND
    '''
        clusterName = command['clusterName']
        commandId = command['commandId']
        isCommandBackground = command[
            'commandType'] == self.BACKGROUND_EXECUTION_COMMAND
        message = "Executing command with id = {commandId} for role = {role} of " \
                  "cluster {cluster}.".format(
                  commandId = str(commandId), role=command['role'],
                  cluster=clusterName)
        logger.info(message)
        logger.debug(pprint.pformat(command))

        taskId = command['taskId']
        # Preparing 'IN_PROGRESS' report
        in_progress_status = self.commandStatuses.generate_report_template(
            command)
        # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
        # server. The prefix is defined in agent-config.ini
        in_progress_status.update({
            'tmpout':
            self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
            'tmperr':
            self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
            'structuredOut':
            self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
            'status':
            self.IN_PROGRESS_STATUS
        })
        self.commandStatuses.put_command_status(command, in_progress_status)

        # running command
        commandresult = self.customServiceOrchestrator.runCommand(
            command, in_progress_status['tmpout'],
            in_progress_status['tmperr'])

        # dumping results
        if isCommandBackground:
            return
        else:
            status = self.COMPLETED_STATUS if commandresult[
                'exitcode'] == 0 else self.FAILED_STATUS
        roleResult = self.commandStatuses.generate_report_template(command)
        roleResult.update({
            'stdout': commandresult['stdout'],
            'stderr': commandresult['stderr'],
            'exitCode': commandresult['exitcode'],
            'status': status,
        })
        if roleResult['stdout'] == '':
            roleResult['stdout'] = 'None'
        if roleResult['stderr'] == '':
            roleResult['stderr'] = 'None'

        # let ambari know name of custom command
        if command['hostLevelParams'].has_key('custom_command'):
            roleResult['customCommand'] = command['hostLevelParams'][
                'custom_command']

        if 'structuredOut' in commandresult:
            roleResult['structuredOut'] = str(
                json.dumps(commandresult['structuredOut']))
        else:
            roleResult['structuredOut'] = ''

        # let ambari know that configuration tags were applied
        if status == self.COMPLETED_STATUS:
            configHandler = ActualConfigHandler(self.config, self.configTags)
            #update
            if command.has_key('forceRefreshConfigTags') and len(
                    command['forceRefreshConfigTags']) > 0:

                forceRefreshConfigTags = command['forceRefreshConfigTags']
                logger.info("Got refresh additional component tags command")

                for configTag in forceRefreshConfigTags:
                    configHandler.update_component_tag(
                        command['role'], configTag,
                        command['configurationTags'][configTag])

                roleResult[
                    'customCommand'] = self.CUSTOM_COMMAND_RESTART  # force restart for component to evict stale_config on server side
                command[
                    'configurationTags'] = configHandler.read_actual_component(
                        command['role'])

            if command.has_key('configurationTags'):
                configHandler.write_actual(command['configurationTags'])
                roleResult['configurationTags'] = command['configurationTags']
            component = {
                'serviceName': command['serviceName'],
                'componentName': command['role']
            }
            if command.has_key('roleCommand') and \
              (command['roleCommand'] == self.ROLE_COMMAND_START or \
              (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
              and component in LiveStatus.CLIENT_COMPONENTS) or \
              (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
              command['hostLevelParams'].has_key('custom_command') and \
              command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
                configHandler.write_actual_component(
                    command['role'], command['configurationTags'])
                if command['hostLevelParams'].has_key('clientsToUpdateConfigs') and \
                  command['hostLevelParams']['clientsToUpdateConfigs']:
                    configHandler.write_client_components(
                        command['serviceName'], command['configurationTags'],
                        command['hostLevelParams']['clientsToUpdateConfigs'])
                roleResult[
                    'configurationTags'] = configHandler.read_actual_component(
                        command['role'])

        self.commandStatuses.put_command_status(command, roleResult)
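
The condition in Example #8 that decides whether applied configuration tags are recorded boils down to three cases: the component was started, a client component was installed, or a RESTART custom command ran. A hedged, self-contained distillation follows; the constant values, client list, and sample command are assumptions:

ROLE_COMMAND_START = "START"
ROLE_COMMAND_INSTALL = "INSTALL"
ROLE_COMMAND_CUSTOM_COMMAND = "CUSTOM_COMMAND"
CUSTOM_COMMAND_RESTART = "RESTART"
CLIENT_COMPONENTS = [{"serviceName": "HDFS", "componentName": "HDFS_CLIENT"}]

def should_write_actual_component(command):
    component = {"serviceName": command["serviceName"],
                 "componentName": command["role"]}
    role_command = command.get("roleCommand")
    if role_command == ROLE_COMMAND_START:
        return True
    if role_command == ROLE_COMMAND_INSTALL and component in CLIENT_COMPONENTS:
        return True
    return (role_command == ROLE_COMMAND_CUSTOM_COMMAND and
            command.get("hostLevelParams", {}).get("custom_command") == CUSTOM_COMMAND_RESTART)

print(should_write_actual_component(
    {"serviceName": "HDFS", "role": "HDFS_CLIENT", "roleCommand": "INSTALL"}))  # True
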
Example #9
class LiveStatus:

  SERVICES = [
    "HDFS", "MAPREDUCE", "GANGLIA", "HBASE",
    "NAGIOS", "ZOOKEEPER", "OOZIE", "HCATALOG",
    "KERBEROS", "TEMPLETON", "HIVE", "WEBHCAT",
    "YARN", "MAPREDUCE2", "FLUME", "TEZ",
    "FALCON", "STORM"
  ]

  CLIENT_COMPONENTS = [
    {"serviceName" : "HBASE",
     "componentName" : "HBASE_CLIENT"},
    {"serviceName" : "HDFS",
     "componentName" : "HDFS_CLIENT"},
    {"serviceName" : "MAPREDUCE",
     "componentName" : "MAPREDUCE_CLIENT"},
    {"serviceName" : "ZOOKEEPER",
     "componentName" : "ZOOKEEPER_CLIENT"},
    {"serviceName" : "OOZIE",
     "componentName" : "OOZIE_CLIENT"},
    {"serviceName" : "HCATALOG",
     "componentName" : "HCAT"},
    {"serviceName" : "HIVE",
     "componentName" : "HIVE_CLIENT"},
    {"serviceName" : "YARN",
     "componentName" : "YARN_CLIENT"},
    {"serviceName" : "MAPREDUCE2",
     "componentName" : "MAPREDUCE2_CLIENT"},
    {"serviceName" : "PIG",
     "componentName" : "PIG"},
    {"serviceName" : "SQOOP",
     "componentName" : "SQOOP"},
    {"serviceName" : "TEZ",
     "componentName" : "TEZ_CLIENT"},
    {"serviceName" : "FALCON",
     "componentName" : "FALCON_CLIENT"}
  ]

  COMPONENTS = [
      {"serviceName" : "HDFS",
       "componentName" : "DATANODE"},
      {"serviceName" : "HDFS",
       "componentName" : "NAMENODE"},
      {"serviceName" : "HDFS",
       "componentName" : "SECONDARY_NAMENODE"},
      {"serviceName" : "HDFS",
       "componentName" : "JOURNALNODE"},
      {"serviceName" : "HDFS",
       "componentName" : "ZKFC"},

      {"serviceName" : "MAPREDUCE",
       "componentName" : "JOBTRACKER"},
      {"serviceName" : "MAPREDUCE",
       "componentName" : "TASKTRACKER"},

      {"serviceName" : "GANGLIA",
       "componentName" : "GANGLIA_SERVER"},
      {"serviceName" : "GANGLIA",
       "componentName" : "GANGLIA_MONITOR"},

      {"serviceName" : "HBASE",
       "componentName" : "HBASE_MASTER"},
      {"serviceName" : "HBASE",
       "componentName" : "HBASE_REGIONSERVER"},

      {"serviceName" : "NAGIOS",
       "componentName" : "NAGIOS_SERVER"},

      {"serviceName" : "FLUME",
       "componentName" : "FLUME_SERVER"},

      {"serviceName" : "ZOOKEEPER",
       "componentName" : "ZOOKEEPER_SERVER"},

      {"serviceName" : "OOZIE",
       "componentName" : "OOZIE_SERVER"},

      {"serviceName" : "HCATALOG",
       "componentName" : "HCATALOG_SERVER"},

      {"serviceName" : "KERBEROS",
       "componentName" : "KERBEROS_SERVER"},

      {"serviceName" : "HIVE",
       "componentName" : "HIVE_SERVER"},
      {"serviceName" : "HIVE",
       "componentName" : "HIVE_METASTORE"},
      {"serviceName" : "HIVE",
       "componentName" : "MYSQL_SERVER"},

      {"serviceName" : "WEBHCAT",
       "componentName" : "WEBHCAT_SERVER"},

      {"serviceName" : "YARN",
       "componentName" : "RESOURCEMANAGER"},
      {"serviceName" : "YARN",
       "componentName" : "NODEMANAGER"},
      {"serviceName" : "YARN",
       "componentName" : "APP_TIMELINE_SERVER"},

      {"serviceName" : "MAPREDUCE2",
       "componentName" : "HISTORYSERVER"},

      {"serviceName" : "FALCON",
       "componentName" : "FALCON_SERVER"},

      {"serviceName" : "STORM",
       "componentName" : "NIMBUS"},
      {"serviceName" : "STORM",
       "componentName" : "STORM_REST_API"},
      {"serviceName" : "STORM",
       "componentName" : "SUPERVISOR"},
      {"serviceName" : "STORM",
       "componentName" : "STORM_UI_SERVER"},
      {"serviceName" : "STORM",
       "componentName" : "DRPC_SERVER"}
  ]

  LIVE_STATUS = "STARTED"
  DEAD_STATUS = "INSTALLED"

  def __init__(self, cluster, service, component, globalConfig, config,
               configTags):
    self.cluster = cluster
    self.service = service
    self.component = component
    self.globalConfig = globalConfig
    versionsFileDir = config.get('agent', 'prefix')
    self.versionsHandler = StackVersionsFileHandler(versionsFileDir)
    self.configTags = configTags
    self.actualConfigHandler = ActualConfigHandler(config, configTags)

  def belongsToService(self, component):
    #TODO: Should also check belonging of server to cluster
    return component['serviceName'] == self.service

  def build(self, forsed_component_status = None):
    """
    If forsed_component_status is explicitly defined, then StatusCheck methods are
    not used. This feature has been added to support custom (ver 2.0) services.
    """
    global SERVICES, CLIENT_COMPONENTS, COMPONENTS, LIVE_STATUS, DEAD_STATUS

    livestatus = None
    component = {"serviceName" : self.service, "componentName" : self.component}
    if forsed_component_status: # If already determined
      status = forsed_component_status  # Nothing to do
    elif component in self.CLIENT_COMPONENTS:
      status = self.DEAD_STATUS # CLIENT components can't have status STARTED
    elif component in self.COMPONENTS:
      statusCheck = StatusCheck(AmbariConfig.servicesToPidNames,
                                AmbariConfig.pidPathesVars, self.globalConfig,
                                AmbariConfig.servicesToLinuxUser)
      serviceStatus = statusCheck.getStatus(self.component)
      if serviceStatus is None:
        logger.warn("There is no service to pid mapping for " + self.component)
      status = self.LIVE_STATUS if serviceStatus else self.DEAD_STATUS

    livestatus ={"componentName" : self.component,
                 "msg" : "",
                 "status" : status,
                 "clusterName" : self.cluster,
                 "serviceName" : self.service,
                 "stackVersion": self.versionsHandler.
                 read_stack_version(self.component)
    }
    active_config = self.actualConfigHandler.read_actual_component(self.component)
    if not active_config is None:
      livestatus['configurationTags'] = active_config

    logger.debug("The live status for component " + str(self.component) +\
                " of service " + str(self.service) + " is " + str(livestatus))
    return livestatus
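
The "component in self.CLIENT_COMPONENTS" and "component in self.COMPONENTS" tests in Example #9 work because Python dicts compare by value, so the dict rebuilt from the command matches a list entry with the same two keys. A minimal illustration:

COMPONENTS = [
    {"serviceName": "HDFS", "componentName": "DATANODE"},
    {"serviceName": "HDFS", "componentName": "NAMENODE"},
]

component = {"serviceName": "HDFS", "componentName": "DATANODE"}
print(component in COMPONENTS)  # True: membership uses dict equality, not identity
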
Example #10
  def execute_command(self, command):
    '''
    Executes commands of type  EXECUTION_COMMAND
    '''
    clusterName = command['clusterName']
    commandId = command['commandId']

    message = "Executing command with id = {commandId} for role = {role} of " \
              "cluster {cluster}.".format(
              commandId = str(commandId), role=command['role'],
              cluster=clusterName)
    logger.info(message)
    logger.debug(pprint.pformat(command))

    taskId = command['taskId']
    # Preparing 'IN_PROGRESS' report
    in_progress_status = self.commandStatuses.generate_report_template(command)
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
      'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
    self.commandStatuses.put_command_status(command, in_progress_status)
    # running command
    commandresult = self.customServiceOrchestrator.runCommand(command,
      in_progress_status['tmpout'], in_progress_status['tmperr'])
    # dumping results
    status = self.COMPLETED_STATUS
    if commandresult['exitcode'] != 0:
      status = self.FAILED_STATUS
    roleResult = self.commandStatuses.generate_report_template(command)
    roleResult.update({
      'stdout': commandresult['stdout'],
      'stderr': commandresult['stderr'],
      'exitCode': commandresult['exitcode'],
      'status': status,
    })
    if roleResult['stdout'] == '':
      roleResult['stdout'] = 'None'
    if roleResult['stderr'] == '':
      roleResult['stderr'] = 'None'

    # let ambari know name of custom command
    if command['hostLevelParams'].has_key('custom_command'):
      roleResult['customCommand'] = command['hostLevelParams']['custom_command']

    if 'structuredOut' in commandresult:
      roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
    else:
      roleResult['structuredOut'] = ''

    # let ambari know that configuration tags were applied
    if status == self.COMPLETED_STATUS:
      configHandler = ActualConfigHandler(self.config, self.configTags)
      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']
      component = {'serviceName':command['serviceName'],'componentName':command['role']}
      if command.has_key('roleCommand') and \
        (command['roleCommand'] == self.ROLE_COMMAND_START or \
        (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
        and component in LiveStatus.CLIENT_COMPONENTS) or \
        (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
        command['hostLevelParams'].has_key('custom_command') and \
        command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
        configHandler.write_actual_component(command['role'], command['configurationTags'])
        configHandler.write_client_components(command['serviceName'], command['configurationTags'])
        roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])

    self.commandStatuses.put_command_status(command, roleResult)
Example #11
  def execute_command(self, command):
    '''
    Executes commands of type EXECUTION_COMMAND
    '''
    clusterName = command['clusterName']
    commandId = command['commandId']
    isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
    isAutoExecuteCommand = command['commandType'] == self.AUTO_EXECUTION_COMMAND
    message = "Executing command with id = {commandId} for role = {role} of " \
              "cluster {cluster}.".format(
              commandId = str(commandId), role=command['role'],
              cluster=clusterName)
    logger.info(message)

    taskId = command['taskId']
    # Preparing 'IN_PROGRESS' report
    in_progress_status = self.commandStatuses.generate_report_template(command)
    # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
    # server. The prefix is defined in agent-config.ini
    if not isAutoExecuteCommand:
      in_progress_status.update({
        'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
        'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
        'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
        'status': self.IN_PROGRESS_STATUS
      })
    else:
      in_progress_status.update({
        'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
        'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
        'structuredOut' : self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
        'status': self.IN_PROGRESS_STATUS
      })

    self.commandStatuses.put_command_status(command, in_progress_status)

    numAttempts = 0
    retryDuration = 0  # even with 0 allow one attempt
    retryAble = False
    delay = 1
    if 'commandParams' in command:
      if 'max_duration_for_retries' in command['commandParams']:
        retryDuration = int(command['commandParams']['max_duration_for_retries'])
      if 'command_retry_enabled' in command['commandParams']:
        retryAble = command['commandParams']['command_retry_enabled'] == "true"
    if isAutoExecuteCommand:
      retryAble = False

    logger.debug("Command execution metadata - retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}".
                 format(retryAble=retryAble, retryDuration=retryDuration))
    while retryDuration >= 0:
      numAttempts += 1
      start = 0
      if retryAble:
        start = int(time.time())
      # running command
      commandresult = self.customServiceOrchestrator.runCommand(command,
                                                                in_progress_status['tmpout'],
                                                                in_progress_status['tmperr'],
                                                                override_output_files=numAttempts == 1,
                                                                retry=numAttempts > 1)
      end = 1
      if retryAble:
        end = int(time.time())
      retryDuration -= (end - start)

      # dumping results
      if isCommandBackground:
        return
      else:
        if commandresult['exitcode'] == 0:
          status = self.COMPLETED_STATUS
        else:
          status = self.FAILED_STATUS

      if status != self.COMPLETED_STATUS and retryAble == True and retryDuration > 0:
        delay = self.get_retry_delay(delay)
        if delay > retryDuration:
          delay = retryDuration
        retryDuration -= delay  # allow one last attempt
        logger.info("Retrying command id {cid} after a wait of {delay}".format(cid=taskId, delay=delay))
        time.sleep(delay)
        continue
      else:
        break

    roleResult = self.commandStatuses.generate_report_template(command)
    roleResult.update({
      'stdout': commandresult['stdout'],
      'stderr': commandresult['stderr'],
      'exitCode': commandresult['exitcode'],
      'status': status,
    })
    if roleResult['stdout'] == '':
      roleResult['stdout'] = 'None'
    if roleResult['stderr'] == '':
      roleResult['stderr'] = 'None'

    # let ambari know name of custom command
    if command['hostLevelParams'].has_key('custom_command'):
      roleResult['customCommand'] = command['hostLevelParams']['custom_command']

    if 'structuredOut' in commandresult:
      roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
    else:
      roleResult['structuredOut'] = ''

    # let recovery manager know the current state
    if status == self.COMPLETED_STATUS:
      if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
          and self.controller.recovery_manager.configured_for_recovery(command['role']):
        if command['roleCommand'] == self.ROLE_COMMAND_START:
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
          self.controller.recovery_manager.update_config_staleness(command['role'], False)
          logger.info("After EXECUTION_COMMAND (START), current state of " + command['role'] + " to " +
                       self.controller.recovery_manager.get_current_status(command['role']) )
        elif command['roleCommand'] == self.ROLE_COMMAND_STOP or command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.DEAD_STATUS)
          logger.info("After EXECUTION_COMMAND (STOP/INSTALL), current state of " + command['role'] + " to " +
                       self.controller.recovery_manager.get_current_status(command['role']) )
        elif command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND:
          if command['hostLevelParams'].has_key('custom_command') and \
                  command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART:
            self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
            self.controller.recovery_manager.update_config_staleness(command['role'], False)
            logger.info("After EXECUTION_COMMAND (RESTART), current state of " + command['role'] + " to " +
                         self.controller.recovery_manager.get_current_status(command['role']) )
      pass

      # let ambari know that configuration tags were applied
      configHandler = ActualConfigHandler(self.config, self.configTags)
      #update
      if command.has_key('forceRefreshConfigTags') and len(command['forceRefreshConfigTags']) > 0  :

        forceRefreshConfigTags = command['forceRefreshConfigTags']
        logger.info("Got refresh additional component tags command")

        for configTag in forceRefreshConfigTags :
          configHandler.update_component_tag(command['role'], configTag, command['configurationTags'][configTag])

        roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART # force restart for component to evict stale_config on server side
        command['configurationTags'] = configHandler.read_actual_component(command['role'])

      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']
      component = {'serviceName':command['serviceName'],'componentName':command['role']}
      if command.has_key('roleCommand') and \
        (command['roleCommand'] == self.ROLE_COMMAND_START or \
        (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
        and component in LiveStatus.CLIENT_COMPONENTS) or \
        (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
        command['hostLevelParams'].has_key('custom_command') and \
        command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
        configHandler.write_actual_component(command['role'], command['configurationTags'])
        if command['hostLevelParams'].has_key('clientsToUpdateConfigs') and \
          command['hostLevelParams']['clientsToUpdateConfigs']:
          configHandler.write_client_components(command['serviceName'], command['configurationTags'],
                                                command['hostLevelParams']['clientsToUpdateConfigs'])
        roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])

    self.commandStatuses.put_command_status(command, roleResult)
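
The forceRefreshConfigTags branch above rewrites the tag of each listed config type for the component and then forces customCommand to RESTART so the server re-evaluates stale configs. A hedged sketch with a stub handler and an illustrative command (not the agent's ActualConfigHandler):

class StubConfigHandler:
    def __init__(self):
        self.applied = {}
    def update_component_tag(self, role, config_type, tag):
        self.applied.setdefault(role, {})[config_type] = tag
    def read_actual_component(self, role):
        return self.applied.get(role, {})

command = {
    "role": "DATANODE",
    "forceRefreshConfigTags": ["hdfs-site"],
    "configurationTags": {"hdfs-site": {"tag": "version2"}},
}
handler = StubConfigHandler()
role_result = {}

for config_tag in command["forceRefreshConfigTags"]:
    handler.update_component_tag(command["role"], config_tag,
                                 command["configurationTags"][config_tag])
role_result["customCommand"] = "RESTART"  # assumed value of CUSTOM_COMMAND_RESTART
command["configurationTags"] = handler.read_actual_component(command["role"])
print(command["configurationTags"])  # {'hdfs-site': {'tag': 'version2'}}
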
Example #12
class LiveStatus:

  SERVICES = []
  CLIENT_COMPONENTS = []
  COMPONENTS = []

  LIVE_STATUS = "STARTED"
  DEAD_STATUS = "INSTALLED"

  def __init__(self, cluster, service, component, globalConfig, config,
               configTags):
    self.cluster = cluster
    self.service = service
    self.component = component
    self.globalConfig = globalConfig
    versionsFileDir = config.get('agent', 'prefix')
    self.versionsHandler = StackVersionsFileHandler(versionsFileDir)
    self.configTags = configTags
    self.actualConfigHandler = ActualConfigHandler(config, configTags)

  def belongsToService(self, component):
    #TODO: Should also check belonging of server to cluster
    return component['serviceName'] == self.service

  def build(self, forsed_component_status = None):
    """
    If forsed_component_status is explicitly defined, then StatusCheck methods are
    not used. This feature has been added to support custom (ver 2.0) services.
    """
    global SERVICES, CLIENT_COMPONENTS, COMPONENTS, LIVE_STATUS, DEAD_STATUS

    component = {"serviceName" : self.service, "componentName" : self.component}
    if forsed_component_status: # If already determined
      status = forsed_component_status  # Nothing to do
    elif component in self.CLIENT_COMPONENTS:
      status = self.DEAD_STATUS # CLIENT components can't have status STARTED
    elif component in self.COMPONENTS:
      statusCheck = StatusCheck(AmbariConfig.servicesToPidNames,
                                AmbariConfig.pidPathVars, self.globalConfig,
                                AmbariConfig.servicesToLinuxUser)
      serviceStatus = statusCheck.getStatus(self.component)
      if serviceStatus is None:
        logger.warn("There is no service to pid mapping for " + self.component)
      status = self.LIVE_STATUS if serviceStatus else self.DEAD_STATUS

    livestatus = {"componentName" : self.component,
                 "msg" : "",
                 "status" : status,
                 "clusterName" : self.cluster,
                 "serviceName" : self.service,
                 "stackVersion": self.versionsHandler.
                 read_stack_version(self.component)
    }
    
    active_config = self.actualConfigHandler.read_actual_component(self.component)
    if not active_config is None:
      livestatus['configurationTags'] = active_config

    logger.debug("The live status for component " + str(self.component) +\
                " of service " + str(self.service) + " is " + str(livestatus))
    return livestatus
Example #13
class LiveStatus:

  SERVICES = []
  CLIENT_COMPONENTS = []
  COMPONENTS = []

  LIVE_STATUS = "STARTED"
  DEAD_STATUS = "INSTALLED"

  def __init__(self, cluster, service, component, globalConfig, config,
               configTags):
    self.cluster = cluster
    self.service = service
    self.component = component
    self.globalConfig = globalConfig
    versionsFileDir = config.get('agent', 'prefix')
    self.versionsHandler = StackVersionsFileHandler(versionsFileDir)
    self.configTags = configTags
    self.actualConfigHandler = ActualConfigHandler(config, configTags)

  def belongsToService(self, component):
    #TODO: Should also check belonging of server to cluster
    return component['serviceName'] == self.service

  def build(self, forced_component_status = None):
    """
    If forced_component_status is explicitly defined, then StatusCheck methods are
    not used. This feature has been added to support custom (ver 2.0) services.
    """
    global SERVICES, CLIENT_COMPONENTS, COMPONENTS, LIVE_STATUS, DEAD_STATUS

    component = {"serviceName" : self.service, "componentName" : self.component}
    if forced_component_status: # If already determined
      status = forced_component_status  # Nothing to do
    elif component in self.CLIENT_COMPONENTS:
      status = self.DEAD_STATUS # CLIENT components can't have status STARTED
    elif component in self.COMPONENTS:
      statusCheck = StatusCheck(AmbariConfig.servicesToPidNames,
                                AmbariConfig.pidPathVars, self.globalConfig,
                                AmbariConfig.servicesToLinuxUser)
      serviceStatus = statusCheck.getStatus(self.component)
      if serviceStatus is None:
        logger.warn("There is no service to pid mapping for " + self.component)
      status = self.LIVE_STATUS if serviceStatus else self.DEAD_STATUS

    livestatus = {"componentName" : self.component,
                 "msg" : "",
                 "status" : status,
                 "clusterName" : self.cluster,
                 "serviceName" : self.service,
                 "stackVersion": self.versionsHandler.
                 read_stack_version(self.component)
    }
    
    active_config = self.actualConfigHandler.read_actual_component(self.component)
    if not active_config is None:
      livestatus['configurationTags'] = active_config

    logger.debug("The live status for component " + str(self.component) +\
                " of service " + str(self.service) + " is " + str(livestatus))
    return livestatus
Example #14
  def execute_command(self, command):
    '''
    Executes commands of type EXECUTION_COMMAND
    '''
    clusterName = command['clusterName']
    commandId = command['commandId']
    isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
    isAutoExecuteCommand = command['commandType'] == self.AUTO_EXECUTION_COMMAND
    message = "Executing command with id = {commandId}, taskId = {taskId} for role = {role} of " \
              "cluster {cluster}.".format(
              commandId = str(commandId), taskId = str(command['taskId']),
              role=command['role'], cluster=clusterName)
    logger.info(message)

    taskId = command['taskId']
    # Preparing 'IN_PROGRESS' report
    in_progress_status = self.commandStatuses.generate_report_template(command)
    # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
    # server. The prefix is defined in agent-config.ini
    if not isAutoExecuteCommand:
      in_progress_status.update({
        'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
        'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
        'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
        'status': self.IN_PROGRESS_STATUS
      })
    else:
      in_progress_status.update({
        'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
        'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
        'structuredOut' : self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
        'status': self.IN_PROGRESS_STATUS
      })

    self.commandStatuses.put_command_status(command, in_progress_status)

    numAttempts = 0
    retryDuration = 0  # even with 0 allow one attempt
    retryAble = False
    delay = 1
    if 'commandParams' in command:
      if 'max_duration_for_retries' in command['commandParams']:
        retryDuration = int(command['commandParams']['max_duration_for_retries'])
      if 'command_retry_enabled' in command['commandParams']:
        retryAble = command['commandParams']['command_retry_enabled'] == "true"
    if isAutoExecuteCommand:
      retryAble = False

    logger.debug("Command execution metadata - retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}".
                 format(retryAble=retryAble, retryDuration=retryDuration))
    while retryDuration >= 0:
      numAttempts += 1
      start = 0
      if retryAble:
        start = int(time.time())
      # running command
      commandresult = self.customServiceOrchestrator.runCommand(command,
                                                                in_progress_status['tmpout'],
                                                                in_progress_status['tmperr'],
                                                                override_output_files=numAttempts == 1,
                                                                retry=numAttempts > 1)
      end = 1
      if retryAble:
        end = int(time.time())
      retryDuration -= (end - start)

      # dumping results
      if isCommandBackground:
        return
      else:
        if commandresult['exitcode'] == 0:
          status = self.COMPLETED_STATUS
        else:
          status = self.FAILED_STATUS

      if status != self.COMPLETED_STATUS and retryAble and retryDuration > 0:
        delay = self.get_retry_delay(delay)
        if delay > retryDuration:
          delay = retryDuration
        retryDuration -= delay  # allow one last attempt
        logger.info("Retrying command id {cid} after a wait of {delay}".format(cid=taskId, delay=delay))
        time.sleep(delay)
        continue
      else:
        break

    roleResult = self.commandStatuses.generate_report_template(command)
    roleResult.update({
      'stdout': commandresult['stdout'],
      'stderr': commandresult['stderr'],
      'exitCode': commandresult['exitcode'],
      'status': status,
    })

    if self.config.has_option("logging","log_command_executes") and int(self.config.get("logging",
                                                                                       "log_command_executes")) == 1:
        if roleResult['stdout'] != '':
            logger.info("Begin command output log for command with id = " + str(command['taskId']) + ", role = "
                        + command['role'] + ", roleCommand = " + command['roleCommand'])
            logger.info(roleResult['stdout'])
            logger.info("End command output log for command with id = " + str(command['taskId']) + ", role = "
                        + command['role'] + ", roleCommand = " + command['roleCommand'])

        if roleResult['stderr'] != '':
            logger.info("Begin command stderr log for command with id = " + str(command['taskId']) + ", role = "
                        + command['role'] + ", roleCommand = " + command['roleCommand'])
            logger.info(roleResult['stderr'])
            logger.info("End command stderr log for command with id = " + str(command['taskId']) + ", role = "
                        + command['role'] + ", roleCommand = " + command['roleCommand'])

    if roleResult['stdout'] == '':
      roleResult['stdout'] = 'None'
    if roleResult['stderr'] == '':
      roleResult['stderr'] = 'None'

    # let ambari know name of custom command
    if command['hostLevelParams'].has_key('custom_command'):
      roleResult['customCommand'] = command['hostLevelParams']['custom_command']

    if 'structuredOut' in commandresult:
      roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
    else:
      roleResult['structuredOut'] = ''

    # let recovery manager know the current state
    if status == self.COMPLETED_STATUS:
      if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
          and self.controller.recovery_manager.configured_for_recovery(command['role']):
        if command['roleCommand'] == self.ROLE_COMMAND_START:
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
          self.controller.recovery_manager.update_config_staleness(command['role'], False)
          logger.info("After EXECUTION_COMMAND (START), with taskId=" + str(command['taskId']) +
                      ", current state of " + command['role'] + " to " +
                       self.controller.recovery_manager.get_current_status(command['role']) )
        elif command['roleCommand'] == self.ROLE_COMMAND_STOP or command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.DEAD_STATUS)
          logger.info("After EXECUTION_COMMAND (STOP/INSTALL), with taskId=" + str(command['taskId']) +
                      ", current state of " + command['role'] + " to " +
                       self.controller.recovery_manager.get_current_status(command['role']) )
        elif command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND:
          if command['hostLevelParams'].has_key('custom_command') and \
                  command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART:
            self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
            self.controller.recovery_manager.update_config_staleness(command['role'], False)
            logger.info("After EXECUTION_COMMAND (RESTART), current state of " + command['role'] + " to " +
                         self.controller.recovery_manager.get_current_status(command['role']) )
      pass

      # let ambari know that configuration tags were applied
      configHandler = ActualConfigHandler(self.config, self.configTags)
      #update
      if command.has_key('forceRefreshConfigTags') and len(command['forceRefreshConfigTags']) > 0  :

        forceRefreshConfigTags = command['forceRefreshConfigTags']
        logger.info("Got refresh additional component tags command")

        for configTag in forceRefreshConfigTags :
          configHandler.update_component_tag(command['role'], configTag, command['configurationTags'][configTag])

        roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART # force restart for component to evict stale_config on server side
        command['configurationTags'] = configHandler.read_actual_component(command['role'])

      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']
      if 'roleCommand' in command and \
          (command['roleCommand'] == self.ROLE_COMMAND_START or
               (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and
                  'custom_command' in command['hostLevelParams'] and
                      command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
        configHandler.write_actual_component(command['role'],
                                             command['configurationTags'])
        if 'clientsToUpdateConfigs' in command['hostLevelParams'] and command['hostLevelParams']['clientsToUpdateConfigs']:
          configHandler.write_client_components(command['serviceName'],
                                                command['configurationTags'],
                                                command['hostLevelParams']['clientsToUpdateConfigs'])
        roleResult['configurationTags'] = configHandler.read_actual_component(
            command['role'])

    self.commandStatuses.put_command_status(command, roleResult)
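The example above reads back the "actual" configuration tags through ActualConfigHandler so the server can detect stale configs. As a rough illustration only (not the real Ambari ActualConfigHandler, whose file layout and API differ), a minimal store for per-component tags could look like this:

import json
import os


class SimpleActualConfigStore(object):
  """Illustrative sketch: persist the tags that were actually applied per component."""

  def __init__(self, prefix_dir):
    self.path = os.path.join(prefix_dir, 'actual_component_tags.json')

  def _load(self):
    if not os.path.exists(self.path):
      return {}
    with open(self.path) as f:
      return json.load(f)

  def write_actual_component(self, component, tags):
    data = self._load()
    data[component] = tags  # e.g. {'core-site': 'version42'}
    with open(self.path, 'w') as f:
      json.dump(data, f)

  def read_actual_component(self, component):
    return self._load().get(component)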
Beispiel #15
0
  def execute_command(self, command):
    '''
    Executes commands of type EXECUTION_COMMAND
    '''
    clusterName = command['clusterName']
    commandId = command['commandId']
    isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
    message = "Executing command with id = {commandId} for role = {role} of " \
              "cluster {cluster}.".format(
              commandId = str(commandId), role=command['role'],
              cluster=clusterName)
    logger.info(message)

    taskId = command['taskId']
    # Preparing 'IN_PROGRESS' report
    in_progress_status = self.commandStatuses.generate_report_template(command)
    # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
    # server. The prefix is defined in agent-config.ini
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
      'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
    self.commandStatuses.put_command_status(command, in_progress_status)

    # running command
    commandresult = self.customServiceOrchestrator.runCommand(command,
      in_progress_status['tmpout'], in_progress_status['tmperr'])


    # dumping results
    if isCommandBackground:
      return
    else:
      status = self.COMPLETED_STATUS if commandresult['exitcode'] == 0 else self.FAILED_STATUS
    roleResult = self.commandStatuses.generate_report_template(command)
    roleResult.update({
      'stdout': commandresult['stdout'],
      'stderr': commandresult['stderr'],
      'exitCode': commandresult['exitcode'],
      'status': status,
    })
    if roleResult['stdout'] == '':
      roleResult['stdout'] = 'None'
    if roleResult['stderr'] == '':
      roleResult['stderr'] = 'None'

    # let ambari know name of custom command
    if command['hostLevelParams'].has_key('custom_command'):
      roleResult['customCommand'] = command['hostLevelParams']['custom_command']

    if 'structuredOut' in commandresult:
      roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
    else:
      roleResult['structuredOut'] = ''

    # let ambari know that configuration tags were applied
    if status == self.COMPLETED_STATUS:
      configHandler = ActualConfigHandler(self.config, self.configTags)
      #update
      if command.has_key('forceRefreshConfigTags') and len(command['forceRefreshConfigTags']) > 0  :

        forceRefreshConfigTags = command['forceRefreshConfigTags']
        logger.info("Got refresh additional component tags command")

        for configTag in forceRefreshConfigTags :
          configHandler.update_component_tag(command['role'], configTag, command['configurationTags'][configTag])

        roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART # force restart for component to evict stale_config on server side
        command['configurationTags'] = configHandler.read_actual_component(command['role'])

      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']
      component = {'serviceName':command['serviceName'],'componentName':command['role']}
      if command.has_key('roleCommand') and \
        (command['roleCommand'] == self.ROLE_COMMAND_START or \
        (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
        and component in LiveStatus.CLIENT_COMPONENTS) or \
        (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
        command['hostLevelParams'].has_key('custom_command') and \
        command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
        configHandler.write_actual_component(command['role'], command['configurationTags'])
        if command['hostLevelParams'].has_key('clientsToUpdateConfigs') and \
          command['hostLevelParams']['clientsToUpdateConfigs']:
          configHandler.write_client_components(command['serviceName'], command['configurationTags'],
                                                command['hostLevelParams']['clientsToUpdateConfigs'])
        roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])

    self.commandStatuses.put_command_status(command, roleResult)
Beispiel #16
0
    def executeCommand(self, command):
        clusterName = command['clusterName']
        commandId = command['commandId']
        hostname = command['hostname']
        params = command['hostLevelParams']
        clusterHostInfo = command['clusterHostInfo']
        roleCommand = command['roleCommand']
        serviceName = command['serviceName']
        configurations = command['configurations']
        result = []

        logger.info("Executing command with id = " + str(commandId) +\
                    " for role = " + command['role'] + " of " +\
                    "cluster " + clusterName)
        logger.debug(pprint.pformat(command))

        taskId = command['taskId']
        # Preparing 'IN_PROGRESS' report
        self.commandInProgress = {
            'role': command['role'],
            'actionId': commandId,
            'taskId': taskId,
            'clusterName': clusterName,
            'serviceName': serviceName,
            'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
            'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
            'roleCommand': roleCommand
        }
        # running command
        if command['commandType'] == ActionQueue.EXECUTION_COMMAND:
            if command['roleCommand'] == ActionQueue.UPGRADE_STATUS:
                commandresult = self.upgradeExecutor.perform_stack_upgrade(
                    command, self.commandInProgress['tmpout'],
                    self.commandInProgress['tmperr'])
            else:
                commandresult = self.puppetExecutor.runCommand(
                    command, self.commandInProgress['tmpout'],
                    self.commandInProgress['tmperr'])
            # dumping results
        self.commandInProgress = None
        status = "COMPLETED"
        if commandresult['exitcode'] != 0:
            status = "FAILED"

        # assume some puppet plumbing to run these commands
        roleResult = {
            'role': command['role'],
            'actionId': commandId,
            'taskId': command['taskId'],
            'stdout': commandresult['stdout'],
            'clusterName': clusterName,
            'stderr': commandresult['stderr'],
            'exitCode': commandresult['exitcode'],
            'serviceName': serviceName,
            'status': status,
            'roleCommand': roleCommand
        }
        if roleResult['stdout'] == '':
            roleResult['stdout'] = 'None'
        if roleResult['stderr'] == '':
            roleResult['stderr'] = 'None'

        # let ambari know that configuration tags were applied
        if status == 'COMPLETED':
            configHandler = ActualConfigHandler(self.config)
            if command.has_key('configurationTags'):
                configHandler.write_actual(command['configurationTags'])
                roleResult['configurationTags'] = command['configurationTags']

            if command.has_key(
                    'roleCommand') and command['roleCommand'] == 'START':
                configHandler.copy_to_component(command['role'])
                roleResult[
                    'configurationTags'] = configHandler.read_actual_component(
                        command['role'])

        result.append(roleResult)
        return result
Beispiel #17
0
    def execute_command(self, command):
        '''
    Executes commands of type EXECUTION_COMMAND
    '''
        clusterName = command['clusterName']
        commandId = command['commandId']
        isCommandBackground = command[
            'commandType'] == self.BACKGROUND_EXECUTION_COMMAND
        isAutoExecuteCommand = command[
            'commandType'] == self.AUTO_EXECUTION_COMMAND
        message = "Executing command with id = {commandId} for role = {role} of " \
                  "cluster {cluster}.".format(
                  commandId = str(commandId), role=command['role'],
                  cluster=clusterName)
        logger.info(message)

        taskId = command['taskId']
        # Preparing 'IN_PROGRESS' report
        in_progress_status = self.commandStatuses.generate_report_template(
            command)
        # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
        # server. The prefix is defined in agent-config.ini
        if not isAutoExecuteCommand:
            in_progress_status.update({
                'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
                'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
                'structuredOut': self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
                'status': self.IN_PROGRESS_STATUS
            })
        else:
            in_progress_status.update({
                'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
                'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
                'structuredOut': self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
                'status': self.IN_PROGRESS_STATUS
            })

        self.commandStatuses.put_command_status(command, in_progress_status)

        numAttempts = 0
        maxAttempts = 1
        retryAble = False
        delay = 1
        if 'commandParams' in command:
            if 'command_retry_max_attempt_count' in command['commandParams']:
                maxAttempts = int(command['commandParams']
                                  ['command_retry_max_attempt_count'])
            if 'command_retry_enabled' in command['commandParams']:
                retryAble = command['commandParams'][
                    'command_retry_enabled'] == "true"

        logger.debug(
            "Command execution metadata - retry enabled = {retryAble}, max attempt count = {maxAttemptCount}"
            .format(retryAble=retryAble, maxAttemptCount=maxAttempts))
        while numAttempts < maxAttempts:
            numAttempts += 1
            # running command
            commandresult = self.customServiceOrchestrator.runCommand(
                command,
                in_progress_status['tmpout'],
                in_progress_status['tmperr'],
                override_output_files=numAttempts == 1,
                retry=numAttempts > 1)

            # dumping results
            if isCommandBackground:
                return
            else:
                status = self.COMPLETED_STATUS if commandresult[
                    'exitcode'] == 0 else self.FAILED_STATUS

            if status != self.COMPLETED_STATUS and retryAble == True and maxAttempts > numAttempts:
                delay = self.get_retry_delay(delay)
                logger.info(
                    "Retrying command id {cid} after a wait of {delay}".format(
                        cid=taskId, delay=delay))
                time.sleep(delay)
                continue
            else:
                break

        roleResult = self.commandStatuses.generate_report_template(command)
        roleResult.update({
            'stdout': commandresult['stdout'],
            'stderr': commandresult['stderr'],
            'exitCode': commandresult['exitcode'],
            'status': status,
        })
        if roleResult['stdout'] == '':
            roleResult['stdout'] = 'None'
        if roleResult['stderr'] == '':
            roleResult['stderr'] = 'None'

        # let ambari know name of custom command
        if command['hostLevelParams'].has_key('custom_command'):
            roleResult['customCommand'] = command['hostLevelParams'][
                'custom_command']

        if 'structuredOut' in commandresult:
            roleResult['structuredOut'] = str(
                json.dumps(commandresult['structuredOut']))
        else:
            roleResult['structuredOut'] = ''

        # let ambari know that configuration tags were applied
        if status == self.COMPLETED_STATUS:
            configHandler = ActualConfigHandler(self.config, self.configTags)
            #update
            if command.has_key('forceRefreshConfigTags') and len(
                    command['forceRefreshConfigTags']) > 0:

                forceRefreshConfigTags = command['forceRefreshConfigTags']
                logger.info("Got refresh additional component tags command")

                for configTag in forceRefreshConfigTags:
                    configHandler.update_component_tag(
                        command['role'], configTag,
                        command['configurationTags'][configTag])

                roleResult[
                    'customCommand'] = self.CUSTOM_COMMAND_RESTART  # force restart for component to evict stale_config on server side
                command[
                    'configurationTags'] = configHandler.read_actual_component(
                        command['role'])

            if command.has_key('configurationTags'):
                configHandler.write_actual(command['configurationTags'])
                roleResult['configurationTags'] = command['configurationTags']
            component = {
                'serviceName': command['serviceName'],
                'componentName': command['role']
            }
            if command.has_key('roleCommand') and \
              (command['roleCommand'] == self.ROLE_COMMAND_START or \
              (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
              and component in LiveStatus.CLIENT_COMPONENTS) or \
              (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
              command['hostLevelParams'].has_key('custom_command') and \
              command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
                configHandler.write_actual_component(
                    command['role'], command['configurationTags'])
                if command['hostLevelParams'].has_key('clientsToUpdateConfigs') and \
                  command['hostLevelParams']['clientsToUpdateConfigs']:
                    configHandler.write_client_components(
                        command['serviceName'], command['configurationTags'],
                        command['hostLevelParams']['clientsToUpdateConfigs'])
                roleResult[
                    'configurationTags'] = configHandler.read_actual_component(
                        command['role'])

        self.commandStatuses.put_command_status(command, roleResult)
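The retry loop in the example above calls self.get_retry_delay(delay), which is not included in this listing. A plausible reading is a capped exponential backoff; the helper below is only a sketch, and the cap value is an assumption, not something taken from the source:

MAX_RETRY_DELAY_SEC = 300  # assumed cap for illustration


def get_retry_delay(last_delay):
  # Double the previous delay so repeated failures back off, but never
  # wait longer than the assumed cap.
  return min(last_delay * 2, MAX_RETRY_DELAY_SEC)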
Beispiel #18
0
    def execute_command(self, command):
        '''
    Executes commands of type  EXECUTION_COMMAND
    '''
        clusterName = command['clusterName']
        commandId = command['commandId']

        logger.info("Executing command with id = " + str(commandId) +\
                    " for role = " + command['role'] + " of " +\
                    "cluster " + clusterName)
        logger.debug(pprint.pformat(command))

        taskId = command['taskId']
        # Preparing 'IN_PROGRESS' report
        in_progress_status = self.commandStatuses.generate_report_template(
            command)
        in_progress_status.update({
            'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
            'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
            'status': self.IN_PROGRESS_STATUS
        })
        self.commandStatuses.put_command_status(command, in_progress_status)
        # TODO: Add CustomServiceOrchestrator call somewhere here
        # running command
        # Create a new instance of executor for the current thread
        puppetExecutor = PuppetExecutor.PuppetExecutor(
            self.config.get('puppet', 'puppetmodules'),
            self.config.get('puppet', 'puppet_home'),
            self.config.get('puppet', 'facter_home'),
            self.config.get('agent', 'prefix'), self.config)
        commandresult = puppetExecutor.runCommand(command,
                                                  in_progress_status['tmpout'],
                                                  in_progress_status['tmperr'])

        # dumping results
        status = self.COMPLETED_STATUS
        if commandresult['exitcode'] != 0:
            status = self.FAILED_STATUS
        roleResult = self.commandStatuses.generate_report_template(command)
        # assume some puppet plumbing to run these commands
        roleResult.update({
            'stdout': commandresult['stdout'],
            'stderr': commandresult['stderr'],
            'exitCode': commandresult['exitcode'],
            'status': status,
        })
        if roleResult['stdout'] == '':
            roleResult['stdout'] = 'None'
        if roleResult['stderr'] == '':
            roleResult['stderr'] = 'None'

        # let ambari know that configuration tags were applied
        if status == self.COMPLETED_STATUS:
            configHandler = ActualConfigHandler(self.config)
            if command.has_key('configurationTags'):
                configHandler.write_actual(command['configurationTags'])
                roleResult['configurationTags'] = command['configurationTags']
            component = {
                'serviceName': command['serviceName'],
                'componentName': command['role']
            }
            if command.has_key('roleCommand') and \
              (command['roleCommand'] == self.ROLE_COMMAND_START or \
              (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
              and component in LiveStatus.CLIENT_COMPONENTS)):
                configHandler.copy_to_component(command['role'])
                roleResult[
                    'configurationTags'] = configHandler.read_actual_component(
                        command['role'])
        self.commandStatuses.put_command_status(command, roleResult)
Beispiel #19
0
class LiveStatus:

    SERVICES = [
        "HDFS", "MAPREDUCE", "GANGLIA", "HBASE", "NAGIOS", "ZOOKEEPER",
        "OOZIE", "HCATALOG", "KERBEROS", "TEMPLETON", "HIVE", "WEBHCAT",
        "YARN", "MAPREDUCE2", "FLUME"
    ]

    CLIENT_COMPONENTS = [{
        "serviceName": "HBASE",
        "componentName": "HBASE_CLIENT"
    }, {
        "serviceName": "HDFS",
        "componentName": "HDFS_CLIENT"
    }, {
        "serviceName": "MAPREDUCE",
        "componentName": "MAPREDUCE_CLIENT"
    }, {
        "serviceName": "ZOOKEEPER",
        "componentName": "ZOOKEEPER_CLIENT"
    }, {
        "serviceName": "OOZIE",
        "componentName": "OOZIE_CLIENT"
    }, {
        "serviceName": "HCATALOG",
        "componentName": "HCAT"
    }, {
        "serviceName": "HIVE",
        "componentName": "HIVE_CLIENT"
    }, {
        "serviceName": "YARN",
        "componentName": "YARN_CLIENT"
    }, {
        "serviceName": "MAPREDUCE2",
        "componentName": "MAPREDUCE2_CLIENT"
    }, {
        "serviceName": "PIG",
        "componentName": "PIG"
    }, {
        "serviceName": "SQOOP",
        "componentName": "SQOOP"
    }]

    COMPONENTS = [
        {
            "serviceName": "HDFS",
            "componentName": "DATANODE"
        },
        {
            "serviceName": "HDFS",
            "componentName": "NAMENODE"
        },
        {
            "serviceName": "HDFS",
            "componentName": "SECONDARY_NAMENODE"
        },
        {
            "serviceName": "HDFS",
            "componentName": "JOURNALNODE"
        },
        {
            "serviceName": "HDFS",
            "componentName": "ZKFC"
        },
        {
            "serviceName": "MAPREDUCE",
            "componentName": "JOBTRACKER"
        },
        {
            "serviceName": "MAPREDUCE",
            "componentName": "TASKTRACKER"
        },
        {
            "serviceName": "GANGLIA",
            "componentName": "GANGLIA_SERVER"
        },
        {
            "serviceName": "GANGLIA",
            "componentName": "GANGLIA_MONITOR"
        },
        {
            "serviceName": "HBASE",
            "componentName": "HBASE_MASTER"
        },
        {
            "serviceName": "HBASE",
            "componentName": "HBASE_REGIONSERVER"
        },
        {
            "serviceName": "NAGIOS",
            "componentName": "NAGIOS_SERVER"
        },
        {
            "serviceName": "FLUME",
            "componentName": "FLUME_SERVER"
        },
        {
            "serviceName": "ZOOKEEPER",
            "componentName": "ZOOKEEPER_SERVER"
        },
        {
            "serviceName": "OOZIE",
            "componentName": "OOZIE_SERVER"
        },
        {
            "serviceName": "HCATALOG",
            "componentName": "HCATALOG_SERVER"
        },
        {
            "serviceName": "KERBEROS",
            "componentName": "KERBEROS_SERVER"
        },
        {
            "serviceName": "HIVE",
            "componentName": "HIVE_SERVER"
        },
        {
            "serviceName": "HIVE",
            "componentName": "HIVE_METASTORE"
        },
        {
            "serviceName": "HIVE",
            "componentName": "MYSQL_SERVER"
        },
        {
            "serviceName": "WEBHCAT",
            "componentName": "WEBHCAT_SERVER"
        },
        {
            "serviceName": "YARN",
            "componentName": "RESOURCEMANAGER"
        },
        {
            "serviceName": "YARN",
            "componentName": "NODEMANAGER"
        },
        {
            "serviceName": "MAPREDUCE2",
            "componentName": "HISTORYSERVER"
        },
    ]

    LIVE_STATUS = "STARTED"
    DEAD_STATUS = "INSTALLED"

    def __init__(self, cluster, service, component, globalConfig, config):
        self.cluster = cluster
        self.service = service
        self.component = component
        self.globalConfig = globalConfig
        versionsFileDir = config.get('agent', 'prefix')
        self.versionsHandler = StackVersionsFileHandler(versionsFileDir)
        self.actualConfigHandler = ActualConfigHandler(config)

    def belongsToService(self, component):
        #TODO: Should also check belonging of server to cluster
        return component['serviceName'] == self.service

    # Live status was stripped from heartbeat after revision e1718dd
    def build(self):
        global SERVICES, CLIENT_COMPONENTS, COMPONENTS, LIVE_STATUS, DEAD_STATUS
        statusCheck = StatusCheck(AmbariConfig.servicesToPidNames,
                                  AmbariConfig.pidPathesVars,
                                  self.globalConfig,
                                  AmbariConfig.servicesToLinuxUser)
        livestatus = None
        component = {
            "serviceName": self.service,
            "componentName": self.component
        }
        if component in self.COMPONENTS + self.CLIENT_COMPONENTS:
            # CLIENT components can't have status STARTED
            if component in self.CLIENT_COMPONENTS:
                status = self.DEAD_STATUS
            else:
                serviceStatus = statusCheck.getStatus(self.component)

                if serviceStatus is None:
                    logger.warn("There is no service to pid mapping for " +
                                self.component)
                status = self.LIVE_STATUS if serviceStatus else self.DEAD_STATUS

            livestatus = {
                "componentName": self.component,
                "msg": "",
                "status": status,
                "clusterName": self.cluster,
                "serviceName": self.service,
                "stackVersion": self.versionsHandler.read_stack_version(self.component)
            }
            active_config = self.actualConfigHandler.read_actual_component(
                self.component)
            if active_config is not None:
                livestatus['configurationTags'] = active_config

        logger.debug("The live status for component " + str(self.component) +\
                    " of service " + str(self.service) + " is " + str(livestatus))
        return livestatus
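A hedged usage sketch for the LiveStatus variant above: the stub below only satisfies config.get('agent', 'prefix'); a real agent passes its AmbariConfig instance, and globalConfig carries the merged global properties consumed by StatusCheck.

class _StubAgentConfig(object):
  """Hypothetical stand-in for AmbariConfig, for illustration only."""

  def get(self, section, key):
    assert (section, key) == ('agent', 'prefix')
    return '/tmp/ambari-agent'

# live = LiveStatus('c1', 'HDFS', 'DATANODE', {}, _StubAgentConfig())
# report = live.build()  # None for unknown components, else a status dict like
# {'componentName': 'DATANODE', 'status': 'STARTED', 'clusterName': 'c1',
#  'serviceName': 'HDFS', 'stackVersion': '...', 'msg': ''}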
Beispiel #20
0
  def execute_command(self, command):
    '''
    Executes commands of type  EXECUTION_COMMAND
    '''
    clusterName = command['clusterName']
    commandId = command['commandId']
    command_format = self.determine_command_format_version(command)

    message = "Executing command with id = {commandId} for role = {role} of " \
              "cluster {cluster}. Command format={command_format}".format(
              commandId = str(commandId), role=command['role'],
              cluster=clusterName, command_format=command_format)
    logger.info(message)
    logger.debug(pprint.pformat(command))

    taskId = command['taskId']
    # Preparing 'IN_PROGRESS' report
    in_progress_status = self.commandStatuses.generate_report_template(command)
    in_progress_status.update({
      'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
      'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
      'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
      'status': self.IN_PROGRESS_STATUS
    })
    self.commandStatuses.put_command_status(command, in_progress_status)
    # running command
    if command_format == self.COMMAND_FORMAT_V1:
      # Create a new instance of executor for the current thread
      puppetExecutor = PuppetExecutor.PuppetExecutor(
        self.config.get('puppet', 'puppetmodules'),
        self.config.get('puppet', 'puppet_home'),
        self.config.get('puppet', 'facter_home'),
        self.config.get('agent', 'prefix'), self.config)
      commandresult = puppetExecutor.runCommand(command, in_progress_status['tmpout'],
        in_progress_status['tmperr'])
    else:
      commandresult = self.customServiceOrchestrator.runCommand(command,
        in_progress_status['tmpout'], in_progress_status['tmperr'])
    # dumping results
    status = self.COMPLETED_STATUS
    if commandresult['exitcode'] != 0:
      status = self.FAILED_STATUS
    roleResult = self.commandStatuses.generate_report_template(command)
    # assume some puppet plumbing to run these commands
    roleResult.update({
      'stdout': commandresult['stdout'],
      'stderr': commandresult['stderr'],
      'exitCode': commandresult['exitcode'],
      'status': status,
    })
    if roleResult['stdout'] == '':
      roleResult['stdout'] = 'None'
    if roleResult['stderr'] == '':
      roleResult['stderr'] = 'None'

    # let ambari know name of custom command
    if command['hostLevelParams'].has_key('custom_command'):
      roleResult['customCommand'] = command['hostLevelParams']['custom_command']

    if 'structuredOut' in commandresult:
      roleResult['structuredOut'] = str(commandresult['structuredOut'])
    else:
      roleResult['structuredOut'] = ''
    # let ambari know that configuration tags were applied
    if status == self.COMPLETED_STATUS:
      configHandler = ActualConfigHandler(self.config, self.configTags)
      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']
      component = {'serviceName':command['serviceName'],'componentName':command['role']}
      if command.has_key('roleCommand') and \
        (command['roleCommand'] == self.ROLE_COMMAND_START or \
        (command['roleCommand'] == self.ROLE_COMMAND_INSTALL \
        and component in LiveStatus.CLIENT_COMPONENTS) or \
        (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and \
        command['hostLevelParams'].has_key('custom_command') and \
        command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
        configHandler.write_actual_component(command['role'], command['configurationTags'])
        configHandler.write_client_components(command['serviceName'], command['configurationTags'])
        roleResult['configurationTags'] = configHandler.read_actual_component(command['role'])
    self.commandStatuses.put_command_status(command, roleResult)
Beispiel #21
0
    def execute_command(self, command):
        '''
    Executes commands of type EXECUTION_COMMAND
    '''
        clusterId = command['clusterId']
        commandId = command['commandId']
        isCommandBackground = command[
            'commandType'] == self.BACKGROUND_EXECUTION_COMMAND
        isAutoExecuteCommand = command[
            'commandType'] == self.AUTO_EXECUTION_COMMAND
        message = "Executing command with id = {commandId}, taskId = {taskId} for role = {role} of " \
                  "cluster_id {cluster}.".format(
                  commandId = str(commandId), taskId = str(command['taskId']),
                  role=command['role'], cluster=clusterId)
        logger.info(message)

        taskId = command['taskId']
        # Preparing 'IN_PROGRESS' report
        in_progress_status = self.commandStatuses.generate_report_template(
            command)
        # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
        # server. The prefix is defined in agent-config.ini
        if not isAutoExecuteCommand:
            in_progress_status.update({
                'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
                'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
                'structuredOut': self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
                'status': self.IN_PROGRESS_STATUS
            })
        else:
            in_progress_status.update({
                'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
                'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
                'structuredOut': self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
                'status': self.IN_PROGRESS_STATUS
            })

        self.commandStatuses.put_command_status(command, in_progress_status)

        numAttempts = 0
        retryDuration = 0  # even with 0 allow one attempt
        retryAble = False
        delay = 1
        log_command_output = True
        if 'commandParams' in command and 'log_output' in command['commandParams'] \
                and "false" == command['commandParams']['log_output']:
            log_command_output = False

        if 'commandParams' in command:
            if 'max_duration_for_retries' in command['commandParams']:
                retryDuration = int(
                    command['commandParams']['max_duration_for_retries'])
            if 'command_retry_enabled' in command['commandParams']:
                retryAble = command['commandParams'][
                    'command_retry_enabled'] == "true"
        if isAutoExecuteCommand:
            retryAble = False

        logger.info(
            "Command execution metadata - taskId = {taskId}, retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}, log_output = {log_command_output}"
            .format(taskId=taskId,
                    retryAble=retryAble,
                    retryDuration=retryDuration,
                    log_command_output=log_command_output))
        command_canceled = False
        while retryDuration >= 0:
            numAttempts += 1
            start = 0
            if retryAble:
                start = int(time.time())
            # running command
            commandresult = self.customServiceOrchestrator.runCommand(
                command,
                in_progress_status['tmpout'],
                in_progress_status['tmperr'],
                override_output_files=numAttempts == 1,
                retry=numAttempts > 1)
            end = 1
            if retryAble:
                end = int(time.time())
            retryDuration -= (end - start)

            # dumping results
            if isCommandBackground:
                logger.info(
                    "Command is background command, quit retrying. Exit code: {exitCode}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                    .format(cid=taskId,
                            exitCode=commandresult['exitcode'],
                            retryAble=retryAble,
                            retryDuration=retryDuration,
                            delay=delay))
                return
            else:
                if commandresult['exitcode'] == 0:
                    status = self.COMPLETED_STATUS
                else:
                    status = self.FAILED_STATUS
                    if (commandresult['exitcode']
                            == -signal.SIGTERM) or (commandresult['exitcode']
                                                    == -signal.SIGKILL):
                        logger.info(
                            'Command with taskId = {cid} was canceled!'.format(
                                cid=taskId))
                        command_canceled = True
                        break

            if status != self.COMPLETED_STATUS and retryAble and retryDuration > 0:
                delay = self.get_retry_delay(delay)
                if delay > retryDuration:
                    delay = retryDuration
                retryDuration -= delay  # allow one last attempt
                commandresult[
                    'stderr'] += "\n\nCommand failed. Retrying command execution ...\n\n"
                logger.info(
                    "Retrying command with taskId = {cid} after a wait of {delay}"
                    .format(cid=taskId, delay=delay))
                if 'agentLevelParams' not in command:
                    command['agentLevelParams'] = {}

                command['agentLevelParams']['commandBeingRetried'] = "true"
                time.sleep(delay)
                continue
            else:
                logger.info(
                    "Quit retrying for command with taskId = {cid}. Status: {status}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                    .format(cid=taskId,
                            status=status,
                            retryAble=retryAble,
                            retryDuration=retryDuration,
                            delay=delay))
                break

        # do not fail task which was rescheduled from server
        if command_canceled:
            with self.lock:
                with self.commandQueue.mutex:
                    for com in self.commandQueue.queue:
                        if com['taskId'] == command['taskId']:
                            logger.info(
                                'Command with taskId = {cid} was rescheduled by server. '
                                'Fail report on cancelled command won\'t be sent with heartbeat.'
                                .format(cid=taskId))
                            return

        # final result to stdout
        commandresult['stdout'] += ('\n\nCommand completed successfully!\n' if status == self.COMPLETED_STATUS
                                    else '\n\nCommand failed after ' + str(numAttempts) + ' tries\n')
        logger.info('Command with taskId = {cid} completed successfully!'.format(cid=taskId)
                    if status == self.COMPLETED_STATUS else
                    'Command with taskId = {cid} failed after {attempts} tries'.format(cid=taskId, attempts=numAttempts))

        roleResult = self.commandStatuses.generate_report_template(command)
        roleResult.update({
            'stdout': commandresult['stdout'],
            'stderr': commandresult['stderr'],
            'exitCode': commandresult['exitcode'],
            'status': status,
        })

        if self.config.has_option("logging","log_command_executes") \
            and int(self.config.get("logging", "log_command_executes")) == 1 \
            and log_command_output:

            if roleResult['stdout'] != '':
                logger.info("Begin command output log for command with id = " +
                            str(command['taskId']) + ", role = " +
                            command['role'] + ", roleCommand = " +
                            command['roleCommand'])
                self.log_command_output(roleResult['stdout'],
                                        str(command['taskId']))
                logger.info("End command output log for command with id = " +
                            str(command['taskId']) + ", role = " +
                            command['role'] + ", roleCommand = " +
                            command['roleCommand'])

            if roleResult['stderr'] != '':
                logger.info("Begin command stderr log for command with id = " +
                            str(command['taskId']) + ", role = " +
                            command['role'] + ", roleCommand = " +
                            command['roleCommand'])
                self.log_command_output(roleResult['stderr'],
                                        str(command['taskId']))
                logger.info("End command stderr log for command with id = " +
                            str(command['taskId']) + ", role = " +
                            command['role'] + ", roleCommand = " +
                            command['roleCommand'])

        if roleResult['stdout'] == '':
            roleResult['stdout'] = 'None'
        if roleResult['stderr'] == '':
            roleResult['stderr'] = 'None'

        # let ambari know name of custom command

        if 'commandParams' in command and command['commandParams'].has_key(
                'custom_command'):
            roleResult['customCommand'] = command['commandParams'][
                'custom_command']

        if 'structuredOut' in commandresult:
            roleResult['structuredOut'] = str(
                json.dumps(commandresult['structuredOut']))
        else:
            roleResult['structuredOut'] = ''

        # let recovery manager know the current state
        if status == self.COMPLETED_STATUS:
            # let ambari know that configuration tags were applied
            configHandler = ActualConfigHandler(self.config, self.configTags)
            """
      #update
      if 'commandParams' in command:
        command_params = command['commandParams']
        if command_params and command_params.has_key('forceRefreshConfigTags') and len(command_params['forceRefreshConfigTags']) > 0  :
          forceRefreshConfigTags = command_params['forceRefreshConfigTags'].split(',')
          logger.info("Got refresh additional component tags command")

          for configTag in forceRefreshConfigTags :
            configHandler.update_component_tag(command['role'], configTag, command['configurationTags'][configTag])

          roleResult['customCommand'] = self.CUSTOM_COMMAND_RESTART # force restart for component to evict stale_config on server side
          command['configurationTags'] = configHandler.read_actual_component(command['role'])

      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']
      component = {'serviceName':command['serviceName'],'componentName':command['role']}
      if 'roleCommand' in command and \
          (command['roleCommand'] == self.ROLE_COMMAND_START or
             (command['roleCommand'] == self.ROLE_COMMAND_INSTALL and component in LiveStatus.CLIENT_COMPONENTS) or
               (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and
                  'custom_command' in command['hostLevelParams'] and
                      command['hostLevelParams']['custom_command'] in (self.CUSTOM_COMMAND_RESTART,
                                                                       self.CUSTOM_COMMAND_START,
                                                                       self.CUSTOM_COMMAND_RECONFIGURE))):
        configHandler.write_actual_component(command['role'],
                                             command['configurationTags'])
        if 'clientsToUpdateConfigs' in command['hostLevelParams'] and command['hostLevelParams']['clientsToUpdateConfigs']:
          configHandler.write_client_components(command['serviceName'],
                                                command['configurationTags'],
                                                command['hostLevelParams']['clientsToUpdateConfigs'])
        roleResult['configurationTags'] = configHandler.read_actual_component(
            command['role'])
    """

        self.recovery_manager.process_execution_command_result(command, status)
        self.commandStatuses.put_command_status(command, roleResult)

        cluster_id = str(command['clusterId'])

        if cluster_id != '-1' and cluster_id != 'null':
            service_name = command['serviceName']
            if service_name != 'null':
                component_name = command['role']
                self.component_status_executor.check_component_status(
                    clusterId,
                    service_name,
                    component_name,
                    "STATUS",
                    report=True)
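Both this example and the next call self.log_command_output(...), which the listing does not show. A minimal sketch, assuming the intent is simply to emit large command output in fixed-size chunks so no single log record becomes unwieldy (the chunk size below is an assumption):

import logging

logger = logging.getLogger(__name__)
OUTPUT_CHUNK_SIZE = 4000  # assumed chunk size


def log_command_output(text, task_id):
  # Log the output piecewise; each record is prefixed with the task id.
  for start in range(0, len(text), OUTPUT_CHUNK_SIZE):
    logger.info("[taskId=%s] %s", task_id, text[start:start + OUTPUT_CHUNK_SIZE])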
Beispiel #22
0
  def execute_command(self, command):
    '''
    Executes commands of type EXECUTION_COMMAND
    '''
    clusterName = command['clusterName']
    commandId = command['commandId']
    isCommandBackground = command['commandType'] == self.BACKGROUND_EXECUTION_COMMAND
    isAutoExecuteCommand = command['commandType'] == self.AUTO_EXECUTION_COMMAND
    message = "Executing command with id = {commandId}, taskId = {taskId} for role = {role} of " \
              "cluster {cluster}.".format(
              commandId = str(commandId), taskId = str(command['taskId']),
              role=command['role'], cluster=clusterName)
    logger.info(message)

    taskId = command['taskId']
    # Preparing 'IN_PROGRESS' report
    in_progress_status = self.commandStatuses.generate_report_template(command)
    # The path of the files that contain the output log and error log use a prefix that the agent advertises to the
    # server. The prefix is defined in agent-config.ini
    if not isAutoExecuteCommand:
      in_progress_status.update({
        'tmpout': self.tmpdir + os.sep + 'output-' + str(taskId) + '.txt',
        'tmperr': self.tmpdir + os.sep + 'errors-' + str(taskId) + '.txt',
        'structuredOut' : self.tmpdir + os.sep + 'structured-out-' + str(taskId) + '.json',
        'status': self.IN_PROGRESS_STATUS
      })
    else:
      in_progress_status.update({
        'tmpout': self.tmpdir + os.sep + 'auto_output-' + str(taskId) + '.txt',
        'tmperr': self.tmpdir + os.sep + 'auto_errors-' + str(taskId) + '.txt',
        'structuredOut' : self.tmpdir + os.sep + 'auto_structured-out-' + str(taskId) + '.json',
        'status': self.IN_PROGRESS_STATUS
      })

    self.commandStatuses.put_command_status(command, in_progress_status)

    numAttempts = 0
    retryDuration = 0  # even with 0 allow one attempt
    retryAble = False
    delay = 1
    log_command_output = True
    if 'commandParams' in command and 'log_output' in command['commandParams'] and "false" == command['commandParams']['log_output']:
      log_command_output = False

    if 'commandParams' in command:
      if 'max_duration_for_retries' in command['commandParams']:
        retryDuration = int(command['commandParams']['max_duration_for_retries'])
      if 'command_retry_enabled' in command['commandParams']:
        retryAble = command['commandParams']['command_retry_enabled'] == "true"
    if isAutoExecuteCommand:
      retryAble = False

    logger.info("Command execution metadata - taskId = {taskId}, retry enabled = {retryAble}, max retry duration (sec) = {retryDuration}, log_output = {log_command_output}".
                 format(taskId=taskId, retryAble=retryAble, retryDuration=retryDuration, log_command_output=log_command_output))
    while retryDuration >= 0:
      numAttempts += 1
      start = 0
      if retryAble:
        start = int(time.time())
      # running command
      commandresult = self.customServiceOrchestrator.runCommand(command,
                                                                in_progress_status['tmpout'],
                                                                in_progress_status['tmperr'],
                                                                override_output_files=numAttempts == 1,
                                                                retry=numAttempts > 1)
      end = 1
      if retryAble:
        end = int(time.time())
      retryDuration -= (end - start)

      # dumping results
      if isCommandBackground:
        logger.info("Command is background command, quit retrying. Exit code: {exitCode}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                    .format(cid=taskId, exitCode=commandresult['exitcode'], retryAble=retryAble, retryDuration=retryDuration, delay=delay))
        return
      else:
        if commandresult['exitcode'] == 0:
          status = self.COMPLETED_STATUS
        else:
          status = self.FAILED_STATUS
          if (commandresult['exitcode'] == -signal.SIGTERM) or (commandresult['exitcode'] == -signal.SIGKILL):
            logger.info('Command {cid} was canceled!'.format(cid=taskId))
            break

      if status != self.COMPLETED_STATUS and retryAble and retryDuration > 0:
        delay = self.get_retry_delay(delay)
        if delay > retryDuration:
          delay = retryDuration
        retryDuration -= delay  # allow one last attempt
        commandresult['stderr'] += "\n\nCommand failed. Retrying command execution ...\n\n"
        logger.info("Retrying command id {cid} after a wait of {delay}".format(cid=taskId, delay=delay))
        time.sleep(delay)
        continue
      else:
        logger.info("Quit retrying for command id {cid}. Status: {status}, retryAble: {retryAble}, retryDuration (sec): {retryDuration}, last delay (sec): {delay}"
                    .format(cid=taskId, status=status, retryAble=retryAble, retryDuration=retryDuration, delay=delay))
        break

    # final result to stdout
    commandresult['stdout'] += '\n\nCommand completed successfully!\n' if status == self.COMPLETED_STATUS else '\n\nCommand failed after ' + str(numAttempts) + ' tries\n'
    logger.info('Command {cid} completed successfully!'.format(cid=taskId) if status == self.COMPLETED_STATUS else 'Command {cid} failed after {attempts} tries'.format(cid=taskId, attempts=numAttempts))

    roleResult = self.commandStatuses.generate_report_template(command)
    roleResult.update({
      'stdout': commandresult['stdout'],
      'stderr': commandresult['stderr'],
      'exitCode': commandresult['exitcode'],
      'status': status,
    })

    if self.config.has_option("logging","log_command_executes") \
        and int(self.config.get("logging", "log_command_executes")) == 1 \
        and log_command_output:

      if roleResult['stdout'] != '':
          logger.info("Begin command output log for command with id = " + str(command['taskId']) + ", role = "
                      + command['role'] + ", roleCommand = " + command['roleCommand'])
          self.log_command_output(roleResult['stdout'], str(command['taskId']))
          logger.info("End command output log for command with id = " + str(command['taskId']) + ", role = "
                      + command['role'] + ", roleCommand = " + command['roleCommand'])

      if roleResult['stderr'] != '':
          logger.info("Begin command stderr log for command with id = " + str(command['taskId']) + ", role = "
                      + command['role'] + ", roleCommand = " + command['roleCommand'])
          self.log_command_output(roleResult['stderr'], str(command['taskId']))
          logger.info("End command stderr log for command with id = " + str(command['taskId']) + ", role = "
                      + command['role'] + ", roleCommand = " + command['roleCommand'])

    if roleResult['stdout'] == '':
      roleResult['stdout'] = 'None'
    if roleResult['stderr'] == '':
      roleResult['stderr'] = 'None'

    # let ambari know name of custom command
    if command['hostLevelParams'].has_key('custom_command'):
      roleResult['customCommand'] = command['hostLevelParams']['custom_command']

    if 'structuredOut' in commandresult:
      roleResult['structuredOut'] = str(json.dumps(commandresult['structuredOut']))
    else:
      roleResult['structuredOut'] = ''

    # let recovery manager know the current state
    if status == self.COMPLETED_STATUS:
      if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
          and self.controller.recovery_manager.configured_for_recovery(command['role']):
        if command['roleCommand'] == self.ROLE_COMMAND_START:
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
          self.controller.recovery_manager.update_config_staleness(command['role'], False)
          logger.info("After EXECUTION_COMMAND (START), with taskId=" + str(command['taskId']) +
                      ", current state of " + command['role'] + " to " +
                       self.controller.recovery_manager.get_current_status(command['role']) )
        elif command['roleCommand'] == self.ROLE_COMMAND_STOP or command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
          self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.DEAD_STATUS)
          logger.info("After EXECUTION_COMMAND (STOP/INSTALL), with taskId=" + str(command['taskId']) +
                      ", current state of " + command['role'] + " to " +
                       self.controller.recovery_manager.get_current_status(command['role']) )
        elif command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND:
          if command['hostLevelParams'].has_key('custom_command') and \
                  command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART:
            self.controller.recovery_manager.update_current_status(command['role'], LiveStatus.LIVE_STATUS)
            self.controller.recovery_manager.update_config_staleness(command['role'], False)
            logger.info("After EXECUTION_COMMAND (RESTART), current state of " + command['role'] + " to " +
                         self.controller.recovery_manager.get_current_status(command['role']) )
      pass

      # let ambari know that configuration tags were applied
      configHandler = ActualConfigHandler(self.config, self.configTags)

      if command.has_key('configurationTags'):
        configHandler.write_actual(command['configurationTags'])
        roleResult['configurationTags'] = command['configurationTags']
      component = {'serviceName':command['serviceName'],'componentName':command['role']}
      if 'roleCommand' in command and \
          (command['roleCommand'] == self.ROLE_COMMAND_START or
             (command['roleCommand'] == self.ROLE_COMMAND_INSTALL and component in LiveStatus.CLIENT_COMPONENTS) or
               (command['roleCommand'] == self.ROLE_COMMAND_CUSTOM_COMMAND and
                  'custom_command' in command['hostLevelParams'] and
                      command['hostLevelParams']['custom_command'] == self.CUSTOM_COMMAND_RESTART)):
        configHandler.write_actual_component(command['role'],
                                             command['configurationTags'])
        if 'clientsToUpdateConfigs' in command['hostLevelParams'] and command['hostLevelParams']['clientsToUpdateConfigs']:
          configHandler.write_client_components(command['serviceName'],
                                                command['configurationTags'],
                                                command['hostLevelParams']['clientsToUpdateConfigs'])
        roleResult['configurationTags'] = configHandler.read_actual_component(
            command['role'])
    elif status == self.FAILED_STATUS:
      if self.controller.recovery_manager.enabled() and command.has_key('roleCommand') \
              and self.controller.recovery_manager.configured_for_recovery(command['role']):
        if command['roleCommand'] == self.ROLE_COMMAND_INSTALL:
          self.controller.recovery_manager.update_current_status(command['role'], self.controller.recovery_manager.INSTALL_FAILED)
          logger.info("After EXECUTION_COMMAND (INSTALL), with taskId=" + str(command['taskId']) +
                      ", current state of " + command['role'] + " to " +
                      self.controller.recovery_manager.get_current_status(command['role']))

    self.commandStatuses.put_command_status(command, roleResult)
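To summarize the recovery-manager bookkeeping in the example above, the mapping from command outcome and roleCommand to the recorded component state can be restated as a small pure function. The literal strings are assumed to match the agent's ROLE_COMMAND_* / CUSTOM_COMMAND_RESTART constants and LiveStatus.LIVE_STATUS / DEAD_STATUS values; this is a paraphrase of the branches above, not agent code:

def recovery_state_after(status, role_command, custom_command=None):
  # COMPLETED commands move the component to STARTED or INSTALLED;
  # a failed INSTALL is recorded as INSTALL_FAILED; anything else is untouched.
  if status == 'COMPLETED':
    if role_command == 'START':
      return 'STARTED'
    if role_command in ('STOP', 'INSTALL'):
      return 'INSTALLED'
    if role_command == 'CUSTOM_COMMAND' and custom_command == 'RESTART':
      return 'STARTED'
  elif status == 'FAILED' and role_command == 'INSTALL':
    return 'INSTALL_FAILED'
  return None  # no recovery-state change recorded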