コード例 #1
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_is_action_info_stale(self, time_mock):
        """Check ``is_action_info_stale`` at several mocked clock values.

        The component is probed with no action record, with a record that
        was never attempted, and with a record holding one attempt.
        ``time_mock.return_value`` is the clock value used at each probe.
        """
        component = "COMPONENT_NAME"

        def action_record(last_attempt, count, lifetime_count):
            # Shape of one entry in ``manager.actions`` as used by this test.
            return {
                "lastAttempt": last_attempt,
                "count": count,
                "lastReset": 0,
                "lifetimeCount": lifetime_count,
                "warnedLastAttempt": False,
                "warnedLastReset": False,
                "warnedThresholdReached": False
            }

        manager = RecoveryManager(tempfile.mktemp(), True)
        manager.update_config(5, 60, 5, 16, True, False, "")

        # Nothing has been recorded for the component yet.
        time_mock.return_value = 0
        self.assertFalse(manager.is_action_info_stale(component))

        # A record with zero attempts is not stale at t=3600.
        manager.actions[component] = action_record(0, 0, 0)
        time_mock.return_value = 3600
        self.assertFalse(manager.is_action_info_stale(component))

        # One attempt at t=1: not stale at 3601, stale at 3602.
        manager.actions[component] = action_record(1, 1, 1)
        time_mock.return_value = 3601
        self.assertFalse(manager.is_action_info_stale(component))

        time_mock.return_value = 3602
        self.assertTrue(manager.is_action_info_stale(component))
コード例 #2
0
  def test_defaults(self):
    """A manager built without arguments is disabled and offers no commands."""
    component = "NODEMANAGER"
    manager = RecoveryManager()

    self.assertFalse(manager.enabled())
    # Neither an install nor a start command is available by default.
    for lookup in (manager.get_install_command, manager.get_start_command):
      self.assertEqual(None, lookup(component))

    # A current/desired mismatch still does not require recovery here.
    manager.update_current_status(component, "INSTALLED")
    manager.update_desired_status(component, "STARTED")
    self.assertFalse(manager.requires_recovery(component))
コード例 #3
0
  def test_is_action_info_stale(self, time_mock):
    """Check ``is_action_info_stale`` at several mocked clock values.

    The component is probed with no action record, with a record that was
    never attempted, and with a record holding one attempt.
    ``time_mock.return_value`` is the clock value used at each probe.
    """
    component = "COMPONENT_NAME"

    def action_record(last_attempt, count, lifetime_count):
      # Shape of one entry in ``manager.actions`` as used by this test.
      return {
        "lastAttempt": last_attempt,
        "count": count,
        "lastReset": 0,
        "lifetimeCount": lifetime_count,
        "warnedLastAttempt": False,
        "warnedLastReset": False,
        "warnedThresholdReached": False
      }

    manager = RecoveryManager(tempfile.mktemp(), True)
    manager.update_config(5, 60, 5, 16, True, False, False, "", -1)

    # Nothing has been recorded for the component yet.
    time_mock.return_value = 0
    self.assertFalse(manager.is_action_info_stale(component))

    # A record with zero attempts is not stale at t=3600.
    manager.actions[component] = action_record(0, 0, 0)
    time_mock.return_value = 3600
    self.assertFalse(manager.is_action_info_stale(component))

    # One attempt at t=1: not stale at 3601, stale at 3602.
    manager.actions[component] = action_record(1, 1, 1)
    time_mock.return_value = 3601
    self.assertFalse(manager.is_action_info_stale(component))

    time_mock.return_value = 3602
    self.assertTrue(manager.is_action_info_stale(component))
コード例 #4
0
  def test_defaults(self):
    """A manager given only a cache path is disabled and offers no commands."""
    component = "NODEMANAGER"
    manager = RecoveryManager(tempfile.mktemp())

    self.assertFalse(manager.enabled())
    # Neither an install nor a start command is available by default.
    for lookup in (manager.get_install_command, manager.get_start_command):
      self.assertEqual(None, lookup(component))

    # A current/desired mismatch still does not require recovery here.
    manager.update_current_status(component, "INSTALLED")
    manager.update_desired_status(component, "STARTED")
    self.assertFalse(manager.requires_recovery(component))
コード例 #5
0
  def test_execute_status_command(self, CustomServiceOrchestrator_mock,
                                  build_mock, execute_command_mock, requestComponentSecurityState_mock,
                                  requestComponentStatus_mock,
                                  status_update_callback):
    """Run a status command and check the single component status report.

    The expected report is the mocked ``build_mock`` result with a
    ``securityState`` entry added.
    """
    CustomServiceOrchestrator_mock.return_value = None
    controller = MagicMock()
    queue = ActionQueue(AmbariConfig(), controller)

    build_mock.return_value = {'dummy report': ''}

    controller.recovery_manager = RecoveryManager(tempfile.mktemp())

    # Give both status-request mocks a clean history and a canned result.
    canned_results = (
      (requestComponentStatus_mock, {'exitcode': 0}),
      (requestComponentSecurityState_mock, 'UNKNOWN'),
    )
    for request_mock, canned in canned_results:
      request_mock.reset_mock()
      request_mock.return_value = canned

    queue.execute_status_command(self.status_command)
    component_statuses = queue.result()['componentStatus']

    self.assertEqual(len(component_statuses), 1)
    self.assertEqual(component_statuses[0],
                     {'dummy report': '', 'securityState': 'UNKNOWN'})
    self.assertTrue(requestComponentStatus_mock.called)
    self.assertTrue(requestComponentStatus_mock.called)
コード例 #6
0
    def init(self):
        """Create the configuration, caches and helper objects on ``self``.

        Objects are created in a fixed order because several constructors
        receive ``self`` and may read attributes assigned earlier.
        """
        self.config = AmbariConfig.get_resolved_config()

        self.is_registered = False

        # All cluster caches are rooted in the same directory.
        cluster_cache_dir = self.config.cluster_cache_dir
        self.metadata_cache = ClusterMetadataCache(cluster_cache_dir)
        self.topology_cache = ClusterTopologyCache(cluster_cache_dir, self.config)
        self.host_level_params_cache = ClusterHostLevelParamsCache(cluster_cache_dir)
        self.configurations_cache = ClusterConfigurationCache(cluster_cache_dir)
        self.alert_definitions_cache = ClusterAlertDefinitionsCache(cluster_cache_dir)

        self.configuration_builder = ConfigurationBuilder(self)
        self.stale_alerts_monitor = StaleAlertsMonitor(self)

        self.file_cache = FileCache(self.config)

        self.customServiceOrchestrator = CustomServiceOrchestrator(self)

        recovery_cache_dir = self.config.recovery_cache_dir
        self.recovery_manager = RecoveryManager(recovery_cache_dir)
        self.commandStatuses = CommandStatusDict(self)
        self.action_queue = ActionQueue(self)
        self.alert_scheduler_handler = AlertSchedulerHandler(self)
コード例 #7
0
ファイル: InitializerModule.py プロジェクト: u39kun/ambari
    def init(self):
        """Create the caches and helper objects on ``self``, then start threads.

        ``self.config`` must already be set; it is read here but not
        assigned. Objects are created in a fixed order because several
        constructors receive ``self`` and may read attributes assigned
        earlier.
        """
        self.is_registered = False

        # All cluster caches are rooted in the same directory.
        cluster_cache_dir = self.config.cluster_cache_dir
        self.metadata_cache = ClusterMetadataCache(cluster_cache_dir)
        self.topology_cache = ClusterTopologyCache(cluster_cache_dir, self.config)
        self.host_level_params_cache = ClusterHostLevelParamsCache(cluster_cache_dir)
        self.configurations_cache = ClusterConfigurationCache(cluster_cache_dir)
        self.alert_definitions_cache = ClusterAlertDefinitionsCache(cluster_cache_dir)

        self.configuration_builder = ConfigurationBuilder(self)
        self.stale_alerts_monitor = StaleAlertsMonitor(self)
        self.server_responses_listener = ServerResponsesListener()
        self.file_cache = FileCache(self.config)
        self.customServiceOrchestrator = CustomServiceOrchestrator(self)
        self.recovery_manager = RecoveryManager()
        self.commandStatuses = CommandStatusDict(self)

        self.init_threads()
コード例 #8
0
 def test_reset_queue(self, CustomServiceOrchestrator_mock,
                               get_mock, process_command_mock, gpeo_mock):
   """Queue two commands, reset the queue, and check it is empty and stoppable."""
   CustomServiceOrchestrator_mock.return_value = None

   controller = MagicMock()
   controller.recovery_manager = RecoveryManager(tempfile.mktemp())

   # The agent config is a mock whose parallel-exec option is 0.
   agent_config = MagicMock()
   gpeo_mock.return_value = 0
   agent_config.get_parallel_exec_option = gpeo_mock

   queue = ActionQueue(agent_config, controller)
   queue.start()

   pending = [self.datanode_install_command, self.hbase_install_command]
   queue.put(pending)
   self.assertEqual(2, queue.commandQueue.qsize())
   self.assertTrue(queue.tasks_in_progress_or_pending())

   queue.reset()
   self.assertTrue(queue.commandQueue.empty())
   self.assertFalse(queue.tasks_in_progress_or_pending())

   # Short pause before shutting the queue thread down.
   time.sleep(0.1)
   queue.stop()
   queue.join()
   self.assertEqual(queue.stopped(), True, 'Action queue is not stopped.')
コード例 #9
0
  def test_process_commands(self, mock_uds):
    """Check which desired-status updates result from processed commands.

    ``mock_uds`` is the injected mock on which the expected
    ``(component, state)`` calls are asserted.

    Each expectation is passed to ``assert_has_calls`` as a single list.
    The second positional parameter of ``assert_has_calls`` is
    ``any_order``, so passing a second list there would leave that call
    unchecked and turn order checking off.
    """
    rm = RecoveryManager(tempfile.mktemp(), True)

    # Empty input must not trigger any status update.
    rm.process_status_commands(None)
    self.assertFalse(mock_uds.called)

    rm.process_status_commands([])
    self.assertFalse(mock_uds.called)

    rm.process_status_commands([self.command])
    mock_uds.assert_has_calls([call("NODEMANAGER", "STARTED")])

    mock_uds.reset_mock()

    # Two status commands with an execution command between them.
    rm.process_status_commands([self.command, self.exec_command1, self.command])
    mock_uds.assert_has_calls([call("NODEMANAGER", "STARTED"),
                               call("NODEMANAGER", "STARTED")])

    mock_uds.reset_mock()

    rm.process_execution_commands([self.exec_command1, self.exec_command2, self.exec_command3])
    mock_uds.assert_has_calls([call("NODEMANAGER", "INSTALLED"),
                               call("NODEMANAGER", "STARTED")])

    mock_uds.reset_mock()

    rm.process_execution_commands([self.exec_command1, self.command])
    mock_uds.assert_has_calls([call("NODEMANAGER", "INSTALLED")])

    # No reset here: this call is asserted on top of the previous history.
    rm.process_execution_commands([self.exec_command4])
    mock_uds.assert_has_calls([call("NODEMANAGER", "STARTED")])
コード例 #10
0
  def test_recovery_required2(self):
    """Check ``requires_recovery`` against the two component-list arguments.

    Every probe sets the component to current ``INSTALLED`` / desired
    ``STARTED`` and then asks whether recovery is required. The last two
    ``update_config`` arguments are varied between probes.
    """
    def configure(manager, first_list, second_list):
      manager.update_config(15, 5, 1, 16, True, False, first_list, second_list)

    def fresh_manager(first_list, second_list):
      manager = RecoveryManager(tempfile.mktemp(), True, True)
      configure(manager, first_list, second_list)
      return manager

    def needs_recovery(manager, component):
      manager.update_current_status(component, "INSTALLED")
      manager.update_desired_status(component, "STARTED")
      return manager.requires_recovery(component)

    # Both lists empty.
    manager = fresh_manager("", "")
    self.assertTrue(needs_recovery(manager, "NODEMANAGER"))

    # NODEMANAGER only in the first list.
    manager = fresh_manager("NODEMANAGER", "")
    self.assertTrue(needs_recovery(manager, "NODEMANAGER"))
    self.assertFalse(needs_recovery(manager, "DATANODE"))

    # NODEMANAGER only in the second list.
    manager = fresh_manager("", "NODEMANAGER")
    self.assertFalse(needs_recovery(manager, "NODEMANAGER"))
    self.assertTrue(needs_recovery(manager, "DATANODE"))

    # Reconfiguring the same manager: the later configuration applies.
    configure(manager, "", "NODEMANAGER")
    configure(manager, "NODEMANAGER", "")
    self.assertTrue(needs_recovery(manager, "NODEMANAGER"))
    self.assertFalse(needs_recovery(manager, "DATANODE"))

    # NODEMANAGER present in both lists.
    configure(manager, "NODEMANAGER", "NODEMANAGER")
    self.assertTrue(needs_recovery(manager, "NODEMANAGER"))
    self.assertFalse(needs_recovery(manager, "DATANODE"))
コード例 #11
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_configured_for_recovery(self):
        """Check ``configured_for_recovery`` for several component-list strings.

        Each scenario applies one ``update_config`` call and then probes the
        listed components in order.
        """
        scenarios = (
            ((12, 5, 1, 15, True, False, "A,B"),
             (("A", True), ("B", True))),
            ((5, 5, 1, 11, True, False, ""),
             (("A", False), ("B", False))),
            ((5, 5, 1, 11, True, False, "A"),
             (("A", True), ("B", False))),
            ((5, 5, 1, 11, True, False, "A"),
             (("A", True), ("B", False), ("C", False))),
            # Spaces around the names are part of the input string.
            ((5, 5, 1, 11, True, False, "A, D, F "),
             (("A", True), ("B", False), ("C", False),
              ("D", True), ("E", False), ("F", True))),
        )

        manager = RecoveryManager(tempfile.mktemp(), True)
        for config_args, expectations in scenarios:
            manager.update_config(*config_args)
            for component, configured in expectations:
                check = self.assertTrue if configured else self.assertFalse
                check(manager.configured_for_recovery(component))
コード例 #12
0
  def test_recovery_required(self):
    """Check ``requires_recovery`` for NODEMANAGER across state combinations.

    Each step optionally updates the current status, always updates the
    desired status, and then asserts the recovery verdict. The steps are
    run against two managers that differ in their third constructor
    argument.
    """
    component = "NODEMANAGER"

    def run_steps(manager, steps):
      for current, desired, expected in steps:
        # ``None`` means: leave the current status as it is.
        if current is not None:
          manager.update_current_status(component, current)
        manager.update_desired_status(component, desired)
        check = self.assertTrue if expected else self.assertFalse
        check(manager.requires_recovery(component))

    run_steps(RecoveryManager(tempfile.mktemp(), True, False), (
      ("INSTALLED", "INSTALLED", False),
      (None, "STARTED", True),
      ("STARTED", "INSTALLED", True),
      (None, "STARTED", False),
      ("INSTALLED", "XYS", False),
      (None, "", False),
      ("INIT", "INSTALLED", True),
      (None, "STARTED", True),
    ))

    run_steps(RecoveryManager(tempfile.mktemp(), True, True), (
      ("INIT", "INSTALLED", False),
      ("INIT", "START", False),
      ("INSTALLED", "START", False),
    ))
コード例 #13
0
  def test_get_recovery_commands(self, time_mock):
    """Check which recovery command is produced for each state combination.

    ``time_mock.side_effect`` supplies successive clock values, so the
    order of calls into the manager must stay exactly as written.
    """
    component = "NODEMANAGER"
    time_mock.side_effect = [
      1000, 1001, 1002, 1003,
      1100, 1101, 1102,
      1200, 1201, 1203,
      4000, 4001, 4002, 4003,
      4100, 4101, 4102, 4103,
      4200, 4201, 4202,
      4300, 4301, 4302,
    ]
    manager = RecoveryManager(tempfile.mktemp(), True)
    manager.update_config(15, 5, 1, 16, True, False, "", "")

    def set_states(current, desired):
      manager.update_current_status(component, current)
      manager.update_desired_status(component, desired)

    def expect_single(role_command):
      # Exactly one command with the given roleCommand; returned for extra checks.
      produced = manager.get_recovery_commands()
      self.assertEqual(1, len(produced))
      self.assertEqual(role_command, produced[0]["roleCommand"])
      return produced[0]

    def expect_none():
      produced = manager.get_recovery_commands()
      self.assertEqual(0, len(produced))

    stored_command = copy.deepcopy(self.command)
    manager.store_or_update_command(stored_command)

    set_states("INSTALLED", "STARTED")
    self.assertEqual("INSTALLED", manager.get_current_status(component))
    self.assertEqual("STARTED", manager.get_desired_status(component))
    expect_single("START")

    # Starts at 1100
    set_states("INIT", "STARTED")
    expect_single("INSTALL")

    # Starts at 1200
    set_states("INIT", "INSTALLED")
    expect_single("INSTALL")

    # After this reconfiguration no command is produced.
    manager.update_config(2, 5, 1, 5, True, True, "", "")
    set_states("INIT", "INSTALLED")
    expect_none()

    manager.update_config(12, 5, 1, 15, True, False, "", "")
    set_states("INIT", "INSTALLED")
    manager.store_or_update_command(stored_command)
    expect_single("INSTALL")

    # States match and the config is not stale: nothing to do.
    manager.update_config_staleness(component, False)
    set_states("INSTALLED", "INSTALLED")
    expect_none()

    # Stale config with a stored command whose desiredState is INSTALLED.
    install_command = copy.deepcopy(self.command)
    install_command["desiredState"] = "INSTALLED"
    manager.store_or_update_command(install_command)
    manager.update_config_staleness(component, True)
    expect_single("INSTALL")

    set_states("STARTED", "STARTED")
    restart = expect_single("CUSTOM_COMMAND")
    self.assertEqual("RESTART", restart["hostLevelParams"]["custom_command"])

    set_states("STARTED", "INSTALLED")
    expect_single("STOP")
コード例 #14
0
  def test_auto_execute_command(self, status_update_callback_mock, open_mock):
    """Process ``self.datanode_auto_start_command`` on a worker thread, twice.

    The first run uses a mocked ``runCommand`` result with status
    ``COMPLETE``, the second with status ``FAILED``. In both cases the
    test expects ``actionQueue.result()['reports']`` to be empty once no
    task is in progress or pending.

    NOTE(review): the polling loops below have no timeout and the worker
    threads are never joined; if the queue never changes state the test
    hangs instead of failing.
    """
    # Make file read calls visible
    def open_side_effect(file, mode):
      # Reads get a mock whose read() names the file; other modes use the real open.
      if mode == 'r':
        file_mock = MagicMock()
        file_mock.read.return_value = "Read from " + str(file)
        return file_mock
      else:
        return self.original_open(file, mode)
    open_mock.side_effect = open_side_effect

    config = AmbariConfig()
    tempdir = tempfile.gettempdir()
    config.set('agent', 'prefix', tempdir)
    config.set('agent', 'cache_dir', "/var/lib/ambari-agent/cache")
    config.set('agent', 'tolerate_download_failures', "true")
    dummy_controller = MagicMock()
    dummy_controller.recovery_manager = RecoveryManager(tempfile.mktemp())
    dummy_controller.recovery_manager.update_config(5, 5, 1, 11, True, False, "", -1)

    actionQueue = ActionQueue(config, dummy_controller)
    # Gate that keeps the mocked runCommand blocked until the test releases it.
    unfreeze_flag = threading.Event()
    # Shared result returned by the mocked runCommand; mutated between runs.
    python_execution_result_dict = {
      'stdout': 'out',
      'stderr': 'stderr',
      'structuredOut' : ''
    }

    def side_effect(command, tmpoutfile, tmperrfile, override_output_files=True, retry=False):
      # Stand-in for runCommand: block until released, then return the shared dict.
      unfreeze_flag.wait()
      return python_execution_result_dict
    def patched_aq_execute_command(command):
      # We have to perform patching for separate thread in the same thread
      with patch.object(CustomServiceOrchestrator, "runCommand") as runCommand_mock:
        runCommand_mock.side_effect = side_effect
        actionQueue.process_command(command)

    python_execution_result_dict['status'] = 'COMPLETE'
    python_execution_result_dict['exitcode'] = 0
    self.assertFalse(actionQueue.tasks_in_progress_or_pending())
    # We call method in a separate thread
    execution_thread = Thread(target = patched_aq_execute_command ,
                              args = (self.datanode_auto_start_command, ))
    execution_thread.start()
    #  check in progress report
    # wait until ready
    while True:
      time.sleep(0.1)
      if actionQueue.tasks_in_progress_or_pending():
        break
    # Continue command execution
    unfreeze_flag.set()
    # wait until ready
    # check_queue is never set to False; both polling loops exit through `break`.
    check_queue = True
    while check_queue:
      report = actionQueue.result()
      if not actionQueue.tasks_in_progress_or_pending():
        break
      time.sleep(0.1)

    self.assertEqual(len(report['reports']), 0)

    ## Test failed execution
    python_execution_result_dict['status'] = 'FAILED'
    python_execution_result_dict['exitcode'] = 13
    # We call method in a separate thread
    execution_thread = Thread(target = patched_aq_execute_command ,
                              args = (self.datanode_auto_start_command, ))
    execution_thread.start()
    # The event was set during the first run and never cleared, so this call
    # does not change its state and the mocked runCommand will not block.
    unfreeze_flag.set()
    #  check in progress report
    # wait until ready
    while check_queue:
      report = actionQueue.result()
      if not actionQueue.tasks_in_progress_or_pending():
        break
      time.sleep(0.1)

    self.assertEqual(len(report['reports']), 0)
コード例 #15
0
    def test_process_commands(self, mock_uds):
        """Check which desired-status updates result from processed commands.

        ``mock_uds`` is the injected mock on which the expected
        ``(component, state)`` calls are asserted.

        Each expectation is passed to ``assert_has_calls`` as a single
        list. The second positional parameter of ``assert_has_calls`` is
        ``any_order``, so passing a second list there would leave that call
        unchecked and turn order checking off.
        """
        rm = RecoveryManager(True)

        # Empty input must not trigger any status update.
        rm.process_status_commands(None)
        self.assertFalse(mock_uds.called)

        rm.process_status_commands([])
        self.assertFalse(mock_uds.called)

        rm.process_status_commands([self.command])
        mock_uds.assert_has_calls([call("NODEMANAGER", "STARTED")])

        mock_uds.reset_mock()

        # Two status commands with an execution command between them.
        rm.process_status_commands(
            [self.command, self.exec_command1, self.command])
        mock_uds.assert_has_calls([call("NODEMANAGER", "STARTED"),
                                   call("NODEMANAGER", "STARTED")])

        mock_uds.reset_mock()

        rm.process_execution_commands(
            [self.exec_command1, self.exec_command2, self.exec_command3])
        mock_uds.assert_has_calls([call("NODEMANAGER", "INSTALLED"),
                                   call("NODEMANAGER", "STARTED")])

        mock_uds.reset_mock()

        rm.process_execution_commands([self.exec_command1, self.command])
        mock_uds.assert_has_calls([call("NODEMANAGER", "INSTALLED")])
コード例 #16
0
    def test_configured_for_recovery(self):
        """Check ``configured_for_recovery`` for several component-list pairs.

        The manager is probed once before any ``update_config`` call; each
        later scenario applies one ``update_config`` call and then probes
        the listed components in order.
        """
        def probe(manager, expectations):
            for component, configured in expectations:
                check = self.assertTrue if configured else self.assertFalse
                check(manager.configured_for_recovery(component))

        manager = RecoveryManager(True)
        # Before any configuration both components are reported as configured.
        probe(manager, (("A", True), ("B", True)))

        scenarios = (
            (("", ""),
             (("A", True), ("B", True))),
            (("A", ""),
             (("A", True), ("B", False))),
            (("", "B,C"),
             (("A", True), ("B", False), ("C", False))),
            # Spaces around the names are part of the input string.
            (("A, D, F ", "B,C"),
             (("A", True), ("B", False), ("C", False),
              ("D", True), ("E", False), ("F", True))),
        )
        for (first_list, second_list), expectations in scenarios:
            manager.update_config(5, 5, 1, 11, True, False, first_list, second_list)
            probe(manager, expectations)
コード例 #17
0
  def test_reset_if_window_passed_since_last_attempt(self, time_mock):
    """Check when ``lastReset`` of a component's action record is moved.

    ``time_mock.side_effect`` supplies the clock values 1000, 1071 and
    1372, and the test calls ``execute`` three times. ``lastReset`` is
    expected to stay at 1000 after the second call and to become 1372
    after the third.

    Uses ``assertEqual``; the ``assertEquals`` alias is deprecated and was
    removed from ``unittest`` in Python 3.12.
    """
    time_mock.side_effect = \
      [1000, 1071, 1372]
    rm = RecoveryManager(tempfile.mktemp(), True)

    rm.update_config(2, 5, 1, 4, True, True, "", "")

    rm.execute("COMPONENT")
    actions = rm.get_actions_copy()["COMPONENT"]
    self.assertEqual(actions['lastReset'], 1000)
    rm.execute("COMPONENT")
    actions = rm.get_actions_copy()["COMPONENT"]
    self.assertEqual(actions['lastReset'], 1000)
    # reset if window_in_sec seconds passed since last attempt
    rm.execute("COMPONENT")
    actions = rm.get_actions_copy()["COMPONENT"]
    self.assertEqual(actions['lastReset'], 1372)
コード例 #18
0
  def test_configured_for_recovery(self):
    """Check ``configured_for_recovery`` for several component-list pairs.

    The manager is probed once before any ``update_config`` call; each
    later scenario applies one ``update_config`` call and then probes the
    listed components in order.
    """
    def probe(manager, expectations):
      for component, configured in expectations:
        check = self.assertTrue if configured else self.assertFalse
        check(manager.configured_for_recovery(component))

    manager = RecoveryManager(tempfile.mktemp(), True)
    # Before any configuration both components are reported as configured.
    probe(manager, (("A", True), ("B", True)))

    scenarios = (
      (("", ""),
       (("A", True), ("B", True))),
      (("A", ""),
       (("A", True), ("B", False))),
      (("", "B,C"),
       (("A", True), ("B", False), ("C", False))),
      # Spaces around the names are part of the input string.
      (("A, D, F ", "B,C"),
       (("A", True), ("B", False), ("C", False),
        ("D", True), ("E", False), ("F", True))),
    )
    for (first_list, second_list), expectations in scenarios:
      manager.update_config(5, 5, 1, 11, True, False, first_list, second_list)
      probe(manager, expectations)
コード例 #19
0
 def test_command_count(self):
   """``has_active_command`` stays true until every started command is stopped."""
   manager = RecoveryManager(tempfile.mktemp(), True)
   self.assertFalse(manager.has_active_command())

   # (transition to apply, expected has_active_command() afterwards)
   transitions = (
     (manager.start_execution_command, True),
     (manager.start_execution_command, True),
     (manager.stop_execution_command, True),
     (manager.stop_execution_command, False),
   )
   for transition, still_active in transitions:
     transition()
     check = self.assertTrue if still_active else self.assertFalse
     check(manager.has_active_command())
コード例 #20
0
  def test_command_expiry(self, time_mock):
    """A stored command stops being offered once stale and returns when re-stored.

    ``time_mock.side_effect`` supplies successive clock values, so the
    order of calls into the manager must stay exactly as written.
    """
    time_mock.side_effect = [
      1000, 1001, 1002, 1003, 1104, 1105, 1106,
      1807, 1808, 1809, 1810, 1811, 1812,
    ]

    manager = RecoveryManager(tempfile.mktemp(), True)
    manager.update_config(5, 5, 1, 11, True, False, "", "")

    def expect_single_start():
      produced = manager.get_recovery_commands()
      self.assertEqual(1, len(produced))
      self.assertEqual("START", produced[0]["roleCommand"])

    stored_command = copy.deepcopy(self.command)
    manager.store_or_update_command(stored_command)

    manager.update_current_status("NODEMANAGER", "INSTALLED")
    manager.update_desired_status("NODEMANAGER", "STARTED")

    expect_single_start()
    expect_single_start()

    #1807 command is stale
    produced = manager.get_recovery_commands()
    self.assertEqual(0, len(produced))

    # Storing the command again makes it available once more.
    manager.store_or_update_command(stored_command)
    expect_single_start()
コード例 #21
0
  def test_recovery_report(self, time_mock):
    """Check ``get_recovery_status`` as recovery attempts accumulate.

    The expected summary goes from ``DISABLED`` (before configuration)
    through ``RECOVERABLE`` and ``PARTIALLY_RECOVERABLE`` to
    ``UNRECOVERABLE`` as the ``PUMA`` and ``LION`` components are
    executed. ``time_mock.side_effect`` supplies successive clock values,
    so the order of ``execute`` calls matters.

    Uses ``assertEqual``; the ``assertEquals`` alias is deprecated and was
    removed from ``unittest`` in Python 3.12.
    """
    time_mock.side_effect = \
      [1000, 1071, 1072, 1470, 1471, 1472, 1543, 1644, 1815]

    rm = RecoveryManager(tempfile.mktemp())
    rec_st = rm.get_recovery_status()
    self.assertEqual(rec_st, {"summary": "DISABLED"})

    rm.update_config(2, 5, 1, 4, True, True, "", "")
    rec_st = rm.get_recovery_status()
    self.assertEqual(rec_st, {"summary": "RECOVERABLE", "componentReports": []})

    rm.execute("PUMA")
    rec_st = rm.get_recovery_status()
    self.assertEqual(rec_st, {"summary": "RECOVERABLE",
                              "componentReports": [{"name": "PUMA", "numAttempts": 1, "limitReached": False}]})
    rm.execute("PUMA")
    rm.execute("LION")

    rec_st = rm.get_recovery_status()
    self.assertEqual(rec_st, {"summary": "RECOVERABLE",
                              "componentReports": [
                                {"name": "LION", "numAttempts": 1, "limitReached": False},
                                {"name": "PUMA", "numAttempts": 2, "limitReached": False}
                              ]})
    rm.execute("PUMA")
    rm.execute("LION")
    rm.execute("PUMA")
    rm.execute("PUMA")
    rm.execute("LION")
    rec_st = rm.get_recovery_status()
    # PUMA is reported with limitReached, LION is not yet.
    self.assertEqual(rec_st, {"summary": "PARTIALLY_RECOVERABLE",
                              "componentReports": [
                                {"name": "LION", "numAttempts": 3, "limitReached": False},
                                {"name": "PUMA", "numAttempts": 4, "limitReached": True}
                              ]})

    rm.execute("LION")
    rec_st = rm.get_recovery_status()
    # Both components are now reported with limitReached.
    self.assertEqual(rec_st, {"summary": "UNRECOVERABLE",
                              "componentReports": [
                                {"name": "LION", "numAttempts": 4, "limitReached": True},
                                {"name": "PUMA", "numAttempts": 4, "limitReached": True}
                              ]})
コード例 #22
0
  def test_update_rm_config(self, mock_uc):
    """Check the ``update_config`` arguments derived from registration data.

    Each scenario feeds one registration payload to
    ``update_configuration_from_registration`` and asserts the call seen
    on ``mock_uc``.
    """
    fallback_call = call(6, 60, 5, 12, False, True, "", "")
    scenarios = (
      (None, fallback_call),
      ({}, fallback_call),
      ({"recoveryConfig": {"type" : "DEFAULT"}}, fallback_call),
      ({"recoveryConfig": {"type" : "FULL"}},
       call(6, 60, 5, 12, True, False, "", "")),
      # "max_count" is not one of the keys used in the last scenario.
      ({"recoveryConfig": {"type" : "AUTO_START", "max_count" : "med"}},
       call(6, 60, 5, 12, True, True, "", "")),
      ({"recoveryConfig": {
          "type" : "AUTO_START",
          "maxCount" : "5",
          "windowInMinutes" : 20,
          "retryGap" : 2,
          "maxLifetimeCount" : 5,
          "enabledComponents" : " A,B",
          "disabledComponents" : "C"}},
       call(5, 20, 2, 5, True, True, " A,B", "C")),
    )

    manager = RecoveryManager(tempfile.mktemp())
    for index, (registration, expected_call) in enumerate(scenarios):
      # The mock history is cleared between scenarios, not before the first.
      if index > 0:
        mock_uc.reset_mock()
      manager.update_configuration_from_registration(registration)
      mock_uc.assert_has_calls([expected_call])
コード例 #23
0
    def test_get_recovery_commands(self, time_mock):
        """Check which recovery command is produced for each state combination.

        ``time_mock.side_effect`` supplies successive clock values, so the
        order of calls into the manager must stay exactly as written.
        """
        component = "NODEMANAGER"
        time_mock.side_effect = [1000, 2000, 3000, 4000, 5000, 6000]
        manager = RecoveryManager(True)
        manager.update_config(10, 5, 1, 11, True, False)

        def set_states(current, desired):
            manager.update_current_status(component, current)
            manager.update_desired_status(component, desired)

        def expect_single(role_command):
            produced = manager.get_recovery_commands()
            self.assertEqual(1, len(produced))
            self.assertEqual(role_command, produced[0]["roleCommand"])

        stored_command = copy.deepcopy(self.command)
        manager.store_or_update_command(stored_command)

        set_states("INSTALLED", "STARTED")
        expect_single("START")

        set_states("INIT", "STARTED")
        expect_single("INSTALL")

        set_states("INIT", "INSTALLED")
        expect_single("INSTALL")

        # After this reconfiguration no command is produced.
        manager.update_config(2, 5, 1, 5, True, True)
        set_states("INIT", "INSTALLED")

        produced = manager.get_recovery_commands()
        self.assertEqual(0, len(produced))
コード例 #24
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_sliding_window(self, time_mock):
        """Check ``enabled``, ``may_execute`` and ``execute`` over mocked time.

        The first half verifies which ``update_config`` argument sets leave
        the manager enabled. The second half walks through a sequence of
        ``may_execute``/``execute`` calls. ``time_mock.side_effect``
        supplies successive clock values, so the call order must stay
        exactly as written.
        """
        time_mock.side_effect = [
            1000, 1001, 1002, 1003, 1004, 1071, 1150, 1151, 1152, 1153, 1400, 1401,
            1500, 1571, 1572, 1653, 1900, 1971, 2300, 2301,
        ]

        manager = RecoveryManager(tempfile.mktemp(), True, False)
        self.assertTrue(manager.enabled())

        # (update_config arguments, expected enabled() afterwards)
        enablement_cases = (
            ((0, 60, 5, 12, True, False, ""), False),
            ((6, 60, 5, 12, True, False, ""), True),
            ((6, 0, 5, 12, True, False, ""), False),
            ((6, 60, 0, 12, True, False, ""), False),
            ((6, 60, 1, 12, True, False, None), True),
            ((6, 60, 61, 12, True, False, None), False),
            ((6, 60, 5, 4, True, False, ""), False),
            # maximum 2 in 2 minutes and at least 1 minute wait
            ((2, 5, 1, 4, True, False, ""), True),
        )
        for config_args, should_be_enabled in enablement_cases:
            manager.update_config(*config_args)
            check = self.assertTrue if should_be_enabled else self.assertFalse
            check(manager.enabled())

        # T = 1000-2
        for _ in range(3):
            self.assertTrue(manager.may_execute("NODEMANAGER"))

        # T = 1003-4
        self.assertTrue(manager.execute("NODEMANAGER"))
        self.assertFalse(manager.execute("NODEMANAGER"))  # too soon

        # T = 1071
        self.assertTrue(manager.execute("NODEMANAGER"))  # 60+ seconds passed

        # T = 1150-3
        self.assertFalse(manager.execute("NODEMANAGER"))  # limit 2 exceeded
        self.assertFalse(manager.may_execute("NODEMANAGER"))
        self.assertTrue(manager.execute("DATANODE"))
        self.assertTrue(manager.may_execute("NAMENODE"))

        # T = 1400-1
        self.assertTrue(manager.execute("NODEMANAGER"))  # windows reset
        self.assertFalse(manager.may_execute("NODEMANAGER"))  # too soon

        # maximum 2 in 2 minutes and no min wait
        manager.update_config(2, 5, 1, 5, True, True, "")

        # T = 1500-3
        self.assertTrue(manager.execute("NODEMANAGER2"))
        self.assertTrue(manager.may_execute("NODEMANAGER2"))
        self.assertTrue(manager.execute("NODEMANAGER2"))
        self.assertFalse(manager.execute("NODEMANAGER2"))  # max limit

        # T = 1900-2
        for _ in range(2):
            self.assertTrue(manager.execute("NODEMANAGER2"))

        # T = 2300-2
        # lifetime max reached
        self.assertTrue(manager.execute("NODEMANAGER2"))
        self.assertFalse(manager.execute("NODEMANAGER2"))
コード例 #25
0
    def test_recovery_required(self):
        """Walk NODEMANAGER through status pairs and check requires_recovery().

        Each table row is (current status or None, desired status, expected
        result); ``None`` means the current status is not re-reported for
        that step, so the previously reported one stays in effect.
        """
        def check(manager, current, desired, expected):
            if current is not None:
                manager.update_current_status("NODEMANAGER", current)
            manager.update_desired_status("NODEMANAGER", desired)
            verdict = manager.requires_recovery("NODEMANAGER")
            if expected:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)

        rm = RecoveryManager(True, False)
        first_manager_steps = (
            ("INSTALLED", "INSTALLED", False),
            (None, "STARTED", True),
            ("STARTED", "INSTALLED", True),
            (None, "STARTED", False),
            ("INSTALLED", "XYS", False),
            (None, "", False),
            ("INIT", "INSTALLED", True),
            (None, "STARTED", True),
        )
        for current, desired, expected in first_manager_steps:
            check(rm, current, desired, expected)

        # A manager built with (True, True) must never ask for recovery here.
        rm = RecoveryManager(True, True)
        second_manager_steps = (
            ("INIT", "INSTALLED"),
            ("INIT", "START"),
            ("INSTALLED", "START"),
        )
        for current, desired in second_manager_steps:
            check(rm, current, desired, False)
コード例 #26
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_store_from_status_and_use(self):
        """Store a command, read back install/start commands, then remove it.

        Also checks that both getters return None for an unknown component,
        after removal, and once the manager has been paused.
        """
        rm = RecoveryManager(tempfile.mktemp(), True)

        command1 = copy.deepcopy(self.command)

        rm.store_or_update_command(command1)
        self.assertTrue(rm.command_exists("NODEMANAGER", "EXECUTION_COMMAND"))

        install_command = rm.get_install_command("NODEMANAGER")
        start_command = rm.get_start_command("NODEMANAGER")

        self.assertEqual("INSTALL", install_command["roleCommand"])
        self.assertEqual("START", start_command["roleCommand"])
        self.assertEqual("AUTO_EXECUTION_COMMAND",
                         install_command["commandType"])
        self.assertEqual("AUTO_EXECUTION_COMMAND",
                         start_command["commandType"])
        self.assertEqual("NODEMANAGER", install_command["role"])
        self.assertEqual("NODEMANAGER", start_command["role"])
        # assertEqual, not the deprecated assertEquals alias.
        self.assertEqual(install_command["configurations"],
                         start_command["configurations"])

        self.assertEqual(2, install_command["taskId"])
        self.assertEqual(3, start_command["taskId"])

        self.assertEqual(None, rm.get_install_command("component2"))
        self.assertEqual(None, rm.get_start_command("component2"))

        # Only the first removal reports success.
        self.assertTrue(rm.remove_command("NODEMANAGER"))
        self.assertFalse(rm.remove_command("NODEMANAGER"))

        self.assertEqual(None, rm.get_install_command("NODEMANAGER"))
        self.assertEqual(None, rm.get_start_command("NODEMANAGER"))

        self.assertEqual(None, rm.get_install_command("component2"))
        self.assertEqual(None, rm.get_start_command("component2"))

        # The command is stored again, but pausing hides it from the getters.
        rm.store_or_update_command(command1)
        self.assertTrue(rm.command_exists("NODEMANAGER", "EXECUTION_COMMAND"))
        rm.set_paused(True)

        self.assertEqual(None, rm.get_install_command("NODEMANAGER"))
        self.assertEqual(None, rm.get_start_command("NODEMANAGER"))
コード例 #27
0
    def test_recovery_required(self):
        """Walk NODEMANAGER through status pairs and check requires_recovery().

        Each table row is (current status or None, desired status, expected
        result); ``None`` means the current status is not re-reported for
        that step.
        """
        def check(manager, current, desired, expected):
            if current is not None:
                manager.update_current_status("NODEMANAGER", current)
            manager.update_desired_status("NODEMANAGER", desired)
            verdict = manager.requires_recovery("NODEMANAGER")
            if expected:
                self.assertTrue(verdict)
            else:
                self.assertFalse(verdict)

        rm = RecoveryManager(MagicMock(), False)
        rm.update_config(12, 5, 1, 15, True, False, False)
        nodemanager_entry = {
            'component_name': 'NODEMANAGER',
            'service_name': 'YARN',
            'desired_state': 'INSTALLED'
        }
        rm.update_recovery_config(
            {'recoveryConfig': {'components': [nodemanager_entry]}})

        configured_steps = (
            ("INSTALLED", "INSTALLED", False),
            (None, "STARTED", True),
            ("STARTED", "INSTALLED", True),
            (None, "STARTED", False),
            ("INSTALLED", "XYS", False),
            (None, "", False),
            ("INIT", "INSTALLED", True),
            (None, "STARTED", True),
        )
        for current, desired, expected in configured_steps:
            check(rm, current, desired, expected)

        # Fresh manager with no configuration applied: never needs recovery.
        rm = RecoveryManager(MagicMock(), True)
        unconfigured_steps = (
            ("INIT", "INSTALLED"),
            ("INIT", "START"),
            ("INSTALLED", "START"),
        )
        for current, desired in unconfigured_steps:
            check(rm, current, desired, False)
コード例 #28
0
  def test_store_from_status_and_use(self):
    """Store a command, read back install/start commands, then remove it.

    Also checks that both getters return None for an unknown component,
    after removal, and once the manager has been paused.
    """
    rm = RecoveryManager(tempfile.mktemp(), True)

    command1 = copy.deepcopy(self.command)

    rm.store_or_update_command(command1)
    self.assertTrue(rm.command_exists("NODEMANAGER", "EXECUTION_COMMAND"))

    install_command = rm.get_install_command("NODEMANAGER")
    start_command = rm.get_start_command("NODEMANAGER")

    self.assertEqual("INSTALL", install_command["roleCommand"])
    self.assertEqual("START", start_command["roleCommand"])
    self.assertEqual("AUTO_EXECUTION_COMMAND", install_command["commandType"])
    self.assertEqual("AUTO_EXECUTION_COMMAND", start_command["commandType"])
    self.assertEqual("NODEMANAGER", install_command["role"])
    self.assertEqual("NODEMANAGER", start_command["role"])
    # assertEqual, not the deprecated assertEquals alias.
    self.assertEqual(install_command["configurations"], start_command["configurations"])

    self.assertEqual(2, install_command["taskId"])
    self.assertEqual(3, start_command["taskId"])

    self.assertEqual(None, rm.get_install_command("component2"))
    self.assertEqual(None, rm.get_start_command("component2"))

    # Only the first removal reports success.
    self.assertTrue(rm.remove_command("NODEMANAGER"))
    self.assertFalse(rm.remove_command("NODEMANAGER"))

    self.assertEqual(None, rm.get_install_command("NODEMANAGER"))
    self.assertEqual(None, rm.get_start_command("NODEMANAGER"))

    self.assertEqual(None, rm.get_install_command("component2"))
    self.assertEqual(None, rm.get_start_command("component2"))

    # The command is stored again, but pausing hides it from the getters.
    rm.store_or_update_command(command1)
    self.assertTrue(rm.command_exists("NODEMANAGER", "EXECUTION_COMMAND"))
    rm.set_paused(True)

    self.assertEqual(None, rm.get_install_command("NODEMANAGER"))
    self.assertEqual(None, rm.get_start_command("NODEMANAGER"))
コード例 #29
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_process_commands(self, mock_uds):
        """Check which calls reach ``mock_uds`` while commands are processed.

        ``mock_uds`` is presumably the patched
        ``RecoveryManager.update_desired_status`` (the decorator is outside
        this view -- confirm).

        Expected calls are always passed to ``assert_has_calls`` as ONE list.
        Its second positional parameter is ``any_order``, so a second list
        given there is never compared against the recorded calls.
        """
        rm = RecoveryManager(tempfile.mktemp(), True)
        rm.process_status_commands(None)
        self.assertFalse(mock_uds.called)

        rm.process_status_commands([])
        self.assertFalse(mock_uds.called)

        rm.process_status_commands([self.command])
        mock_uds.assert_has_calls([call("NODEMANAGER", "STARTED")])

        mock_uds.reset_mock()

        rm.process_status_commands(
            [self.command, self.exec_command1, self.command])
        mock_uds.assert_has_calls([call("NODEMANAGER", "STARTED"),
                                   call("NODEMANAGER", "STARTED")])

        mock_uds.reset_mock()

        rm.update_config(12, 5, 1, 15, True, False, "NODEMANAGER")
        rm.process_execution_commands(
            [self.exec_command1, self.exec_command2, self.exec_command3])
        mock_uds.assert_has_calls([call("NODEMANAGER", "INSTALLED"),
                                   call("NODEMANAGER", "STARTED")])

        mock_uds.reset_mock()

        rm.process_execution_commands([self.exec_command1, self.command])
        mock_uds.assert_has_calls([call("NODEMANAGER", "INSTALLED")])

        # No reset here: this call is checked on top of the previous ones.
        rm.process_execution_commands([self.exec_command4])
        mock_uds.assert_has_calls([call("NODEMANAGER", "STARTED")])
コード例 #30
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_update_rm_config(self, mock_uc):
        """Check the arguments passed to ``mock_uc`` per registration payload.

        Each case pairs a payload given to
        update_configuration_from_registration() with the positional
        arguments ``mock_uc`` must have been called with.
        """
        defaults = (6, 60, 5, 12, False, True, "")
        cases = [
            (None, defaults),
            ({}, defaults),
            ({"recoveryConfig": {"type": "DEFAULT"}}, defaults),
            ({"recoveryConfig": {"type": "FULL"}},
             (6, 60, 5, 12, True, False, "")),
            ({"recoveryConfig": {"type": "AUTO_START", "max_count": "med"}},
             (6, 60, 5, 12, True, True, "")),
            ({"recoveryConfig": {
                "type": "AUTO_START",
                "maxCount": "5",
                "windowInMinutes": 20,
                "retryGap": 2,
                "maxLifetimeCount": 5,
                "components": " A,B"
            }},
             (5, 20, 2, 5, True, True, " A,B")),
        ]

        rm = RecoveryManager(tempfile.mktemp())
        for position, (registration, expected_args) in enumerate(cases):
            # Clear recorded calls between cases; the first case starts clean.
            if position:
                mock_uc.reset_mock()
            rm.update_configuration_from_registration(registration)
            mock_uc.assert_has_calls([call(*expected_args)])
コード例 #31
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_recovery_required2(self):
        """Check requires_recovery() against the component-list config value.

        Every probe reports INSTALLED as current and STARTED as desired for
        the component, then asks whether recovery is required.
        """
        def configure(manager, components):
            manager.update_config(15, 5, 1, 16, True, False, components)

        def probe(manager, component):
            manager.update_current_status(component, "INSTALLED")
            manager.update_desired_status(component, "STARTED")
            return manager.requires_recovery(component)

        rm = RecoveryManager(tempfile.mktemp(), True, True)
        configure(rm, "NODEMANAGER")
        self.assertTrue(probe(rm, "NODEMANAGER"))

        rm = RecoveryManager(tempfile.mktemp(), True, True)
        configure(rm, "NODEMANAGER")
        self.assertTrue(probe(rm, "NODEMANAGER"))
        self.assertFalse(probe(rm, "DATANODE"))

        # With an empty component value, neither component needs recovery.
        rm = RecoveryManager(tempfile.mktemp(), True, True)
        configure(rm, "")
        self.assertFalse(probe(rm, "NODEMANAGER"))
        self.assertFalse(probe(rm, "DATANODE"))

        # Re-configuring the same manager for NODEMANAGER takes effect.
        configure(rm, "NODEMANAGER")
        self.assertTrue(probe(rm, "NODEMANAGER"))
        self.assertFalse(probe(rm, "DATANODE"))
コード例 #32
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_command_expiry(self, time_mock):
        """Check that a stored command stops being offered once it is stale.

        ``time_mock`` returns the next ``side_effect`` value on each call, so
        the order of the calls below is significant.
        """
        time_mock.side_effect = [
            1000, 1001, 1002, 1003, 1104, 1105, 1106,
            1807, 1808, 1809, 1810, 1811, 1812,
        ]

        rm = RecoveryManager(tempfile.mktemp(), True)
        rm.update_config(5, 5, 1, 11, True, False, "")

        stored_command = copy.deepcopy(self.command)
        rm.store_or_update_command(stored_command)

        rm.update_config(12, 5, 1, 15, True, False, "NODEMANAGER")
        rm.update_current_status("NODEMANAGER", "INSTALLED")
        rm.update_desired_status("NODEMANAGER", "STARTED")

        def expect_single_start():
            found = rm.get_recovery_commands()
            self.assertEqual(1, len(found))
            self.assertEqual("START", found[0]["roleCommand"])

        expect_single_start()
        expect_single_start()

        # 1807 command is stale
        self.assertEqual(0, len(rm.get_recovery_commands()))

        # Storing the command again makes it available once more.
        rm.store_or_update_command(stored_command)
        expect_single_start()
コード例 #33
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_get_recovery_commands(self, time_mock):
        """Check which recovery command is produced for each status pair.

        ``time_mock`` returns the next ``side_effect`` value on each call, so
        the sequence of calls below must stay in this order.
        """
        time_mock.side_effect = [
            1000, 1001, 1002, 1003,
            1100, 1101, 1102,
            1200, 1201, 1203,
            4000, 4001, 4002, 4003,
            4100, 4101, 4102, 4103,
            4200, 4201, 4202,
            4300, 4301, 4302,
        ]

        rm = RecoveryManager(tempfile.mktemp(), True)

        def report(current, desired):
            rm.update_current_status("NODEMANAGER", current)
            rm.update_desired_status("NODEMANAGER", desired)

        def recover(expected_role=None):
            # None means no command is expected; otherwise exactly one
            # command with that roleCommand.  Returns the command list.
            found = rm.get_recovery_commands()
            if expected_role is None:
                self.assertEqual(0, len(found))
            else:
                self.assertEqual(1, len(found))
                self.assertEqual(expected_role, found[0]["roleCommand"])
            return found

        rm.update_config(15, 5, 1, 16, True, False, "")

        base_command = copy.deepcopy(self.command)
        rm.store_or_update_command(base_command)

        rm.update_config(12, 5, 1, 15, True, False, "NODEMANAGER")
        report("INSTALLED", "STARTED")
        self.assertEqual("INSTALLED", rm.get_current_status("NODEMANAGER"))
        self.assertEqual("STARTED", rm.get_desired_status("NODEMANAGER"))
        recover("START")

        # Starts at 1100
        report("INIT", "STARTED")
        recover("INSTALL")

        # Starts at 1200
        report("INIT", "INSTALLED")
        recover("INSTALL")

        rm.update_config(2, 5, 1, 5, True, True, "")
        report("INIT", "INSTALLED")
        recover()

        rm.update_config(12, 5, 1, 15, True, False, "NODEMANAGER")
        report("INIT", "INSTALLED")
        rm.store_or_update_command(base_command)
        recover("INSTALL")

        rm.update_config_staleness("NODEMANAGER", False)
        report("INSTALLED", "INSTALLED")
        recover()

        install_variant = copy.deepcopy(self.command)
        install_variant["desiredState"] = "INSTALLED"
        rm.store_or_update_command(install_variant)
        rm.update_config_staleness("NODEMANAGER", True)
        recover("INSTALL")

        report("STARTED", "STARTED")
        restart = recover("CUSTOM_COMMAND")
        self.assertEqual("RESTART",
                         restart[0]["hostLevelParams"]["custom_command"])

        report("STARTED", "INSTALLED")
        recover("STOP")
コード例 #34
0
  def test_sliding_window(self, time_mock):
    """Check config validation and the sliding-window execution limits.

    ``time_mock`` returns the next ``side_effect`` value on each call; the
    ``T = ...`` comments name the values each group of steps is expected to
    consume, so the steps must run in exactly this order.
    """
    time_mock.side_effect = [
      1000, 1001, 1002, 1003, 1004, 1071, 1150, 1151, 1152, 1153, 1400, 1401,
      1500, 1571, 1572, 1653, 1900, 1971, 2300, 2301,
    ]

    def expect(outcome, actual):
      if outcome:
        self.assertTrue(actual)
      else:
        self.assertFalse(actual)

    rm = RecoveryManager(tempfile.mktemp(), True, False)
    expect(True, rm.enabled())

    # (update_config arguments, whether the manager must report enabled)
    config_cases = (
      ((0, 60, 5, 12, True, False, "", ""), False),
      ((6, 60, 5, 12, True, False, "", ""), True),
      ((6, 0, 5, 12, True, False, "", ""), False),
      ((6, 60, 0, 12, True, False, "", ""), False),
      ((6, 60, 1, 12, True, False, None, None), True),
      ((6, 60, 61, 12, True, False, "", None), False),
      ((6, 60, 5, 0, True, False, None, ""), False),
      ((6, 60, 5, 4, True, False, "", ""), False),
      # maximum 2 in 2 minutes and at least 1 minute wait
      ((2, 5, 1, 4, True, False, "", ""), True),
    )
    for settings, outcome in config_cases:
      rm.update_config(*settings)
      expect(outcome, rm.enabled())

    def run(steps):
      # Each step is (method name on rm, component, expected truthiness).
      for method_name, component, outcome in steps:
        expect(outcome, getattr(rm, method_name)(component))

    run((
      # T = 1000-2
      ("may_execute", "NODEMANAGER", True),
      ("may_execute", "NODEMANAGER", True),
      ("may_execute", "NODEMANAGER", True),
      # T = 1003-4
      ("execute", "NODEMANAGER", True),
      ("execute", "NODEMANAGER", False),  # too soon
      # T = 1071
      ("execute", "NODEMANAGER", True),  # 60+ seconds passed
      # T = 1150-3
      ("execute", "NODEMANAGER", False),  # limit 2 exceeded
      ("may_execute", "NODEMANAGER", False),
      ("execute", "DATANODE", True),
      ("may_execute", "NAMENODE", True),
      # T = 1400-1
      ("execute", "NODEMANAGER", True),  # windows reset
      ("may_execute", "NODEMANAGER", False),  # too soon
    ))

    # maximum 2 in 2 minutes and no min wait
    rm.update_config(2, 5, 1, 5, True, True, "", "")

    run((
      # T = 1500-3
      ("execute", "NODEMANAGER2", True),
      ("may_execute", "NODEMANAGER2", True),
      ("execute", "NODEMANAGER2", True),
      ("execute", "NODEMANAGER2", False),  # max limit
      # T = 1900-2
      ("execute", "NODEMANAGER2", True),
      ("execute", "NODEMANAGER2", True),
      # T = 2300-2
      # lifetime max reached
      ("execute", "NODEMANAGER2", True),
      ("execute", "NODEMANAGER2", False),
    ))
コード例 #35
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_recovery_report(self, time_mock):
        """Check get_recovery_status() as execute() calls accumulate.

        ``time_mock`` returns the next ``side_effect`` value on each call, so
        the execute() calls must stay in this order.
        """
        time_mock.side_effect = \
          [1000, 1071, 1072, 1470, 1471, 1472, 1543, 1644, 1815]

        def component(name, attempts, limit_reached):
            # One entry of the "componentReports" list.
            return {
                "name": name,
                "numAttempts": attempts,
                "limitReached": limit_reached
            }

        rm = RecoveryManager(tempfile.mktemp())
        rec_st = rm.get_recovery_status()
        self.assertEqual(rec_st, {"summary": "DISABLED"})

        rm.update_config(2, 5, 1, 4, True, True, "")
        rec_st = rm.get_recovery_status()
        self.assertEqual(rec_st, {
            "summary": "RECOVERABLE",
            "componentReports": []
        })

        rm.execute("PUMA")
        rec_st = rm.get_recovery_status()
        self.assertEqual(rec_st, {
            "summary": "RECOVERABLE",
            "componentReports": [component("PUMA", 1, False)]
        })

        rm.execute("PUMA")
        rm.execute("LION")
        rec_st = rm.get_recovery_status()
        self.assertEqual(rec_st, {
            "summary": "RECOVERABLE",
            "componentReports": [component("LION", 1, False),
                                 component("PUMA", 2, False)]
        })

        rm.execute("PUMA")
        rm.execute("LION")
        rm.execute("PUMA")
        rm.execute("PUMA")
        rm.execute("LION")
        rec_st = rm.get_recovery_status()
        self.assertEqual(rec_st, {
            "summary": "PARTIALLY_RECOVERABLE",
            "componentReports": [component("LION", 3, False),
                                 component("PUMA", 4, True)]
        })

        rm.execute("LION")
        rec_st = rm.get_recovery_status()
        self.assertEqual(rec_st, {
            "summary": "UNRECOVERABLE",
            "componentReports": [component("LION", 4, True),
                                 component("PUMA", 4, True)]
        })
コード例 #36
0
  def test_recovery_required2(self):
    """Check requires_recovery() against the configured component list.

    Every probe reports INSTALLED as current and STARTED as desired for the
    component, then asks whether recovery is required.
    """
    def nodemanager_only():
      # Build a new list on every call, like the inline literals it replaces.
      return [
        {'component_name': 'NODEMANAGER', 'service_name': 'YARN', 'desired_state': 'INSTALLED'}
      ]

    def configure(manager, components):
      manager.update_config(15, 5, 1, 16, True, False, False, components)

    def probe(manager, component):
      manager.update_current_status(component, "INSTALLED")
      manager.update_desired_status(component, "STARTED")
      return manager.requires_recovery(component)

    rm = RecoveryManager(True, True)
    configure(rm, nodemanager_only())
    self.assertTrue(probe(rm, "NODEMANAGER"))

    rm = RecoveryManager(True, True)
    configure(rm, nodemanager_only())
    self.assertTrue(probe(rm, "NODEMANAGER"))
    self.assertFalse(probe(rm, "DATANODE"))

    # With an empty component value, neither component needs recovery.
    rm = RecoveryManager(True, True)
    configure(rm, "")
    self.assertFalse(probe(rm, "NODEMANAGER"))
    self.assertFalse(probe(rm, "DATANODE"))

    # Re-configuring the same manager for NODEMANAGER takes effect.
    configure(rm, nodemanager_only())
    self.assertTrue(probe(rm, "NODEMANAGER"))
    self.assertFalse(probe(rm, "DATANODE"))
コード例 #37
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
 def test_command_count(self):
     """Check has_active_command() across paired start/stop calls.

     Two starts followed by two stops: a command counts as active until the
     second stop.
     """
     rm = RecoveryManager(tempfile.mktemp(), True)
     self.assertFalse(rm.has_active_command())

     # (transition to apply, assertion on has_active_command() afterwards)
     transitions = (
         (rm.start_execution_command, self.assertTrue),
         (rm.start_execution_command, self.assertTrue),
         (rm.stop_execution_command, self.assertTrue),
         (rm.stop_execution_command, self.assertFalse),
     )
     for apply_transition, check in transitions:
         apply_transition()
         check(rm.has_active_command())
コード例 #38
0
  def test_update_rm_config(self, mock_uc):
    """Check the arguments passed to ``mock_uc`` per recovery-config payload.

    Each case pairs a payload given to update_recovery_config() with the
    positional arguments ``mock_uc`` must have been called with.
    """
    component_a = {
      "service_name": "A",
      "component_name": "A",
      "desired_state": "INSTALLED"
    }
    component_b = {
      "service_name": "B",
      "component_name": "B",
      "desired_state": "INSTALLED"
    }
    cases = [
      (None, (6, 60, 5, 12, False, False, False, [])),
      ({}, (6, 60, 5, 12, False, False, False, [])),
      ({"recoveryConfig": {"type" : "DEFAULT"}},
       (6, 60, 5, 12, False, False, False, [])),
      ({"recoveryConfig": {"type" : "FULL"}},
       (6, 60, 5, 12, True, False, False, [])),
      ({"recoveryConfig": {"type" : "AUTO_START", "max_count" : "med"}},
       (6, 60, 5, 12, True, True, False, [])),
      ({"recoveryConfig": {"type" : "AUTO_INSTALL_START", "max_count" : "med"}},
       (6, 60, 5, 12, True, False, True, [])),
      ({"recoveryConfig": {
        "type": "AUTO_START",
        "maxCount": "5",
        "windowInMinutes" : 20,
        "retryGap": 2,
        "maxLifetimeCount" : 5,
        "components": [component_a, component_b],
        "recoveryTimestamp": 1}},
       (5, 20, 2, 5, True, True, False, [
         {'component_name': 'A', 'service_name': 'A', 'desired_state': 'INSTALLED'},
         {'component_name': 'B', 'service_name': 'B', 'desired_state': 'INSTALLED'}
       ])),
    ]

    rm = RecoveryManager()
    for position, (payload, expected_args) in enumerate(cases):
      # Clear recorded calls between cases; the first case starts clean.
      if position:
        mock_uc.reset_mock()
      rm.update_recovery_config(payload)
      mock_uc.assert_has_calls([call(*expected_args)])
コード例 #39
0
ファイル: TestRecoveryManager.py プロジェクト: shwhite/ambari
    def test_reset_if_window_passed_since_last_attempt(self, time_mock):
        """Check the 'lastReset' value recorded after each execute() call.

        ``time_mock`` returns 1000, 1071 and 1372 on successive calls.
        """
        time_mock.side_effect = [1000, 1071, 1372]
        rm = RecoveryManager(tempfile.mktemp(), True)

        rm.update_config(2, 5, 1, 4, True, True, "")

        # The first two attempts keep the initial reset time; the third
        # resets because window_in_sec seconds passed since the last attempt.
        for expected_last_reset in (1000, 1000, 1372):
            rm.execute("COMPONENT")
            actions = rm.get_actions_copy()["COMPONENT"]
            self.assertEqual(actions['lastReset'], expected_last_reset)
コード例 #40
0
  def test_command_expiry(self, time_mock):
    """Check recovery commands before and after retry_gap_in_sec is raised.

    ``time_mock`` returns the next ``side_effect`` value on each call, so
    the order of the calls below is significant.
    """
    time_mock.side_effect = \
      [1000, 1001, 1104, 1105, 1106, 1807, 1808, 1809, 1810, 1811, 1812]

    rm = RecoveryManager(True)
    rm.update_config(5, 5, 0, 11, True, False, False, "")

    rm.update_config(12, 5, 1, 15, True, False, False, [
      {'component_name': 'NODEMANAGER', 'service_name': 'YARN', 'desired_state': 'INSTALLED'}
    ])
    rm.update_current_status("NODEMANAGER", "INSTALLED")
    rm.update_desired_status("NODEMANAGER", "STARTED")

    commands = rm.get_recovery_commands()
    self.assertEqual(1, len(commands))
    self.assertEqual("START", commands[0]["roleCommand"])

    commands = rm.get_recovery_commands()
    self.assertEqual(1, len(commands))
    self.assertEqual("START", commands[0]["roleCommand"])

    # Set the retry gap directly on the manager, bypassing update_config().
    rm.retry_gap_in_sec = 60

    #1807 command is stale
    commands = rm.get_recovery_commands()
    self.assertEqual(0, len(commands))

    commands = rm.get_recovery_commands()
    self.assertEqual(1, len(commands))
    self.assertEqual("START", commands[0]["roleCommand"])
コード例 #41
0
ファイル: TestHeartbeat.py プロジェクト: zengzhaozheng/ambari
  def test_build_long_result(self, result_mock):
    """Build a heartbeat from an action-queue result holding several reports.

    ``result_mock`` is injected by a patch decorator that is outside this
    block (presumably it replaces the ActionQueue result accessor -- confirm
    at the decorator). The dictionary assigned to its ``return_value`` carries
    four command reports and two component statuses; the heartbeat built for
    response id 10 is expected to contain all of them unchanged, next to the
    node status and the recovery fields.
    """
    config = AmbariConfig.AmbariConfig()
    config.set('agent', 'prefix', 'tmp')
    config.set('agent', 'cache_dir', "/var/lib/ambari-agent/cache")
    config.set('agent', 'tolerate_download_failures', "true")

    # The controller is mocked; only its recovery manager is a real object.
    dummy_controller = MagicMock()
    dummy_controller.recovery_manager = RecoveryManager(tempfile.mktemp())
    actionQueue = ActionQueue(config, dummy_controller)

    result_mock.return_value = {
      'reports': [
        {'status': 'IN_PROGRESS',
         'stderr': 'Read from /tmp/errors-3.txt',
         'stdout': 'Read from /tmp/output-3.txt',
         'clusterName': u'cc',
         'roleCommand': u'INSTALL',
         'serviceName': u'HDFS',
         'role': u'DATANODE',
         'actionId': '1-1',
         'taskId': 3,
         'exitCode': 777},

        {'status': 'COMPLETED',
         'stderr': 'stderr',
         'stdout': 'out',
         'clusterName': 'clusterName',
         'roleCommand': 'UPGRADE',
         'serviceName': 'serviceName',
         'role': 'role',
         'actionId': 17,
         'taskId': 'taskId',
         'exitCode': 0},

        {'status': 'FAILED',
         'stderr': 'stderr',
         'stdout': 'out',
         'clusterName': u'cc',
         'roleCommand': u'INSTALL',
         'serviceName': u'HDFS',
         'role': u'DATANODE',
         'actionId': '1-1',
         'taskId': 3,
         'exitCode': 13},

        {'status': 'COMPLETED',
         'stderr': 'stderr',
         'stdout': 'out',
         'clusterName': u'cc',
         'configurationTags': {'global': {'tag': 'v1'}},
         'roleCommand': u'INSTALL',
         'serviceName': u'HDFS',
         'role': u'DATANODE',
         'actionId': '1-1',
         'taskId': 3,
         'exitCode': 0},
      ],
      'componentStatus': [
        {'status': 'HEALTHY', 'componentName': 'DATANODE'},
        {'status': 'UNHEALTHY', 'componentName': 'NAMENODE'},
      ],
    }

    heartbeat = Heartbeat(actionQueue)
    hb = heartbeat.build(10)
    # Overwrite the host name and timestamp with fixed placeholders so the
    # built heartbeat can be compared against a literal dictionary.
    hb['hostname'] = 'hostname'
    hb['timestamp'] = 'timestamp'

    expected = {
      'nodeStatus': {'status': 'HEALTHY', 'cause': 'NONE'},
      'recoveryReport': {'summary': 'DISABLED'},
      'recoveryTimestamp': -1,
      'timestamp': 'timestamp',
      'hostname': 'hostname',
      'responseId': 10,
      'reports': [
        {'status': 'IN_PROGRESS', 'roleCommand': u'INSTALL',
         'serviceName': u'HDFS', 'role': u'DATANODE', 'actionId': '1-1',
         'stderr': 'Read from /tmp/errors-3.txt',
         'stdout': 'Read from /tmp/output-3.txt', 'clusterName': u'cc',
         'taskId': 3, 'exitCode': 777},
        {'status': 'COMPLETED', 'roleCommand': 'UPGRADE',
         'serviceName': 'serviceName', 'role': 'role', 'actionId': 17,
         'stderr': 'stderr', 'stdout': 'out', 'clusterName': 'clusterName',
         'taskId': 'taskId', 'exitCode': 0},
        {'status': 'FAILED', 'roleCommand': u'INSTALL',
         'serviceName': u'HDFS', 'role': u'DATANODE', 'actionId': '1-1',
         'stderr': 'stderr', 'stdout': 'out', 'clusterName': u'cc',
         'taskId': 3, 'exitCode': 13},
        {'status': 'COMPLETED', 'stdout': 'out',
         'configurationTags': {'global': {'tag': 'v1'}}, 'taskId': 3,
         'exitCode': 0, 'roleCommand': u'INSTALL', 'clusterName': u'cc',
         'serviceName': u'HDFS', 'role': u'DATANODE', 'actionId': '1-1',
         'stderr': 'stderr'},
      ],
      'componentStatus': [
        {'status': 'HEALTHY', 'componentName': 'DATANODE'},
        {'status': 'UNHEALTHY', 'componentName': 'NAMENODE'},
      ],
    }

    # Show the complete diff on failure; the dictionaries are large.
    self.maxDiff = None
    # assertEqual instead of the deprecated assertEquals alias.
    self.assertEqual(expected, hb)
コード例 #42
0
  def test_configured_for_recovery(self):
    """Check configured_for_recovery() after successive update_config() calls.

    Every scenario reconfigures the same manager with a list of component
    entries and then asks, in a fixed order, whether individual component
    names are reported as configured for recovery.
    """
    rm = RecoveryManager(True)

    # Each row: (leading update_config arguments,
    #            component names placed in the configuration list,
    #            ordered (queried name, expected truthiness) pairs)
    scenarios = (
      ((12, 5, 1, 15), ('A', 'B'),
       (("A", True), ("B", True))),
      ((5, 5, 1, 11), (),
       (("A", False), ("B", False))),
      ((5, 5, 1, 11), ('A',),
       (("A", True), ("B", False))),
      ((5, 5, 1, 11), ('A',),
       (("A", True), ("B", False), ("C", False))),
      ((5, 5, 1, 11), ('A', 'D', 'F'),
       (("A", True), ("B", False), ("C", False),
        ("D", True), ("E", False), ("F", True))),
    )

    for leading_args, configured, checks in scenarios:
      # A fresh list of fresh dictionaries per call; the service name mirrors
      # the component name in every entry.
      entries = [
        {'component_name': name, 'service_name': name, 'desired_state': 'INSTALLED'}
        for name in configured
      ]
      rm.update_config(*(leading_args + (True, False, False, entries)))

      for component, should_be_configured in checks:
        verdict = rm.configured_for_recovery(component)
        if should_be_configured:
          self.assertTrue(verdict)
        else:
          self.assertFalse(verdict)