def testProcessCommandEvent_multiDevice( self, mock_notify, mock_command_event_type_count): """Should populate multiple devices.""" _, request_id, _, command_id = self.command.key.flat() command_manager.ScheduleTasks([self.command]) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 1) command_task_store.LeaseTask(tasks[0].task_id) command_event_test_util.CreateCommandAttempt( self.command, "0", common.CommandState.UNKNOWN, task=tasks[0]) queried_command = command_manager.GetCommand(request_id, command_id) self.assertNotEqual(common.CommandState.RUNNING, queried_command.state) event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, "0", "InvocationStarted", task=tasks[0], device_serials=["d1", "d2"]) command_event_handler.ProcessCommandEvent(event) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.RUNNING, queried_command.state) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 1) self.assertEqual(tasks[0].attempt_index, 0) self.assertEqual(tasks[0].leasable, False) attempts = command_manager.GetCommandAttempts(request_id, command_id) self.assertEqual(1, len(attempts)) self.assertEqual(["d1", "d2"], attempts[0].device_serials) mock_notify.assert_called_with(request_id) expected_metric_fields = { metric.METRIC_FIELD_HOSTNAME: "hostname", metric.METRIC_FIELD_TYPE: "InvocationStarted" } mock_command_event_type_count.Increment.assert_called_once_with( expected_metric_fields)
def testProcessCommandEvent_ignoreOutOfDateEvent( self, mock_notify, mock_command_event_type_count): """Should complete command state for InvocationCompleted events.""" _, request_id, _, command_id = self.command.key.flat() command_manager.ScheduleTasks([self.command]) tasks = command_manager.GetActiveTasks(self.command) command_task_store.LeaseTask(tasks[0].task_id) command_event_test_util.CreateCommandAttempt( self.command, "0", common.CommandState.UNKNOWN, task=tasks[0]) invocation_completed_event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, "0", "InvocationCompleted", task=tasks[0], time=TIMESTAMP_INT + 60) invocation_started_event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, "0", "InvocationStarted", task=tasks[0], time=TIMESTAMP_INT + 30) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.QUEUED, queried_command.state) command_event_handler.ProcessCommandEvent(invocation_completed_event) queried_command = command_manager.GetCommand(request_id, command_id) command_attempt = command_manager.GetCommandAttempts( request_id, command_id)[0] self.assertEqual( TIMESTAMP + datetime.timedelta(seconds=60), command_attempt.last_event_time) self.assertEqual(common.CommandState.COMPLETED, queried_command.state) # The second event is ignored. command_event_handler.ProcessCommandEvent(invocation_started_event) queried_command = command_manager.GetCommand(request_id, command_id) command_attempt = command_manager.GetCommandAttempts( request_id, command_id)[0] self.assertEqual( TIMESTAMP + datetime.timedelta(seconds=60), command_attempt.last_event_time) self.assertEqual(common.CommandState.COMPLETED, queried_command.state) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 0) attempts = command_manager.GetCommandAttempts( request_id, command_id) self.assertEqual(1, len(attempts)) self.assertEqual("summary", attempts[0].summary) self.assertEqual(1000, attempts[0].total_test_count) self.assertEqual(100, attempts[0].failed_test_count) mock_notify.assert_called_with(request_id) # Metrics should still be logged for out of order events started_metric_fields = { metric.METRIC_FIELD_HOSTNAME: "hostname", metric.METRIC_FIELD_TYPE: "InvocationStarted" } completed_metric_fields = { metric.METRIC_FIELD_HOSTNAME: "hostname", metric.METRIC_FIELD_TYPE: "InvocationCompleted" } mock_command_event_type_count.Increment.assert_has_calls( [mock.call(completed_metric_fields), mock.call(started_metric_fields)])
def ProcessCommandEvent(event): """Updates state of a command and coordinate command tasks. Args: event: a CommandEvent """ command = command_manager.GetCommand(event.request_id, event.command_id) if not command: logging.warning("unknown command %s %s; ignored", event.request_id, event.command_id) return is_updated = command_manager.UpdateCommandAttempt(event) # No need to coordinate if the event is old but continue if it is final. # We continue if it is final as datastore update to the command and the # attempt aren't done in the same transaction, so a failed command update # should be retried in that event. if not is_updated and not common.IsFinalCommandState(event.attempt_state): logging.debug("Command attempt is not updated.") return if common.IsFinalCommandState(event.attempt_state): metric.RecordCommandAttemptMetric(cluster_id=command.cluster, run_target=command.run_target, hostname=event.hostname, state=event.attempt_state.name) # Update command. command = command_manager.UpdateState(event.request_id, event.command_id, attempt_state=event.attempt_state, task_id=event.task_id) if common.IsFinalCommandState(command.state): # Deschedule command since the state indicates that it is not supposed # to run anymore. logging.debug("Command %r is finalized, delete all its tasks.", command.key) command_manager.DeleteTasks(command) # Update AnTS. env_config.CONFIG.plugin.OnProcessCommandEvent( command_manager.GetCommand(event.request_id, event.command_id), command_manager.GetCommandAttempt(event.request_id, event.command_id, event.attempt_id), event_data=event.data) # Update request. request, request_summary = request_manager.EvaluateState(event.request_id) _CheckPendingCommands(request, request_summary)
def testProcessCommandEvent_invocationCompleted_multipleRunsWithErrors( self, mock_notify): """Should complete command state for InvocationCompleted events. This tests a case when a run count is greater than command_manager.MAX_TASK_COUNT. Args: mock_notify: mock function to notify request state changes. """ run_count = 100 _, request_id, _, command_id = self.command.key.flat() self.command.run_count = run_count self.command.put() command_manager.ScheduleTasks([self.command]) max_error_count = ( command_manager.MAX_ERROR_COUNT_BASE + int(run_count * command_manager.MAX_ERROR_COUNT_RATIO)) for i in range(max_error_count): tasks = command_manager.GetActiveTasks(self.command) self.assertEqual( len(tasks), min(run_count - i, command_manager.MAX_TASK_COUNT)) next_leasable_task = next((t for t in tasks if t.leasable), None) command_task_store.LeaseTask(next_leasable_task.task_id) command_event_test_util.CreateCommandAttempt( self.command, str(i), common.CommandState.UNKNOWN, task=next_leasable_task) # It should be marked as error at the max error count attempts. queried_command = command_manager.GetCommand(request_id, command_id) self.assertNotEqual(common.CommandState.ERROR, queried_command.state) event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, str(i), "InvocationCompleted", task=next_leasable_task, error="error") command_event_handler.ProcessCommandEvent(event) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.ERROR, queried_command.state) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 0) command_attempts = command_manager.GetCommandAttempts( request_id, command_id) self.assertEqual(len(command_attempts), max_error_count) run_attempt_pairs = [ (attempt.run_index, attempt.attempt_index) for attempt in command_attempts] self.assertEqual(len(set(run_attempt_pairs)), len(command_attempts)) mock_notify.assert_called_with(request_id)
def testProcessCommandEvent_invocationCompleted_multipleRuns_20( self, mock_notify): """Should complete command state for InvocationCompleted events. This tests a case when a run count is greater than command_manager.MAX_TASK_COUNT. Args: mock_notify: mock function to notify request state changes. """ run_count = 100 _, request_id, _, command_id = self.command.key.flat() self.command.run_count = run_count self.command.put() command_manager.ScheduleTasks([self.command]) for i in range(run_count): tasks = command_manager.GetActiveTasks(self.command) self.assertEqual( len(tasks), min(run_count - i, command_manager.MAX_TASK_COUNT)) next_leasable_task = next((t for t in tasks if t.leasable), None) command_task_store.LeaseTask(next_leasable_task.task_id) queried_command = command_manager.GetCommand(request_id, command_id) self.assertNotEqual(common.CommandState.COMPLETED, queried_command.state) command_event_test_util.CreateCommandAttempt( self.command, str(i), common.CommandState.UNKNOWN, task=next_leasable_task) event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, str(i), "InvocationCompleted", task=next_leasable_task) command_event_handler.ProcessCommandEvent(event) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.COMPLETED, queried_command.state) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 0) command_attempts = command_manager.GetCommandAttempts( request_id, command_id) self.assertEqual(len(command_attempts), run_count) self.assertSetEqual( set(attempt.run_index for attempt in command_attempts), set(range(run_count))) mock_notify.assert_called_with(request_id)
def testProcessCommandEvent_allocationFailed(self, mock_command_event_type_count, mock_event_legacy_count): """Should reschedule tasks that send AllocationFailed events.""" _, request_id, _, command_id = self.command.key.flat() command_manager.ScheduleTasks([self.command]) # Command should stay queued even after MAX_CANCELED_COUNT_BASE different # attempts result in an AllocationFailed error. num_attempts = command_manager.MAX_CANCELED_COUNT_BASE for i in range(num_attempts): tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 1) command_task_store.LeaseTask(tasks[0].task_id) command_event_test_util.CreateCommandAttempt( self.command, str(i), common.CommandState.UNKNOWN, task=tasks[0]) queried_command = command_manager.GetCommand(request_id, command_id) self.assertNotEqual(common.CommandState.ERROR, queried_command.state) event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, str(i), "AllocationFailed", task=tasks[0]) command_event_handler.ProcessCommandEvent(event) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.CANCELED, queried_command.state) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 0) command_attempts = command_manager.GetCommandAttempts( request_id, command_id) self.assertEqual( len(command_attempts), command_manager.MAX_CANCELED_COUNT_BASE) self.assertSetEqual( set(attempt.attempt_index for attempt in command_attempts), set(range(command_manager.MAX_CANCELED_COUNT_BASE))) expected_metric_fields = { metric.METRIC_FIELD_HOSTNAME: "hostname", metric.METRIC_FIELD_TYPE: "AllocationFailed" } mock_command_event_type_count.Increment.assert_has_calls( [mock.call(expected_metric_fields)] * num_attempts) mock_event_legacy_count.Increment.assert_has_calls([mock.call({})] * num_attempts)
def testProcessCommandEvent_unknownEvent(self, mock_notify): """Should update command state for unknown events.""" _, request_id, _, command_id = self.command.key.flat() command_manager.ScheduleTasks([self.command]) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 1) command_task_store.LeaseTask(tasks[0].task_id) command_event_test_util.CreateCommandAttempt( self.command, "0", common.CommandState.UNKNOWN, task=tasks[0]) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.QUEUED, queried_command.state) event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, "0", "SomeRandomInexistentType", task=tasks[0]) command_event_handler.ProcessCommandEvent(event) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.QUEUED, queried_command.state) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 1) mock_notify.not_called()
def testProcessCommandEvent_InvocationStarted( self, mock_notify, mock_command_event_type_count): """Should update command state for InvocationStarted events. State should become RUNNING if it isn't already. Args: mock_notify: mock function to notify request state changes. mock_command_event_type_count: mock command event type count metric """ _, request_id, _, command_id = self.command.key.flat() command_manager.ScheduleTasks([self.command]) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 1) command_task_store.LeaseTask(tasks[0].task_id) command_event_test_util.CreateCommandAttempt( self.command, "0", common.CommandState.UNKNOWN, task=tasks[0]) queried_command = command_manager.GetCommand(request_id, command_id) self.assertNotEqual(common.CommandState.RUNNING, queried_command.state) event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, "0", "InvocationStarted", task=tasks[0]) command_event_handler.ProcessCommandEvent(event) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.RUNNING, queried_command.state) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 1) self.assertEqual(tasks[0].attempt_index, 0) self.assertEqual(tasks[0].leasable, False) attempts = command_manager.GetCommandAttempts(request_id, command_id) self.assertEqual(1, len(attempts)) self.assertEqual(["0123456789ABCDEF"], attempts[0].device_serials) mock_notify.assert_called_with(request_id) expected_metric_fields = { metric.METRIC_FIELD_HOSTNAME: "hostname", metric.METRIC_FIELD_TYPE: "InvocationStarted" } mock_command_event_type_count.Increment.assert_called_once_with( expected_metric_fields)
def testProcessCommandEvent_invocationCompleted( self, mock_notify, mock_command_event_type_count): """Should complete command state for InvocationCompleted events. Args: mock_notify: mock function to notify request state changes. mock_command_event_type_count: mock command event type count metric """ _, request_id, _, command_id = self.command.key.flat() command_manager.ScheduleTasks([self.command]) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 1) command_task_store.LeaseTask(tasks[0].task_id) command_event_test_util.CreateCommandAttempt( self.command, "0", common.CommandState.UNKNOWN, task=tasks[0]) queried_command = command_manager.GetCommand(request_id, command_id) self.assertNotEqual(common.CommandState.COMPLETED, queried_command.state) event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, "0", "InvocationCompleted", task=tasks[0]) command_event_handler.ProcessCommandEvent(event) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.COMPLETED, queried_command.state) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 0) attempts = command_manager.GetCommandAttempts(request_id, command_id) self.assertEqual(1, len(attempts)) self.assertEqual("summary", attempts[0].summary) self.assertEqual(1000, attempts[0].total_test_count) self.assertEqual(100, attempts[0].failed_test_count) self.assertEqual(10, attempts[0].failed_test_run_count) mock_notify.assert_called_with(request_id) expected_metric_fields = { metric.METRIC_FIELD_HOSTNAME: "hostname", metric.METRIC_FIELD_TYPE: "InvocationCompleted" } mock_command_event_type_count.Increment.assert_called_once_with( expected_metric_fields)
def testProcessCommandEvent_fetchFailed( self, mock_notify, mock_command_event_type_count): """Should error commands from FetchFailed events.""" _, request_id, _, command_id = self.command.key.flat() command_manager.ScheduleTasks([self.command]) # It should be marked as error at the MAX_ERROR_COUNT_BASE attempt. for i in range(command_manager.MAX_ERROR_COUNT_BASE): tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 1) self.assertEqual(tasks[0].attempt_index, i) command_task_store.LeaseTask(tasks[0].task_id) command_event_test_util.CreateCommandAttempt( self.command, str(i), common.CommandState.UNKNOWN, task=tasks[0]) queried_command = command_manager.GetCommand(request_id, command_id) self.assertNotEqual(common.CommandState.ERROR, queried_command.state) event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, str(i), "FetchFailed", task=tasks[0]) command_event_handler.ProcessCommandEvent(event) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.ERROR, queried_command.state) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 0) command_attempts = command_manager.GetCommandAttempts( request_id, command_id) self.assertEqual( len(command_attempts), command_manager.MAX_ERROR_COUNT_BASE) self.assertSetEqual( set(attempt.attempt_index for attempt in command_attempts), set(range(command_manager.MAX_ERROR_COUNT_BASE))) mock_notify.assert_called_with(request_id) expected_metric_fields = { metric.METRIC_FIELD_HOSTNAME: "hostname", metric.METRIC_FIELD_TYPE: "FetchFailed" } mock_command_event_type_count.Increment.assert_has_calls( [mock.call(expected_metric_fields) ] * command_manager.MAX_ERROR_COUNT_BASE)
def testProcessCommandEvent_fatalEvent( self, mock_notify, mock_command_event_type_count): """Should not reschedule a configuration error, request should error out. Args: mock_notify: mock function to notify request state changes. mock_command_event_type_count: mock command event type count metric """ _, request_id, _, command_id = self.command.key.flat() command_manager.ScheduleTasks([self.command]) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 1) command_task_store.LeaseTask(tasks[0].task_id) command_event_test_util.CreateCommandAttempt( self.command, "0", common.CommandState.UNKNOWN, task=tasks[0]) queried_command = command_manager.GetCommand(request_id, command_id) self.assertNotEqual(common.CommandState.FATAL, queried_command.state) event = command_event_test_util.CreateTestCommandEvent( request_id, command_id, "0", "ConfigurationError", data={"error_status": "CUSTOMER_ISSUE"}, task=tasks[0]) command_event_handler.ProcessCommandEvent(event) queried_command = command_manager.GetCommand(request_id, command_id) self.assertEqual(common.CommandState.FATAL, queried_command.state) tasks = command_manager.GetActiveTasks(self.command) self.assertEqual(len(tasks), 0) attempts = command_manager.GetCommandAttempts(request_id, command_id) self.assertEqual(1, len(attempts)) mock_notify.assert_called_with(request_id) expected_metric_fields = { metric.METRIC_FIELD_HOSTNAME: "hostname", metric.METRIC_FIELD_TYPE: "ConfigurationError" } mock_command_event_type_count.Increment.assert_called_once_with( expected_metric_fields)
def _EnsureCommandConsistency(self, request_id, command_id, task_id): """Ensures consistency between the command in DB and the task store. Args: request_id: request id, str command_id: command id, str task_id: leased task's id for this command. Returns: True if data is consistent and it should proceed. False otherwise. """ command = command_manager.GetCommand(request_id, command_id) if command: return (self._EnsureRequestConsistency(command) and self._EnsureCommandBeingActive(command)) else: # Command has been deleted. logging.warning("Command with id [%s %s] does not exist. Deleting leased " "task [%s].", request_id, command_id, task_id) command_manager.DeleteTask(task_id) return False
def ProcessCommandEvent(event): """Processes a command event. Args: event: a CommandEvent object. """ logging.debug("Processing command event: %s", str(event)) command = command_manager.GetCommand(event.request_id, event.command_id) LogCommandEventMetrics(command=command, event=event) request_sync_key = request_sync_monitor.GetRequestSyncStatusKey( event.request_id) if request_sync_key.get(): request_sync_monitor.StoreCommandEvent(event) else: logging.debug("No request sync found for %s. Using legacy processing.", event.request_id) # This can also happen if TFC received late events on final requests. metric.command_event_legacy_processing_count.Increment({}) # We keep the legacy processing as some use cases depend on having command # attempt data updated even after the request is final: # Eg: measuring fuzzing time commander.ProcessCommandEvent(event)
def SyncCommand(request_id, command_id, add_to_sync_queue=True): """Sync the command. Cancel the command if inactive, otherwise re-add to the sync queue to check back again later. Args: request_id: Request ID for the command to sync command_id: Command ID for the command to sync add_to_sync_queue: Flag to determine if this command should be added to the sync queue """ command = command_manager.GetCommand(request_id, command_id) if not command: logging.warning('No command found to sync. Request %s Command %s', request_id, command_id) return if command.state in common.FINAL_COMMAND_STATES: # No need to sync commands in final states logging.info('Command reached final state %s. Request %s Command %s', command.state, request_id, command_id) return if command.state != common.CommandState.QUEUED: # Only consider queued commands for inactivity checks. if add_to_sync_queue: AddToSyncQueue(command) return last_active_time = command_manager.GetLastCommandActiveTime(command) inactive_time = common.Now() - last_active_time if datetime.timedelta(minutes=MAX_COMMAND_EVENT_DELAY_MIN) < inactive_time: # Ensure command is leasable. If a worker leases a command task but does # not report back within MAX_COMMAND_EVENT_DELAY, we need to make it # leasable again. logging.debug( '%r has no events for %r minutes (> %r); ensuring leasable.', command.key, inactive_time, MAX_COMMAND_EVENT_DELAY_MIN) try: command_manager.EnsureLeasable(command) except command_manager.CommandTaskNotFoundError: logging.exception('command %s %s is not leasable; cancelling...', request_id, command_id) _HandleCommandNotExecutable(request_id, command_id) return timeout_seconds = GetCommandQueueTimeoutSeconds(command) if inactive_time > datetime.timedelta(seconds=timeout_seconds): # Command timed out. logging.info(( 'Canceling command %s: last active %s, inactive time %s is greater ' 'than %d seconds'), command.key, last_active_time, inactive_time, timeout_seconds) _HandleInactiveCommand(command, request_id, command_id) return # Add to the sync queue to check again later. if add_to_sync_queue: AddToSyncQueue(command)