def testProcessCommandEvent_multiDevice(
      self, mock_notify, mock_command_event_type_count):
    """Should populate multiple devices."""
    _, request_id, _, command_id = self.command.key.flat()
    command_manager.ScheduleTasks([self.command])

    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 1)
    command_task_store.LeaseTask(tasks[0].task_id)
    command_event_test_util.CreateCommandAttempt(
        self.command, "0", common.CommandState.UNKNOWN, task=tasks[0])
    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertNotEqual(common.CommandState.RUNNING, queried_command.state)
    event = command_event_test_util.CreateTestCommandEvent(
        request_id, command_id, "0", "InvocationStarted",
        task=tasks[0], device_serials=["d1", "d2"])
    command_event_handler.ProcessCommandEvent(event)

    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.RUNNING, queried_command.state)
    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 1)
    self.assertEqual(tasks[0].attempt_index, 0)
    self.assertEqual(tasks[0].leasable, False)
    attempts = command_manager.GetCommandAttempts(request_id, command_id)
    self.assertEqual(1, len(attempts))
    self.assertEqual(["d1", "d2"], attempts[0].device_serials)
    mock_notify.assert_called_with(request_id)
    expected_metric_fields = {
        metric.METRIC_FIELD_HOSTNAME: "hostname",
        metric.METRIC_FIELD_TYPE: "InvocationStarted"
    }
    mock_command_event_type_count.Increment.assert_called_once_with(
        expected_metric_fields)
  def testProcessCommandEvent_ignoreOutOfDateEvent(
      self, mock_notify, mock_command_event_type_count):
    """Should complete command state for InvocationCompleted events."""
    _, request_id, _, command_id = self.command.key.flat()
    command_manager.ScheduleTasks([self.command])

    tasks = command_manager.GetActiveTasks(self.command)
    command_task_store.LeaseTask(tasks[0].task_id)
    command_event_test_util.CreateCommandAttempt(
        self.command, "0", common.CommandState.UNKNOWN, task=tasks[0])
    invocation_completed_event = command_event_test_util.CreateTestCommandEvent(
        request_id, command_id, "0", "InvocationCompleted",
        task=tasks[0], time=TIMESTAMP_INT + 60)
    invocation_started_event = command_event_test_util.CreateTestCommandEvent(
        request_id, command_id, "0", "InvocationStarted",
        task=tasks[0], time=TIMESTAMP_INT + 30)

    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.QUEUED, queried_command.state)

    command_event_handler.ProcessCommandEvent(invocation_completed_event)
    queried_command = command_manager.GetCommand(request_id, command_id)
    command_attempt = command_manager.GetCommandAttempts(
        request_id, command_id)[0]
    self.assertEqual(
        TIMESTAMP + datetime.timedelta(seconds=60),
        command_attempt.last_event_time)
    self.assertEqual(common.CommandState.COMPLETED, queried_command.state)

    # The second event is ignored.
    command_event_handler.ProcessCommandEvent(invocation_started_event)
    queried_command = command_manager.GetCommand(request_id, command_id)
    command_attempt = command_manager.GetCommandAttempts(
        request_id, command_id)[0]
    self.assertEqual(
        TIMESTAMP + datetime.timedelta(seconds=60),
        command_attempt.last_event_time)
    self.assertEqual(common.CommandState.COMPLETED, queried_command.state)

    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 0)
    attempts = command_manager.GetCommandAttempts(
        request_id, command_id)
    self.assertEqual(1, len(attempts))
    self.assertEqual("summary", attempts[0].summary)
    self.assertEqual(1000, attempts[0].total_test_count)
    self.assertEqual(100, attempts[0].failed_test_count)
    mock_notify.assert_called_with(request_id)
    # Metrics should still be logged for out of order events
    started_metric_fields = {
        metric.METRIC_FIELD_HOSTNAME: "hostname",
        metric.METRIC_FIELD_TYPE: "InvocationStarted"
    }
    completed_metric_fields = {
        metric.METRIC_FIELD_HOSTNAME: "hostname",
        metric.METRIC_FIELD_TYPE: "InvocationCompleted"
    }
    mock_command_event_type_count.Increment.assert_has_calls(
        [mock.call(completed_metric_fields),
         mock.call(started_metric_fields)])
def ProcessCommandEvent(event):
    """Updates state of a command and coordinate command tasks.

  Args:
    event: a CommandEvent
  """
    command = command_manager.GetCommand(event.request_id, event.command_id)
    if not command:
        logging.warning("unknown command %s %s; ignored", event.request_id,
                        event.command_id)
        return

    is_updated = command_manager.UpdateCommandAttempt(event)

    # No need to coordinate if the event is old but continue if it is final.
    # We continue if it is final as datastore update to the command and the
    # attempt aren't done in the same transaction, so a failed command update
    # should be retried in that event.
    if not is_updated and not common.IsFinalCommandState(event.attempt_state):
        logging.debug("Command attempt is not updated.")
        return

    if common.IsFinalCommandState(event.attempt_state):
        metric.RecordCommandAttemptMetric(cluster_id=command.cluster,
                                          run_target=command.run_target,
                                          hostname=event.hostname,
                                          state=event.attempt_state.name)

    # Update command.
    command = command_manager.UpdateState(event.request_id,
                                          event.command_id,
                                          attempt_state=event.attempt_state,
                                          task_id=event.task_id)

    if common.IsFinalCommandState(command.state):
        # Deschedule command since the state indicates that it is not supposed
        # to run anymore.
        logging.debug("Command %r is finalized, delete all its tasks.",
                      command.key)
        command_manager.DeleteTasks(command)

    # Update AnTS.
    env_config.CONFIG.plugin.OnProcessCommandEvent(
        command_manager.GetCommand(event.request_id, event.command_id),
        command_manager.GetCommandAttempt(event.request_id, event.command_id,
                                          event.attempt_id),
        event_data=event.data)

    # Update request.
    request, request_summary = request_manager.EvaluateState(event.request_id)

    _CheckPendingCommands(request, request_summary)
  def testProcessCommandEvent_invocationCompleted_multipleRunsWithErrors(
      self, mock_notify):
    """Should complete command state for InvocationCompleted events.

    This tests a case when a run count is greater than
    command_manager.MAX_TASK_COUNT.

    Args:
      mock_notify: mock function to notify request state changes.
    """
    run_count = 100
    _, request_id, _, command_id = self.command.key.flat()
    self.command.run_count = run_count
    self.command.put()
    command_manager.ScheduleTasks([self.command])

    max_error_count = (
        command_manager.MAX_ERROR_COUNT_BASE +
        int(run_count * command_manager.MAX_ERROR_COUNT_RATIO))
    for i in range(max_error_count):
      tasks = command_manager.GetActiveTasks(self.command)
      self.assertEqual(
          len(tasks), min(run_count - i, command_manager.MAX_TASK_COUNT))
      next_leasable_task = next((t for t in tasks if t.leasable), None)
      command_task_store.LeaseTask(next_leasable_task.task_id)
      command_event_test_util.CreateCommandAttempt(
          self.command, str(i), common.CommandState.UNKNOWN,
          task=next_leasable_task)
      # It should be marked as error at the max error count attempts.
      queried_command = command_manager.GetCommand(request_id, command_id)
      self.assertNotEqual(common.CommandState.ERROR, queried_command.state)
      event = command_event_test_util.CreateTestCommandEvent(
          request_id, command_id, str(i), "InvocationCompleted",
          task=next_leasable_task, error="error")
      command_event_handler.ProcessCommandEvent(event)

    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.ERROR, queried_command.state)
    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 0)
    command_attempts = command_manager.GetCommandAttempts(
        request_id, command_id)
    self.assertEqual(len(command_attempts), max_error_count)
    run_attempt_pairs = [
        (attempt.run_index, attempt.attempt_index)
        for attempt in command_attempts]
    self.assertEqual(len(set(run_attempt_pairs)), len(command_attempts))
    mock_notify.assert_called_with(request_id)
  def testProcessCommandEvent_invocationCompleted_multipleRuns_20(
      self, mock_notify):
    """Should complete command state for InvocationCompleted events.

    This tests a case when a run count is greater than
    command_manager.MAX_TASK_COUNT.

    Args:
      mock_notify: mock function to notify request state changes.
    """
    run_count = 100
    _, request_id, _, command_id = self.command.key.flat()
    self.command.run_count = run_count
    self.command.put()
    command_manager.ScheduleTasks([self.command])

    for i in range(run_count):
      tasks = command_manager.GetActiveTasks(self.command)
      self.assertEqual(
          len(tasks), min(run_count - i, command_manager.MAX_TASK_COUNT))
      next_leasable_task = next((t for t in tasks if t.leasable), None)
      command_task_store.LeaseTask(next_leasable_task.task_id)
      queried_command = command_manager.GetCommand(request_id, command_id)
      self.assertNotEqual(common.CommandState.COMPLETED, queried_command.state)
      command_event_test_util.CreateCommandAttempt(
          self.command, str(i), common.CommandState.UNKNOWN,
          task=next_leasable_task)
      event = command_event_test_util.CreateTestCommandEvent(
          request_id, command_id, str(i), "InvocationCompleted",
          task=next_leasable_task)
      command_event_handler.ProcessCommandEvent(event)

    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.COMPLETED, queried_command.state)
    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 0)
    command_attempts = command_manager.GetCommandAttempts(
        request_id, command_id)
    self.assertEqual(len(command_attempts), run_count)
    self.assertSetEqual(
        set(attempt.run_index for attempt in command_attempts),
        set(range(run_count)))
    mock_notify.assert_called_with(request_id)
  def testProcessCommandEvent_allocationFailed(self,
                                               mock_command_event_type_count,
                                               mock_event_legacy_count):
    """Should reschedule tasks that send AllocationFailed events."""
    _, request_id, _, command_id = self.command.key.flat()
    command_manager.ScheduleTasks([self.command])

    # Command should stay queued even after MAX_CANCELED_COUNT_BASE different
    # attempts result in an AllocationFailed error.
    num_attempts = command_manager.MAX_CANCELED_COUNT_BASE
    for i in range(num_attempts):
      tasks = command_manager.GetActiveTasks(self.command)
      self.assertEqual(len(tasks), 1)
      command_task_store.LeaseTask(tasks[0].task_id)
      command_event_test_util.CreateCommandAttempt(
          self.command, str(i), common.CommandState.UNKNOWN, task=tasks[0])
      queried_command = command_manager.GetCommand(request_id, command_id)
      self.assertNotEqual(common.CommandState.ERROR, queried_command.state)
      event = command_event_test_util.CreateTestCommandEvent(
          request_id, command_id, str(i), "AllocationFailed", task=tasks[0])
      command_event_handler.ProcessCommandEvent(event)

    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.CANCELED, queried_command.state)
    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 0)
    command_attempts = command_manager.GetCommandAttempts(
        request_id, command_id)
    self.assertEqual(
        len(command_attempts), command_manager.MAX_CANCELED_COUNT_BASE)
    self.assertSetEqual(
        set(attempt.attempt_index for attempt in command_attempts),
        set(range(command_manager.MAX_CANCELED_COUNT_BASE)))
    expected_metric_fields = {
        metric.METRIC_FIELD_HOSTNAME: "hostname",
        metric.METRIC_FIELD_TYPE: "AllocationFailed"
    }
    mock_command_event_type_count.Increment.assert_has_calls(
        [mock.call(expected_metric_fields)] * num_attempts)
    mock_event_legacy_count.Increment.assert_has_calls([mock.call({})] *
                                                       num_attempts)
  def testProcessCommandEvent_unknownEvent(self, mock_notify):
    """Should update command state for unknown events."""
    _, request_id, _, command_id = self.command.key.flat()
    command_manager.ScheduleTasks([self.command])

    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 1)
    command_task_store.LeaseTask(tasks[0].task_id)
    command_event_test_util.CreateCommandAttempt(
        self.command, "0", common.CommandState.UNKNOWN, task=tasks[0])
    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.QUEUED, queried_command.state)
    event = command_event_test_util.CreateTestCommandEvent(
        request_id, command_id, "0", "SomeRandomInexistentType", task=tasks[0])
    command_event_handler.ProcessCommandEvent(event)

    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.QUEUED, queried_command.state)
    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 1)
    mock_notify.not_called()
  def testProcessCommandEvent_InvocationStarted(
      self, mock_notify, mock_command_event_type_count):
    """Should update command state for InvocationStarted events.

    State should become RUNNING if it isn't already.

    Args:
      mock_notify: mock function to notify request state changes.
      mock_command_event_type_count: mock command event type count metric
    """
    _, request_id, _, command_id = self.command.key.flat()
    command_manager.ScheduleTasks([self.command])

    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 1)
    command_task_store.LeaseTask(tasks[0].task_id)
    command_event_test_util.CreateCommandAttempt(
        self.command, "0", common.CommandState.UNKNOWN, task=tasks[0])
    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertNotEqual(common.CommandState.RUNNING, queried_command.state)
    event = command_event_test_util.CreateTestCommandEvent(
        request_id, command_id, "0", "InvocationStarted", task=tasks[0])
    command_event_handler.ProcessCommandEvent(event)

    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.RUNNING, queried_command.state)
    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 1)
    self.assertEqual(tasks[0].attempt_index, 0)
    self.assertEqual(tasks[0].leasable, False)
    attempts = command_manager.GetCommandAttempts(request_id, command_id)
    self.assertEqual(1, len(attempts))
    self.assertEqual(["0123456789ABCDEF"], attempts[0].device_serials)
    mock_notify.assert_called_with(request_id)
    expected_metric_fields = {
        metric.METRIC_FIELD_HOSTNAME: "hostname",
        metric.METRIC_FIELD_TYPE: "InvocationStarted"
    }
    mock_command_event_type_count.Increment.assert_called_once_with(
        expected_metric_fields)
  def testProcessCommandEvent_invocationCompleted(
      self, mock_notify, mock_command_event_type_count):
    """Should complete command state for InvocationCompleted events.

    Args:
      mock_notify: mock function to notify request state changes.
      mock_command_event_type_count: mock command event type count metric
    """
    _, request_id, _, command_id = self.command.key.flat()
    command_manager.ScheduleTasks([self.command])

    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 1)
    command_task_store.LeaseTask(tasks[0].task_id)
    command_event_test_util.CreateCommandAttempt(
        self.command, "0", common.CommandState.UNKNOWN, task=tasks[0])
    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertNotEqual(common.CommandState.COMPLETED, queried_command.state)
    event = command_event_test_util.CreateTestCommandEvent(
        request_id, command_id, "0", "InvocationCompleted", task=tasks[0])
    command_event_handler.ProcessCommandEvent(event)

    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.COMPLETED, queried_command.state)
    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 0)
    attempts = command_manager.GetCommandAttempts(request_id, command_id)
    self.assertEqual(1, len(attempts))
    self.assertEqual("summary", attempts[0].summary)
    self.assertEqual(1000, attempts[0].total_test_count)
    self.assertEqual(100, attempts[0].failed_test_count)
    self.assertEqual(10, attempts[0].failed_test_run_count)
    mock_notify.assert_called_with(request_id)
    expected_metric_fields = {
        metric.METRIC_FIELD_HOSTNAME: "hostname",
        metric.METRIC_FIELD_TYPE: "InvocationCompleted"
    }
    mock_command_event_type_count.Increment.assert_called_once_with(
        expected_metric_fields)
  def testProcessCommandEvent_fetchFailed(
      self, mock_notify, mock_command_event_type_count):
    """Should error commands from FetchFailed events."""
    _, request_id, _, command_id = self.command.key.flat()
    command_manager.ScheduleTasks([self.command])

    # It should be marked as error at the MAX_ERROR_COUNT_BASE attempt.
    for i in range(command_manager.MAX_ERROR_COUNT_BASE):
      tasks = command_manager.GetActiveTasks(self.command)
      self.assertEqual(len(tasks), 1)
      self.assertEqual(tasks[0].attempt_index, i)
      command_task_store.LeaseTask(tasks[0].task_id)
      command_event_test_util.CreateCommandAttempt(
          self.command, str(i), common.CommandState.UNKNOWN, task=tasks[0])
      queried_command = command_manager.GetCommand(request_id, command_id)
      self.assertNotEqual(common.CommandState.ERROR, queried_command.state)
      event = command_event_test_util.CreateTestCommandEvent(
          request_id, command_id, str(i), "FetchFailed", task=tasks[0])
      command_event_handler.ProcessCommandEvent(event)

    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.ERROR, queried_command.state)
    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 0)
    command_attempts = command_manager.GetCommandAttempts(
        request_id, command_id)
    self.assertEqual(
        len(command_attempts), command_manager.MAX_ERROR_COUNT_BASE)
    self.assertSetEqual(
        set(attempt.attempt_index for attempt in command_attempts),
        set(range(command_manager.MAX_ERROR_COUNT_BASE)))
    mock_notify.assert_called_with(request_id)
    expected_metric_fields = {
        metric.METRIC_FIELD_HOSTNAME: "hostname",
        metric.METRIC_FIELD_TYPE: "FetchFailed"
    }
    mock_command_event_type_count.Increment.assert_has_calls(
        [mock.call(expected_metric_fields)
        ] * command_manager.MAX_ERROR_COUNT_BASE)
  def testProcessCommandEvent_fatalEvent(
      self, mock_notify, mock_command_event_type_count):
    """Should not reschedule a configuration error, request should error out.

    Args:
      mock_notify: mock function to notify request state changes.
      mock_command_event_type_count: mock command event type count metric
    """
    _, request_id, _, command_id = self.command.key.flat()
    command_manager.ScheduleTasks([self.command])

    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 1)
    command_task_store.LeaseTask(tasks[0].task_id)
    command_event_test_util.CreateCommandAttempt(
        self.command, "0", common.CommandState.UNKNOWN, task=tasks[0])
    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertNotEqual(common.CommandState.FATAL, queried_command.state)
    event = command_event_test_util.CreateTestCommandEvent(
        request_id, command_id, "0", "ConfigurationError",
        data={"error_status": "CUSTOMER_ISSUE"},
        task=tasks[0])
    command_event_handler.ProcessCommandEvent(event)

    queried_command = command_manager.GetCommand(request_id, command_id)
    self.assertEqual(common.CommandState.FATAL, queried_command.state)
    tasks = command_manager.GetActiveTasks(self.command)
    self.assertEqual(len(tasks), 0)
    attempts = command_manager.GetCommandAttempts(request_id, command_id)
    self.assertEqual(1, len(attempts))
    mock_notify.assert_called_with(request_id)
    expected_metric_fields = {
        metric.METRIC_FIELD_HOSTNAME: "hostname",
        metric.METRIC_FIELD_TYPE: "ConfigurationError"
    }
    mock_command_event_type_count.Increment.assert_called_once_with(
        expected_metric_fields)
  def _EnsureCommandConsistency(self, request_id, command_id, task_id):
    """Ensures consistency between the command in DB and the task store.

    Args:
      request_id: request id, str
      command_id: command id, str
      task_id: leased task's id for this command.
    Returns:
      True if data is consistent and it should proceed. False otherwise.
    """
    command = command_manager.GetCommand(request_id, command_id)
    if command:
      return (self._EnsureRequestConsistency(command) and
              self._EnsureCommandBeingActive(command))
    else:
      # Command has been deleted.
      logging.warning("Command with id [%s %s] does not exist. Deleting leased "
                      "task [%s].", request_id, command_id, task_id)
      command_manager.DeleteTask(task_id)
    return False
def ProcessCommandEvent(event):
  """Processes a command event.

  Args:
    event: a CommandEvent object.
  """
  logging.debug("Processing command event: %s", str(event))
  command = command_manager.GetCommand(event.request_id, event.command_id)
  LogCommandEventMetrics(command=command, event=event)

  request_sync_key = request_sync_monitor.GetRequestSyncStatusKey(
      event.request_id)
  if request_sync_key.get():
    request_sync_monitor.StoreCommandEvent(event)
  else:
    logging.debug("No request sync found for %s. Using legacy processing.",
                  event.request_id)
    # This can also happen if TFC received late events on final requests.
    metric.command_event_legacy_processing_count.Increment({})
    # We keep the legacy processing as some use cases depend on having command
    # attempt data updated even after the request is final:
    # Eg: measuring fuzzing time
    commander.ProcessCommandEvent(event)
Exemple #14
0
def SyncCommand(request_id, command_id, add_to_sync_queue=True):
    """Sync the command.

  Cancel the command if inactive, otherwise re-add to the sync queue to check
  back again later.

  Args:
    request_id: Request ID for the command to sync
    command_id: Command ID for the command to sync
    add_to_sync_queue: Flag to determine if this command should be added to the
      sync queue
  """
    command = command_manager.GetCommand(request_id, command_id)

    if not command:
        logging.warning('No command found to sync. Request %s Command %s',
                        request_id, command_id)
        return

    if command.state in common.FINAL_COMMAND_STATES:
        # No need to sync commands in final states
        logging.info('Command reached final state %s. Request %s Command %s',
                     command.state, request_id, command_id)
        return

    if command.state != common.CommandState.QUEUED:
        # Only consider queued commands for inactivity checks.
        if add_to_sync_queue:
            AddToSyncQueue(command)
        return

    last_active_time = command_manager.GetLastCommandActiveTime(command)
    inactive_time = common.Now() - last_active_time
    if datetime.timedelta(minutes=MAX_COMMAND_EVENT_DELAY_MIN) < inactive_time:
        # Ensure command is leasable. If a worker leases a command task but does
        # not report back within MAX_COMMAND_EVENT_DELAY, we need to make it
        # leasable again.
        logging.debug(
            '%r has no events for %r minutes (> %r); ensuring leasable.',
            command.key, inactive_time, MAX_COMMAND_EVENT_DELAY_MIN)
        try:
            command_manager.EnsureLeasable(command)
        except command_manager.CommandTaskNotFoundError:
            logging.exception('command %s %s is not leasable; cancelling...',
                              request_id, command_id)
            _HandleCommandNotExecutable(request_id, command_id)
            return

    timeout_seconds = GetCommandQueueTimeoutSeconds(command)
    if inactive_time > datetime.timedelta(seconds=timeout_seconds):
        # Command timed out.
        logging.info((
            'Canceling command %s: last active %s, inactive time %s is greater '
            'than %d seconds'), command.key, last_active_time, inactive_time,
                     timeout_seconds)
        _HandleInactiveCommand(command, request_id, command_id)
        return

    # Add to the sync queue to check again later.
    if add_to_sync_queue:
        AddToSyncQueue(command)