def setUp(self):
    """Create the handler test app, one request/command pair and a fixed clock."""
    super(CommandEventHandlerTest, self).setUp()
    self.testapp = webtest.TestApp(command_event_handler.APP)

    # Start a patch over the configured plugin.
    self.plugin_patcher = mock.patch(
        "__main__.env_config.CONFIG.plugin")
    self.plugin_patcher.start()

    # Request fixture.
    request_command_info = datastore_entities.CommandInfo(
        command_line="command_line",
        cluster="cluster",
        run_target="run_target")
    self.request = request_manager.CreateRequest(
        request_id="1001",
        user="******",
        command_infos=[request_command_info])

    # Single-shard command fixture attached to the request above.
    command_info = datastore_entities.CommandInfo(
        command_line="long command line",
        cluster="foobar",
        run_target="foo",
        run_count=1,
        shard_count=1)
    plugin_data = {
        "ants_invocation_id": "i123",
        "ants_work_unit_id": "w123"
    }
    created_commands = command_manager.CreateCommands(
        request_id=self.request.key.id(),
        command_infos=[command_info],
        shard_indexes=[0],
        request_plugin_data=plugin_data)
    self.command = created_commands[0]

    # Make common.Now() return TIMESTAMP.
    self.now_patcher = mock.patch.object(common, "Now")
    self.mock_now = self.now_patcher.start()
    self.mock_now.return_value = TIMESTAMP
Ejemplo n.º 2
0
  def setUp(self):
    """Create the monitor test app and one request/command fixture pair."""
    super(CommandAttemptMonitorTest, self).setUp()
    self.testapp = webtest.TestApp(command_attempt_monitor.APP)

    # Start a patch over the configured plugin.
    self.plugin_patcher = mock.patch(
        '__main__.env_config.CONFIG.plugin')
    self.plugin_patcher.start()

    # Request fixture.
    request_command_info = datastore_entities.CommandInfo(
        command_line='command_line',
        cluster='cluster',
        run_target='run_target')
    self.request = request_manager.CreateRequest(
        request_id='1001',
        user='******',
        command_infos=[request_command_info])

    # Single-shard command fixture attached to the request above.
    command_info = datastore_entities.CommandInfo(
        command_line='long command line',
        cluster='foobar',
        run_target='foo',
        run_count=1,
        shard_count=1)
    plugin_data = {
        'ants_invocation_id': 'i123',
        'ants_work_unit_id': 'w123'
    }
    created_commands = command_manager.CreateCommands(
        request_id=self.request.key.id(),
        command_infos=[command_info],
        shard_indexes=[0],
        request_plugin_data=plugin_data)
    self.command = created_commands[0]

    # Clear Datastore cache
    ndb.get_context().clear_cache()
Ejemplo n.º 3
0
 def testMonitor(self, sync):
   """Monitor() schedules one sync task per command; running them syncs each."""
   request_id = self.request_2.key.id()
   command_infos = [
       datastore_entities.CommandInfo(
           command_line=command_line,
           shard_count=2,
           run_target='foo',
           run_count=1,
           cluster='foobar')
       for command_line in ('long command line', 'longer_command_line')
   ]
   commands = command_manager.CreateCommands(
       request_id=request_id,
       command_infos=command_infos,
       shard_indexes=[0, 1])

   num_monitored = command_monitor.Monitor(commands)
   self.assertEqual(2, num_monitored)

   tasks = self.mock_task_scheduler.GetTasks()
   self.assertEqual(2, len(tasks))

   # Deliver every scheduled task to the sync queue handler.
   sync_queue_path = '/_ah/queue/%s' % command_monitor.COMMAND_SYNC_QUEUE
   for task in tasks:
     response = self.testapp.post(sync_queue_path, task.payload)
     self.assertEqual('200 OK', response.status)

   expected_calls = [
       mock.call(request_id, command.key.id()) for command in commands[:2]
   ]
   sync.assert_has_calls(expected_calls)
Ejemplo n.º 4
0
  def testAddToSyncQueue_CustomCancelDeadline(self, mock_add):
    """A queued command 1 minute from its custom timeout is synced in 1 minute."""
    # NOTE(review): auto_now is switched off on the shared model property and
    # not restored in this method - confirm the fixture teardown resets it.
    datastore_entities.Command.update_time._auto_now = False
    now = datetime.datetime.utcnow()

    # Custom 10 hour queue timeout.
    custom_timeout = 10 * 3600
    command_info = datastore_entities.CommandInfo(
        command_line='command line',
        run_target='run_target',
        run_count=1,
        shard_count=1,
        cluster='cluster')
    created_commands = command_manager.CreateCommands(
        request_id=self.request.key.id(),
        command_infos=[command_info],
        shard_indexes=[0],
        queue_timeout_seconds=custom_timeout)
    command = created_commands[0]
    _, request_id, _, command_id = command.key.flat()

    # Backdate the last update so that only 60 seconds of the timeout remain.
    elapsed = datetime.timedelta(seconds=custom_timeout - 60)
    command.state = common.CommandState.QUEUED
    command.update_time = now - elapsed
    command.put()

    command_monitor.AddToSyncQueue(command)

    # Command monitor should schedule it to be synced in 1 minute.
    expected_payload = json.dumps({
        command_manager.COMMAND_ID_KEY: command_id,
        command_manager.REQUEST_ID_KEY: request_id,
    })
    expected_eta = now + datetime.timedelta(minutes=1)
    mock_add.assert_called_once_with(
        queue_name=command_monitor.COMMAND_SYNC_QUEUE,
        payload=expected_payload,
        eta=expected_eta)
Ejemplo n.º 5
0
  def testAddToSyncQueue_RunningCommand(self, mock_add, mock_now):
    """A command running for 3 hours is synced after the max event delay."""
    # NOTE(review): auto_now is switched off on the shared model property and
    # not restored in this method - confirm the fixture teardown resets it.
    datastore_entities.Command.update_time._auto_now = False
    now = datetime.datetime.utcnow()
    mock_now.return_value = now

    command_info = datastore_entities.CommandInfo(
        command_line='command line',
        run_target='run_target',
        run_count=1,
        shard_count=1,
        cluster='cluster')
    created_commands = command_manager.CreateCommands(
        request_id=self.request.key.id(),
        command_infos=[command_info],
        shard_indexes=[0])
    command = created_commands[0]
    _, request_id, _, command_id = command.key.flat()

    # Mark the command as running with a last update 3 hours in the past.
    command.state = common.CommandState.RUNNING
    command.update_time = now - datetime.timedelta(hours=3)
    command.put()

    command_monitor.AddToSyncQueue(command)

    # Command monitor should schedule it to be synced in
    # MAX_COMMAND_EVENT_DELAY_MINs.
    expected_payload = json.dumps({
        command_manager.COMMAND_ID_KEY: command_id,
        command_manager.REQUEST_ID_KEY: request_id,
    })
    max_delay = datetime.timedelta(
        minutes=command_monitor.MAX_COMMAND_EVENT_DELAY_MIN)
    mock_add.assert_called_once_with(
        queue_name=command_monitor.COMMAND_SYNC_QUEUE,
        payload=expected_payload,
        eta=now + max_delay)
Ejemplo n.º 6
0
 def testSyncCommand(self, mock_ensure, sync):
   """Syncing a queued command idle for twice the limit cancels it."""
   # NOTE(review): auto_now is switched off on the shared model property and
   # not restored in this method - confirm the fixture teardown resets it.
   datastore_entities.Command.update_time._auto_now = False
   now = datetime.datetime.utcnow()

   command_info = datastore_entities.CommandInfo(
       command_line='long command line',
       cluster='foobar',
       run_target='foo',
       run_count=1,
       shard_count=1)
   created_commands = command_manager.CreateCommands(
       request_id=self.request.key.id(),
       command_infos=[command_info],
       shard_indexes=[0])
   command = created_commands[0]

   # Backdate the last update to twice the maximum inactive time.
   inactive_limit = datetime.timedelta(
       minutes=command_monitor.MAX_COMMAND_INACTIVE_TIME_MIN)
   command.state = common.CommandState.QUEUED
   command.update_time = now - inactive_limit * 2
   command.put()

   command_monitor.SyncCommand(command.request_id, command.key.id())

   has_command_key = hamcrest.match_equality(
       hamcrest.has_property('key', command.key))
   mock_ensure.assert_called_once_with(has_command_key)
   self.assertEqual(common.CommandState.CANCELED, command.key.get().state)
   self.assertEqual(common.RequestState.CANCELED, self.request.key.get().state)
   sync.assert_not_called()
 def _CreateAttempt(self, attempt_id, task_id, state):
   """Create a new command and store a CommandAttempt for it.

   Args:
     attempt_id: id used for both the attempt key and its attempt_id field.
     task_id: task id recorded on the attempt.
     state: state recorded on the attempt.

   Returns:
     The stored datastore_entities.CommandAttempt.
   """
   command_info = datastore_entities.CommandInfo(
       command_line='long command line',
       run_target='foo',
       run_count=1,
       shard_count=1,
       cluster='foobar')
   plugin_data = {
       'ants_invocation_id': 'i123',
       'ants_work_unit_id': 'w123'
   }
   created_commands = command_manager.CreateCommands(
       request_id=self.request.key.id(),
       command_infos=[command_info],
       shard_indexes=[0],
       request_plugin_data=plugin_data)
   _, request_id, _, command_id = created_commands[0].key.flat()

   # The attempt key is nested under the request and command keys.
   attempt = datastore_entities.CommandAttempt(
       key=ndb.Key(
           datastore_entities.Request, request_id,
           datastore_entities.Command, command_id,
           datastore_entities.CommandAttempt, attempt_id,
           namespace=common.NAMESPACE),
       attempt_id=attempt_id,
       state=state,
       command_id=command_id,
       task_id=task_id)
   attempt.put()
   return attempt
  def testEnqueueCommandEvents_multipleEvents(self):
    """Enqueue start/complete events for two commands and process them all.

    After the four queued tasks are posted to the handler, each command is
    expected to have exactly one attempt, in the COMPLETED state.
    """
    request_command_info = datastore_entities.CommandInfo(
        command_line="command_line",
        cluster="cluster",
        run_target="run_target",
        shard_count=2)
    self.request = request_manager.CreateRequest(
        request_id="9999",
        user="******",
        command_infos=[request_command_info])

    sharded_infos = [
        datastore_entities.CommandInfo(
            command_line=command_line,
            cluster="foobar",
            run_target="foo",
            run_count=1,
            shard_count=2)
        for command_line in ("long command line 0", "long command line 1")
    ]
    command_1, command_2 = command_manager.CreateCommands(
        request_id=self.request.key.id(),
        command_infos=sharded_infos,
        shard_indexes=[0, 1])
    _, request_id, _, command_1_id = command_1.key.flat()
    _, _, _, command_2_id = command_2.key.flat()
    command_ids = (command_1_id, command_2_id)

    for command in (command_1, command_2):
      command_event_test_util.CreateCommandAttempt(
          command, "aid", common.CommandState.QUEUED)

    # Both "started" events first, then both "completed" events.
    events = [
        command_event_test_util.CreateTestCommandEventJson(
            request_id, command_id, "aid", event_type)
        for event_type in ("InvocationStarted", "InvocationCompleted")
        for command_id in command_ids
    ]
    command_event_handler.EnqueueCommandEvents(events)

    tasks = self.mock_task_scheduler.GetTasks()
    self.assertEqual(len(tasks), 4)
    for task in tasks:
      self.testapp.post(
          command_event_handler.COMMAND_EVENT_HANDLER_PATH, task.payload)

    for command_id in command_ids:
      command_attempts = command_manager.GetCommandAttempts(
          request_id, command_id)
      self.assertEqual(len(command_attempts), 1)
      self.assertEqual(
          common.CommandState.COMPLETED, command_attempts[0].state)
Ejemplo n.º 9
0
  def testSyncCommand_withCustomQueueTimeout(self, mock_ensure, sync):
    """With a doubled queue timeout only the command idle for 3x is canceled."""
    # NOTE(review): auto_now is switched off on the shared model property and
    # not restored in this method - confirm the fixture teardown resets it.
    datastore_entities.Command.update_time._auto_now = False
    now = datetime.datetime.utcnow()

    # Queue timeout (seconds) equal to twice the default inactive limit.
    queue_timeout = command_monitor.MAX_COMMAND_INACTIVE_TIME_MIN * 2 * 60
    command_infos = [
        datastore_entities.CommandInfo(
            command_line=command_line,
            shard_count=2,
            run_target='foo',
            run_count=1,
            cluster='foobar')
        for command_line in ('long command line', 'longer_command_line')
    ]
    command_1, command_2 = command_manager.CreateCommands(
        request_id=self.request_2.key.id(),
        command_infos=command_infos,
        shard_indexes=[0, 1],
        queue_timeout_seconds=queue_timeout)

    # Change update times. command_1 should ensure leasable, command_2 should
    # ensure leasable and cancel afterwards
    inactive_limit = datetime.timedelta(
        minutes=command_monitor.MAX_COMMAND_INACTIVE_TIME_MIN)
    command_1.state = common.CommandState.QUEUED
    command_1.update_time = now - inactive_limit
    command_1.put()
    command_2.state = common.CommandState.QUEUED
    command_2.update_time = now - inactive_limit * 3
    command_2.put()

    command_monitor.SyncCommand(command_1.request_id, command_1.key.id())
    command_monitor.SyncCommand(command_2.request_id, command_2.key.id())

    expected_ensure_calls = [
        mock.call(
            hamcrest.match_equality(
                hamcrest.has_property('key', command.key)))
        for command in (command_1, command_2)
    ]
    mock_ensure.assert_has_calls(expected_ensure_calls)
    self.assertEqual(common.CommandState.QUEUED, command_1.key.get().state)
    self.assertEqual(common.CommandState.CANCELED, command_2.key.get().state)
    self.assertEqual(common.RequestState.CANCELED,
                     self.request_2.key.get().state)
    sync.assert_called_once_with(command_1)
 def testBackfillCommands(self, mock_add):
   """BackfillCommands adds the two QUEUED commands and skips the RUNNING one."""
   command_infos = [
       datastore_entities.CommandInfo(
           command_line=command_line,
           shard_count=3,
           run_target='foo',
           run_count=1,
           cluster='foobar')
       for command_line in (
           'long command line', 'longer_command_line', 'short_cmd')
   ]
   plugin_data = {
       'ants_invocation_id': 'i123',
       'ants_work_unit_id': 'w123'
   }
   command_1, command_2, command_3 = command_manager.CreateCommands(
       request_id=self.request.key.id(),
       command_infos=command_infos,
       shard_indexes=[0, 1, 2],
       request_plugin_data=plugin_data)

   # Two queued commands and one running command.
   states = (
       (command_1, common.CommandState.QUEUED),
       (command_2, common.CommandState.QUEUED),
       (command_3, common.CommandState.RUNNING),
   )
   for command, state in states:
     command.state = state
     command.put()

   response = self.testapp.post_json(
       '/_ah/api/CoordinatorApi.BackfillCommands', {})
   self.assertEqual('200 OK', response.status)

   expected_calls = [
       mock.call(
           hamcrest.match_equality(
               hamcrest.has_property('key', command.key)))
       for command in (command_1, command_2)
   ]
   mock_add.assert_has_calls(expected_calls, any_order=True)
   self.assertEqual(2, mock_add.call_count)
Ejemplo n.º 11
0
    def testCheckPendingCommands_canceledRequest(self, schedule_tasks,
                                                 monitor):
        """No tasks are scheduled or monitored for a CANCELED request."""
        request_id = "1001"
        command_infos = []
        for i in range(10):
            command_infos.append(
                datastore_entities.CommandInfo(
                    command_line="command_line %04d" % i,
                    cluster="cluster %04d" % i,
                    run_target="run_target %04d" % i,
                    run_count=1,
                    shard_count=1))
        plugin_data = {
            "FOO": "foo",
            "BAR": "'bar",
        }
        request = datastore_test_util.CreateRequest(
            request_id=request_id,
            user="******",
            command_infos=command_infos,
            max_concurrent_tasks=5,
            plugin_data=plugin_data)
        command_manager.CreateCommands(
            request_id=request_id,
            command_infos=command_infos,
            priority=request.priority,
            shard_indexes=[0] * len(command_infos))
        request.state = common.RequestState.CANCELED
        request.put()

        # First 2 commands completed, next 3 queued, the rest left UNKNOWN.
        commands = command_manager.GetCommands(request_id)
        for index, command in enumerate(commands):
            if index >= 5:
                command.state = common.CommandState.UNKNOWN
            elif index >= 2:
                command.state = common.CommandState.QUEUED
            else:
                command.state = common.CommandState.COMPLETED
            command.put()

        # Summary counts mirroring the states assigned above.
        request_summary = request_manager.RequestSummary()
        request_summary.completed_count = 2
        request_summary.queued_count = 3
        request_summary.pending_count = 5

        commander._CheckPendingCommands(request, request_summary)

        schedule_tasks.assert_not_called()
        monitor.assert_not_called()
Ejemplo n.º 12
0
 def _CreateCommand(self,
                    request_id=REQUEST_ID,
                    run_count=1,
                    priority=None,
                    command_line="command_line1"):
     """Create a single-shard command and return it.

     Args:
       request_id: id of the request the command belongs to.
       run_count: run count stored on the command info.
       priority: priority forwarded to command_manager.CreateCommands.
       command_line: command line stored on the command info.

     Returns:
       The first (only) command returned by command_manager.CreateCommands.
     """
     command_info = datastore_entities.CommandInfo(
         command_line=command_line,
         cluster="cluster",
         run_target="run_target",
         run_count=run_count,
         shard_count=1)
     plugin_data = {
         "ants_invocation_id": "i123",
         "command_ants_work_unit_id": "w123"
     }
     created_commands = command_manager.CreateCommands(
         request_id=request_id,
         command_infos=[command_info],
         priority=priority,
         shard_indexes=[0],
         request_plugin_data=plugin_data)
     return created_commands[0]
Ejemplo n.º 13
0
 def testSyncCommand_runningState_doNotAddToQueue(self, mock_ensure, sync):
   """Syncing a RUNNING command with the third argument False leaves it alone."""
   # NOTE(review): auto_now is switched off on the shared model property and
   # not restored in this method - confirm the fixture teardown resets it.
   datastore_entities.Command.update_time._auto_now = False
   now = datetime.datetime.utcnow()

   command_info = datastore_entities.CommandInfo(
       command_line='long command line',
       cluster='foobar',
       run_target='foo',
       run_count=1,
       shard_count=1)
   created_commands = command_manager.CreateCommands(
       request_id=self.request.key.id(),
       command_infos=[command_info],
       shard_indexes=[0])
   command = created_commands[0]

   # Backdate the last update to twice the maximum inactive time.
   inactive_limit = datetime.timedelta(
       minutes=command_monitor.MAX_COMMAND_INACTIVE_TIME_MIN)
   command.state = common.CommandState.RUNNING
   command.update_time = now - inactive_limit * 2
   command.put()

   command_monitor.SyncCommand(command.request_id, command.key.id(), False)

   mock_ensure.assert_not_called()
   self.assertEqual(common.CommandState.RUNNING, command.key.get().state)
   sync.assert_not_called()
Ejemplo n.º 14
0
    def testProcessCommandEvent_pendingCommands(self, attempt_metric, monitor):
        # Test ProcessCommandEvent when a command completes while other
        # commands of the same request are still pending: the completed
        # command should free a slot and one pending command should be queued.
        request_id = "1001"
        # Ten distinct single-shard command infos.
        command_infos = [
            datastore_entities.CommandInfo(command_line="command_line %04d" %
                                           i,
                                           cluster="cluster %04d" % i,
                                           run_target="run_target %04d" % i,
                                           run_count=1,
                                           shard_count=1) for i in range(10)
        ]
        # The request is created with max_concurrent_tasks=5.
        request = datastore_test_util.CreateRequest(
            request_id=request_id,
            user="******",
            command_infos=command_infos,
            max_concurrent_tasks=5,
            plugin_data={
                "FOO": "foo",
                "BAR": "'bar",
            })
        commands = command_manager.CreateCommands(request_id=request_id,
                                                  command_infos=command_infos,
                                                  priority=request.priority,
                                                  shard_indexes=[0] *
                                                  len(command_infos))
        # Schedule only the first five commands; the other five stay pending.
        command_manager.ScheduleTasks(commands[:5])
        _, request_id, _, command_id = commands[0].key.flat()
        # Precondition: 5 commands in UNKNOWN state and 5 in QUEUED state.
        pending_commands = command_manager.GetCommands(
            request_id, common.CommandState.UNKNOWN)
        self.assertEqual(5, len(pending_commands))
        queued_commands = command_manager.GetCommands(
            request_id, common.CommandState.QUEUED)
        self.assertEqual(5, len(queued_commands))

        # Lease the first command's task and record an attempt for it.
        tasks = command_manager.GetActiveTasks(commands[0])
        self.assertEqual(1, len(tasks))
        command_task_store.LeaseTask(tasks[0].task_id)
        command_event_test_util.CreateCommandAttempt(
            commands[0],
            "attempt0",
            common.CommandState.UNKNOWN,
            task=tasks[0])
        # Build an INVOCATION_COMPLETED event for that attempt.
        event = command_event_test_util.CreateTestCommandEvent(
            request_id,
            command_id,
            "attempt0",
            common.InvocationEventType.INVOCATION_COMPLETED,
            task=tasks[0],
            time=TIMESTAMP)

        commander.ProcessCommandEvent(event)

        # The first command has no active tasks left and is COMPLETED.
        tasks = command_manager.GetActiveTasks(commands[0])
        self.assertEqual(0, len(tasks))
        command = commands[0].key.get(use_cache=False)
        self.assertEqual(common.CommandState.COMPLETED, command.state)
        attempt_metric.assert_called_once_with(cluster_id=command.cluster,
                                               run_target=command.run_target,
                                               hostname="hostname",
                                               state="COMPLETED")
        # The first previously pending command is monitored and now QUEUED.
        next_command = pending_commands[0]
        monitor.assert_called_once_with([next_command])
        next_command = pending_commands[0].key.get(use_cache=False)
        self.assertEqual(common.CommandState.QUEUED, next_command.state)
        # Final distribution: 4 pending (UNKNOWN), 5 queued, 1 completed.
        pending_commands = command_manager.GetCommands(
            request_id, common.CommandState.UNKNOWN)
        self.assertEqual(4, len(pending_commands))
        queued_commands = command_manager.GetCommands(
            request_id, common.CommandState.QUEUED)
        self.assertEqual(5, len(queued_commands))
        completed_commands = command_manager.GetCommands(
            request_id, common.CommandState.COMPLETED)
        self.assertEqual(1, len(completed_commands))
Ejemplo n.º 15
0
def _CreateCommands(request):
    """Expand a request's command infos into per-shard commands and create them.

    Every command info is validated, has a fixed set of options removed from
    its command line, and is expanded into one CommandInfo per shard. The
    expanded infos are then passed to command_manager.CreateCommands.

    Args:
        request: request object; this function reads its command_infos, type,
            key, plugin_data, priority, queue_timeout_seconds and
            prev_test_context attributes.

    Returns:
        The commands returned by command_manager.CreateCommands.

    Raises:
        ValueError: if a command info has no cluster, no run target, a run
            count below 1, or a shard count outside [1, max shards].
    """
    removed_options = (
        # TFC-specific options
        "--cluster",
        "--run-target",
        "--run-count",

        # TF conflicting options
        "--loop",  # causes TF to loop test runs continuously
        "--product-type",  # causes TF to fail device allocations
        "--test-iterations",  # specifies the number of iterations to run
    )

    expanded_command_infos = []
    shard_indexes = []
    for info in request.command_infos:
        # Validation.
        # TODO: Move validity check to request_manager.
        if info.cluster is None:
            raise ValueError("cluster is not specified.")
        if not info.run_target:
            raise ValueError("run target is not defined.")
        # TODO: Check in db to see that it is a valid run target.
        if info.run_count < 1:
            raise ValueError("run count must be equal or greater than 1.")

        max_shards = RUN_TARGET_TO_MAX_SHARDS_MAP.get(
            info.run_target, DEFAULT_MAX_SHARDS)
        shard_count = info.shard_count
        if shard_count <= 0 or shard_count > max_shards:
            raise ValueError("shard count %d is outside of range [1, %d]" %
                             (shard_count, max_shards))

        command_line = command_util.CommandLine(info.command_line)
        command_line.RemoveOptions(list(removed_options))

        # Schedule commands and tag them with a run_target.
        # TF implicitly knows how to map a device to a run_target string. When
        # fetching commands, TF looks for only commands tagged with run_targets
        # which are available on itself.
        for shard_index in range(shard_count):
            # If the request is unmanaged, use command line to inject shard
            # parameters.
            if not request.type:
                # A shard count without a shard index means local sharding
                # was defined; in that case keep the original shard setup.
                local_sharding = (
                    command_line.GetOption("--shard-count") is not None
                    and command_line.GetOption("--shard-index") is None)
                if not local_sharding:
                    command_line.RemoveOptions(
                        ["--shard-count", "--shard-index"])
                    if shard_count > 1:
                        command_line.AddOption(
                            "--shard-count", str(shard_count))
                        command_line.AddOption(
                            "--shard-index", str(shard_index))

            shard_info = datastore_entities.CommandInfo(
                name=info.name,
                command_line=command_line.ToTFString(),
                cluster=info.cluster,
                run_target=info.run_target,
                run_count=info.run_count,
                shard_count=shard_count)
            expanded_command_infos.append(shard_info)
            shard_indexes.append(shard_index)

    commands = command_manager.CreateCommands(
        request_id=request.key.id(),
        request_plugin_data=request.plugin_data,
        command_infos=expanded_command_infos,
        shard_indexes=shard_indexes,
        priority=request.priority,
        queue_timeout_seconds=request.queue_timeout_seconds,
        request_type=request.type)

    # Carry over the previous test context to every new command, if any.
    if request.prev_test_context:
        for command in commands:
            command_manager.UpdateTestContext(
                request_id=request.key.id(),
                command_id=command.key.id(),
                test_context=request.prev_test_context)
    return commands