Beispiel #1
0
 def test_cron_handle_bot_died_second(self):
   """Checks that a bot death on the second try ends the task as BOT_DIED.

   The first try is reaped by 'localhost' and the retry by 'localhost-second'.
   cron_handle_bot_died() is run after each try with the clock mocked forward,
   then the result summary is compared field by field.
   """
   self.mock(random, 'getrandbits', lambda _: 0x88)
   created = utils.utcnow()
   request = task_request.make_request(
       _gen_request(
           properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
           created_ts=created,
           expiration_ts=created + datetime.timedelta(seconds=600)),
       True)
   task_scheduler.schedule_request(request)
   tolerance = task_result.BOT_PING_TOLERANCE
   bot_dimensions = {
     u'OS': [u'Windows', u'Windows-3.1.1'],
     u'hostname': u'localhost',
     u'foo': u'bar',
   }

   # First try: reaped, running, then handled by the cron job.
   _, first_try = task_scheduler.bot_reap_task(
       bot_dimensions, 'localhost', 'abc')
   self.assertEqual(1, first_try.try_number)
   self.assertEqual(task_result.State.RUNNING, first_try.state)
   self.mock_now(self.now + tolerance, 1)
   first_report = task_scheduler.cron_handle_bot_died('f.local')
   self.assertEqual(([], 1, 0), first_report)

   # Second try: it must be a different bot.
   retry_started_ts = self.mock_now(self.now + tolerance, 2)
   _, second_try = task_scheduler.bot_reap_task(
       bot_dimensions, 'localhost-second', 'abc')
   retry_abandoned_ts = self.mock_now(self.now + 2 * tolerance, 3)
   second_report = task_scheduler.cron_handle_bot_died('f.local')
   self.assertEqual((['1d69b9f088008812'], 0, 0), second_report)
   # One more cron run must report nothing.
   third_report = task_scheduler.cron_handle_bot_died('f.local')
   self.assertEqual(([], 0, 0), third_report)

   expected = {
     'abandoned_ts': retry_abandoned_ts,
     'bot_dimensions': bot_dimensions,
     'bot_id': u'localhost-second',
     'bot_version': u'abc',
     'children_task_ids': [],
     'completed_ts': None,
     'costs_usd': [0., 0.],
     'cost_saved_usd': None,
     'created_ts': self.now,
     'deduped_from': None,
     'durations': [],
     'exit_codes': [],
     'failure': False,
     'id': '1d69b9f088008810',
     'internal_failure': True,
     'modified_ts': retry_abandoned_ts,
     'name': u'Request name',
     'outputs_ref': None,
     'properties_hash': None,
     'server_versions': [u'v1a'],
     'started_ts': retry_started_ts,
     'state': task_result.State.BOT_DIED,
     'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
     'try_number': 2,
     'user': u'Jesus',
   }
   summary = second_try.result_summary_key.get()
   self.assertEqual(expected, summary.to_dict())
Beispiel #2
0
 def test_cron_handle_bot_died_second(self):
   """Checks that a bot death on the second try ends the task as BOT_DIED.

   The first try is reaped by 'localhost' and the retry by 'localhost-second'.
   cron_handle_bot_died() is run after each try with the clock mocked forward,
   then the result summary is compared field by field.
   """
   self.mock(random, 'getrandbits', lambda _: 0x88)
   created = utils.utcnow()
   request = task_request.make_request(
       _gen_request(
           properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
           created_ts=created,
           expiration_ts=created + datetime.timedelta(seconds=600)),
       True)
   task_scheduler.schedule_request(request)
   tolerance = task_result.BOT_PING_TOLERANCE
   bot_dimensions = {
     u'OS': [u'Windows', u'Windows-3.1.1'],
     u'hostname': u'localhost',
     u'foo': u'bar',
   }

   # First try: reaped, running, then handled by the cron job.
   _, first_try = task_scheduler.bot_reap_task(
       bot_dimensions, 'localhost', 'abc')
   self.assertEqual(1, first_try.try_number)
   self.assertEqual(task_result.State.RUNNING, first_try.state)
   self.mock_now(self.now + tolerance, 1)
   first_report = task_scheduler.cron_handle_bot_died()
   self.assertEqual((0, 1, 0), first_report)

   # Second try: it must be a different bot.
   retry_started_ts = self.mock_now(self.now + tolerance, 2)
   _, second_try = task_scheduler.bot_reap_task(
       bot_dimensions, 'localhost-second', 'abc')
   retry_abandoned_ts = self.mock_now(self.now + 2 * tolerance, 3)
   second_report = task_scheduler.cron_handle_bot_died()
   self.assertEqual((1, 0, 0), second_report)
   # One more cron run must report nothing.
   third_report = task_scheduler.cron_handle_bot_died()
   self.assertEqual((0, 0, 0), third_report)

   expected = {
     'abandoned_ts': retry_abandoned_ts,
     'bot_dimensions': bot_dimensions,
     'bot_id': u'localhost-second',
     'bot_version': u'abc',
     'children_task_ids': [],
     'completed_ts': None,
     'costs_usd': [0., 0.],
     'cost_saved_usd': None,
     'created_ts': self.now,
     'deduped_from': None,
     'durations': [],
     'exit_codes': [],
     'failure': False,
     'id': '1d69b9f088008810',
     'internal_failure': True,
     'modified_ts': retry_abandoned_ts,
     'name': u'Request name',
     'outputs_ref': None,
     'properties_hash': None,
     'server_versions': [u'v1a'],
     'started_ts': retry_started_ts,
     'state': task_result.State.BOT_DIED,
     'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
     'try_number': 2,
     'user': u'Jesus',
   }
   summary = second_try.result_summary_key.get()
   self.assertEqual(expected, summary.to_dict())
Beispiel #3
0
  def test_bot_kill_task(self):
    """Checks that bot_kill_task() flags the summary and run result BOT_DIED.

    The kill is sent by 'localhost', the bot that reaped the task, and must
    return None. Both stored entities are then compared field by field.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    dimensions = {u'OS': u'Windows-3.1.1'}
    request = task_request.make_request(
        _gen_request(properties={'dimensions': dimensions}), True)
    result_summary = task_scheduler.schedule_request(request)
    _, run_result = task_scheduler.bot_reap_task(
        {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')

    kill_error = task_scheduler.bot_kill_task(run_result.key, 'localhost')
    self.assertEqual(None, kill_error)

    # Fields expected to be identical on the summary and on the run result.
    shared = {
      'abandoned_ts': self.now,
      'bot_dimensions': dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'internal_failure': True,
      'modified_ts': self.now,
      'outputs_ref': None,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': State.BOT_DIED,
      'try_number': 1,
    }

    expected_summary = dict(shared)
    expected_summary.update({
      'costs_usd': [0.],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'id': '1d69b9f088008810',
      'name': u'Request name',
      'properties_hash': None,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'user': u'Jesus',
    })
    self.assertEqual(expected_summary, result_summary.key.get().to_dict())

    expected_run = dict(shared)
    expected_run.update({
      'cost_usd': 0.,
      'id': '1d69b9f088008811',
    })
    self.assertEqual(expected_run, run_result.key.get().to_dict())
Beispiel #4
0
  def test_bot_kill_task(self):
    """Checks that bot_kill_task() flags the summary and run result BOT_DIED.

    The kill is sent by 'localhost', the bot that reaped the task, and must
    return None. Both stored entities are then compared field by field.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    dimensions = {u'OS': u'Windows-3.1.1'}
    request = task_request.make_request(
        _gen_request(properties={'dimensions': dimensions}), True)
    result_summary = task_scheduler.schedule_request(request)
    _, run_result = task_scheduler.bot_reap_task(
        {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')

    kill_error = task_scheduler.bot_kill_task(run_result.key, 'localhost')
    self.assertEqual(None, kill_error)

    # Fields expected to be identical on the summary and on the run result.
    shared = {
      'abandoned_ts': self.now,
      'bot_dimensions': dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'internal_failure': True,
      'modified_ts': self.now,
      'outputs_ref': None,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': State.BOT_DIED,
      'try_number': 1,
    }

    expected_summary = dict(shared)
    expected_summary.update({
      'costs_usd': [0.],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'id': '1d69b9f088008810',
      'name': u'Request name',
      'properties_hash': None,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'user': u'Jesus',
    })
    self.assertEqual(expected_summary, result_summary.key.get().to_dict())

    expected_run = dict(shared)
    expected_run.update({
      'cost_usd': 0.,
      'id': '1d69b9f088008811',
    })
    self.assertEqual(expected_run, run_result.key.get().to_dict())
Beispiel #5
0
  def test_bot_update_pubsub_error(self):
    """Checks bot_update_task() when the PubSub publish fails, then succeeds.

    The same update is sent twice: first with PubSub mocked to fail, where
    (False, False) is expected, then with PubSub mocked to succeed, where
    (True, True) and exactly one recorded PubSub call are expected.
    """
    request = task_request.make_request(
        _gen_request(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
            pubsub_topic='projects/abc/topics/def'),
        True)
    task_scheduler.schedule_request(request)
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    _, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual('localhost', run_result.bot_id)

    def send_final_update():
      # The bot sends the exact same update on both attempts.
      return task_scheduler.bot_update_task(
          run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False,
          0.1, None)

    # Attempt to terminate the task with success, but make PubSub call fail.
    self.mock_pub_sub(publish_successful=False)
    failed_attempt = send_final_update()
    self.assertEqual((False, False), failed_attempt)

    # Bot retries bot_update, now PubSub works and notification is sent.
    pub_sub_calls = self.mock_pub_sub(publish_successful=True)
    retried_attempt = send_final_update()
    self.assertEqual((True, True), retried_attempt)
    self.assertEqual(1, len(pub_sub_calls)) # notification is sent
  def _bot_update_timeouts(self, hard, io):
    """Runs a task whose final update carries timeout flags and checks it.

    Args:
      hard: value passed to bot_update_task() in the position after the cost.
      io: value passed to bot_update_task() right after `hard`.

    Both the result summary and the run result are expected to end up in
    State.TIMED_OUT with failure set.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    request = task_request.make_request(
        _gen_request_data(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'})))
    result_summary = task_scheduler.schedule_request(request)
    _, run_result = task_scheduler.bot_reap_task(
        {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')
    update_outcome = task_scheduler.bot_update_task(
        run_result.key, 'localhost', 'hi', 0, 0, 0.1, hard, io, 0.1)
    self.assertEqual((True, True), update_outcome)

    # Fields expected to be identical on the summary and on the run result.
    shared = {
      'abandoned_ts': None,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': self.now,
      'durations': [0.1],
      'exit_codes': [0],
      'failure': True,
      'internal_failure': False,
      'modified_ts': self.now,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': State.TIMED_OUT,
      'try_number': 1,
    }

    expected_summary = dict(shared)
    expected_summary.update({
      'costs_usd': [0.1],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'id': '1d69b9f088008810',
      'name': u'Request name',
      'properties_hash': None,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'user': u'Jesus',
    })
    self.assertEqual(expected_summary, result_summary.key.get().to_dict())

    expected_run = dict(shared)
    expected_run.update({
      'cost_usd': 0.1,
      'id': '1d69b9f088008811',
    })
    self.assertEqual(expected_run, run_result.key.get().to_dict())
def _quick_reap():
    """Schedules one Windows-3.1.1 request and reaps it as bot 'localhost'.

    Returns:
      The run result returned by task_scheduler.bot_reap_task().
    """
    request = task_request.make_request(
        _gen_request_data(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'})))
    task_scheduler.schedule_request(request)
    # Only the run result is of interest; the reaped request is discarded.
    _, reaped_run = task_scheduler.bot_reap_task(
        {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')
    return reaped_run
Beispiel #8
0
def _quick_reap():
  """Schedules one Windows-3.1.1 request and reaps it as bot 'localhost'.

  Returns:
    The run result returned by task_scheduler.bot_reap_task().
  """
  request = task_request.make_request(
      _gen_request(properties=dict(dimensions={u'OS': u'Windows-3.1.1'})),
      True)
  task_scheduler.schedule_request(request)
  # Only the run result is of interest; the reaped request is discarded.
  _, reaped_run = task_scheduler.bot_reap_task(
      {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')
  return reaped_run
    def _bot_update_timeouts(self, hard, io):
        """Runs a task whose final update carries timeout flags and checks it.

        Args:
          hard: value passed to bot_update_task() in the position after the
              cost.
          io: value passed to bot_update_task() right after `hard`.

        Both the result summary and the run result are expected to end up in
        State.TIMED_OUT with failure set.
        """
        self.mock(random, 'getrandbits', lambda _: 0x88)
        request = task_request.make_request(
            _gen_request_data(
                properties=dict(dimensions={u'OS': u'Windows-3.1.1'})))
        result_summary = task_scheduler.schedule_request(request)
        _, run_result = task_scheduler.bot_reap_task(
            {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')
        update_outcome = task_scheduler.bot_update_task(
            run_result.key, 'localhost', 'hi', 0, 0, 0.1, hard, io, 0.1)
        self.assertEqual((True, True), update_outcome)

        # Fields expected to be identical on the summary and the run result.
        shared = {
            'abandoned_ts': None,
            'bot_id': u'localhost',
            'bot_version': u'abc',
            'children_task_ids': [],
            'completed_ts': self.now,
            'durations': [0.1],
            'exit_codes': [0],
            'failure': True,
            'internal_failure': False,
            'modified_ts': self.now,
            'server_versions': [u'v1a'],
            'started_ts': self.now,
            'state': State.TIMED_OUT,
            'try_number': 1,
        }

        expected_summary = dict(shared)
        expected_summary.update({
            'costs_usd': [0.1],
            'cost_saved_usd': None,
            'created_ts': self.now,
            'deduped_from': None,
            'id': '1d69b9f088008810',
            'name': u'Request name',
            'properties_hash': None,
            'user': u'Jesus',
        })
        self.assertEqual(expected_summary, result_summary.key.get().to_dict())

        expected_run = dict(shared)
        expected_run.update({
            'cost_usd': 0.1,
            'id': '1d69b9f088008811',
        })
        self.assertEqual(expected_run, run_result.key.get().to_dict())
Beispiel #10
0
  def test_cron_abort_expired_task_to_run_retry(self):
    """Checks that a pending retry that expires keeps the BOT_DIED state.

    The first try is handled by cron_handle_bot_died(), which leaves the
    summary PENDING. cron_abort_expired_task_to_run() is then run with the
    clock mocked past the request expiration, and the summary is compared
    field by field.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    created = utils.utcnow()
    request = task_request.make_request(
        _gen_request(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
            created_ts=created,
            expiration_ts=created + datetime.timedelta(seconds=600)),
        True)
    result_summary = task_scheduler.schedule_request(request)

    # Fake first try bot died.
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    _, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
    died_report = task_scheduler.cron_handle_bot_died()
    self.assertEqual((0, 1, 0), died_report)
    run_state = run_result.key.get().state
    self.assertEqual(task_result.State.BOT_DIED, run_state)
    summary_state = run_result.result_summary_key.get().state
    self.assertEqual(task_result.State.PENDING, summary_state)

    # BOT_DIED is kept instead of EXPIRED.
    expired_at = self.mock_now(self.now, request.expiration_secs+1)
    aborted = task_scheduler.cron_abort_expired_task_to_run()
    self.assertEqual(1, aborted)
    stored_runs = task_result.TaskRunResult.query().fetch()
    self.assertEqual(1, len(stored_runs))
    expected = {
      'abandoned_ts': expired_at,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'costs_usd': [0.],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': True,
      'modified_ts': expired_at,
      'name': u'Request name',
      'outputs_ref': None,
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': task_result.State.BOT_DIED,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'try_number': 1,
      'user': u'Jesus',
    }
    stored_summary = result_summary.key.get()
    self.assertEqual(expected, stored_summary.to_dict())
Beispiel #11
0
  def test_cron_abort_expired_task_to_run_retry(self):
    """Checks that a pending retry that expires keeps the BOT_DIED state.

    The first try is handled by cron_handle_bot_died(), which leaves the
    summary PENDING. cron_abort_expired_task_to_run() is then run with the
    clock mocked past the request expiration, and the summary is compared
    field by field.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    created = utils.utcnow()
    request = task_request.make_request(
        _gen_request(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
            created_ts=created,
            expiration_ts=created + datetime.timedelta(seconds=600)),
        True)
    result_summary = task_scheduler.schedule_request(request)

    # Fake first try bot died.
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    _, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
    died_report = task_scheduler.cron_handle_bot_died()
    self.assertEqual((0, 1, 0), died_report)
    run_state = run_result.key.get().state
    self.assertEqual(task_result.State.BOT_DIED, run_state)
    summary_state = run_result.result_summary_key.get().state
    self.assertEqual(task_result.State.PENDING, summary_state)

    # BOT_DIED is kept instead of EXPIRED.
    expired_at = self.mock_now(self.now, request.expiration_secs+1)
    aborted = task_scheduler.cron_abort_expired_task_to_run()
    self.assertEqual(1, aborted)
    stored_runs = task_result.TaskRunResult.query().fetch()
    self.assertEqual(1, len(stored_runs))
    expected = {
      'abandoned_ts': expired_at,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'costs_usd': [0.],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': True,
      'modified_ts': expired_at,
      'name': u'Request name',
      'outputs_ref': None,
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': task_result.State.BOT_DIED,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'try_number': 1,
      'user': u'Jesus',
    }
    stored_summary = result_summary.key.get()
    self.assertEqual(expected, stored_summary.to_dict())
Beispiel #12
0
 def test_cancel_task_running(self):
     """Checks that cancel_task() on a reaped task fails and leaves it RUNNING.

     cancel_task() is expected to return ok=False and was_running=True.
     """
     request = task_request.make_request(
         _gen_request_data(
             properties=dict(dimensions={u'OS': u'Windows-3.1.1'})))
     summary_key = task_scheduler.schedule_request(request).key
     # Neither value returned by the reap is inspected by this test.
     _reaped_request, _run_result = task_scheduler.bot_reap_task(
         {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')

     ok, was_running = task_scheduler.cancel_task(summary_key)
     self.assertEqual(False, ok)
     self.assertEqual(True, was_running)
     stored_summary = summary_key.get()
     self.assertEqual(task_result.State.RUNNING, stored_summary.state)
Beispiel #13
0
 def test_cancel_task_running(self):
   """Checks that cancel_task() on a reaped task fails and leaves it RUNNING.

   cancel_task() is expected to return ok=False and was_running=True.
   """
   request = task_request.make_request(
       _gen_request_data(
           properties=dict(dimensions={u'OS': u'Windows-3.1.1'})))
   summary_key = task_scheduler.schedule_request(request).key
   # Neither value returned by the reap is inspected by this test.
   _reaped_request, _run_result = task_scheduler.bot_reap_task(
       {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')

   ok, was_running = task_scheduler.cancel_task(summary_key)
   self.assertEqual(False, ok)
   self.assertEqual(True, was_running)
   stored_summary = summary_key.get()
   self.assertEqual(task_result.State.RUNNING, stored_summary.state)
Beispiel #14
0
 def _task_deduped(self,
                   new_ts,
                   deduped_from,
                   task_id='1d8dc670a0008810',
                   now=None):
     """Schedules an idempotent request and checks that it was deduped.

     NOTE(review): presumably a matching task must have completed before this
     helper is called; confirm against the callers.

     Args:
       new_ts: expected 'created_ts' and 'modified_ts' of the new summary.
       deduped_from: expected 'deduped_from' value of the new summary.
       task_id: expected 'id' of the new summary.
       now: expected 'completed_ts' and 'started_ts'; self.now is used when
           this is falsy.
     """
     # The user must be 'Raoul' to agree with the expected summary below; the
     # previous literal was a masked placeholder ('******').
     data = _gen_request_data(name='yay',
                              user='Raoul',
                              properties=dict(
                                  dimensions={u'OS': u'Windows-3.1.1'},
                                  idempotent=True))
     request = task_request.make_request(data)
     _result_summary = task_scheduler.schedule_request(request)
     bot_dimensions = {
         u'OS': [u'Windows', u'Windows-3.1.1'],
         u'hostname': u'localhost',
         u'foo': u'bar',
     }
     self.assertEqual(None,
                      task_to_run.TaskToRun.query().get().queue_number)
     # The bot must get no request back; the run result is not inspected.
     actual_request_2, _run_result_2 = task_scheduler.bot_reap_task(
         bot_dimensions, 'localhost', 'abc')
     self.assertEqual(None, actual_request_2)
     result_summary_duped, run_results_duped = get_results(request.key)
     expected = {
         'abandoned_ts': None,
         'bot_id': u'localhost',
         'bot_version': u'abc',
         'children_task_ids': [],
         'completed_ts': now or self.now,
         'costs_usd': [],
         'cost_saved_usd': 0.1,
         'created_ts': new_ts,
         'deduped_from': deduped_from,
         'durations': [0.1],
         'exit_codes': [0],
         'failure': False,
         'id': task_id,
         'internal_failure': False,
         # Only this value is updated to 'now', the rest uses the previous run
         # timestamps.
         'modified_ts': new_ts,
         'name': u'yay',
         # A deduped task cannot be deduped against.
         'properties_hash': None,
         'server_versions': [u'v1a'],
         'started_ts': now or self.now,
         'state': State.COMPLETED,
         'try_number': 0,
         'user': u'Raoul',
     }
     self.assertEqual(expected, result_summary_duped.to_dict())
     # A deduped request is expected to have no run result at all.
     self.assertEqual([], run_results_duped)
Beispiel #15
0
 def test_bot_kill_task_wrong_bot(self):
   """Checks that a kill sent by a bot that does not own the task is refused.

   The task is reaped by 'localhost'; bot_kill_task() called for 'bot1' must
   return the error message naming both bots.
   """
   self.mock(random, 'getrandbits', lambda _: 0x88)
   request = task_request.make_request(
       _gen_request(properties=dict(dimensions={u'OS': u'Windows-3.1.1'})),
       True)
   task_scheduler.schedule_request(request)
   _, run_result = task_scheduler.bot_reap_task(
       {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')
   kill_error = task_scheduler.bot_kill_task(run_result.key, 'bot1')
   self.assertEqual(
       'Bot bot1 sent task kill for task 1d69b9f088008811 owned by bot '
       'localhost',
       kill_error)
Beispiel #16
0
 def test_bot_kill_task_wrong_bot(self):
     """Checks that a kill sent by a bot that does not own the task is refused.

     The task is reaped by 'localhost'; bot_kill_task() called for 'bot1' must
     return the error message naming both bots.
     """
     self.mock(random, 'getrandbits', lambda _: 0x88)
     request = task_request.make_request(
         _gen_request_data(
             properties=dict(dimensions={u'OS': u'Windows-3.1.1'})))
     task_scheduler.schedule_request(request)
     _, run_result = task_scheduler.bot_reap_task(
         {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')
     kill_error = task_scheduler.bot_kill_task(run_result.key, 'bot1')
     self.assertEqual(
         'Bot bot1 sent task kill for task 1d69b9f088008811 owned by bot '
         'localhost',
         kill_error)
Beispiel #17
0
 def _task_deduped(
     self, new_ts, deduped_from, task_id='1d8dc670a0008810', now=None):
   """Schedules an idempotent request and checks that it was deduped.

   NOTE(review): presumably a matching task must have completed before this
   helper is called; confirm against the callers.

   Args:
     new_ts: expected 'created_ts' and 'modified_ts' of the new summary.
     deduped_from: expected 'deduped_from' value of the new summary.
     task_id: expected 'id' of the new summary.
     now: expected 'completed_ts' and 'started_ts'; self.now is used when this
         is falsy.
   """
   # The user must be 'Raoul' to agree with the expected 'user' and 'tags'
   # below; the previous literal was a masked placeholder ('******').
   data = _gen_request(
       name='yay',
       user='Raoul',
       properties=dict(dimensions={u'OS': u'Windows-3.1.1'}, idempotent=True))
   request = task_request.make_request(data, True)
   _result_summary = task_scheduler.schedule_request(request)
   bot_dimensions = {
     u'OS': [u'Windows', u'Windows-3.1.1'],
     u'hostname': u'localhost',
     u'foo': u'bar',
   }
   self.assertEqual(None, task_to_run.TaskToRun.query().get().queue_number)
   # The bot must get no request back; the run result is not inspected.
   actual_request_2, _run_result_2 = task_scheduler.bot_reap_task(
       bot_dimensions, 'localhost', 'abc')
   self.assertEqual(None, actual_request_2)
   result_summary_duped, run_results_duped = get_results(request.key)
   expected = {
     'abandoned_ts': None,
     'bot_dimensions': bot_dimensions,
     'bot_id': u'localhost',
     'bot_version': u'abc',
     'children_task_ids': [],
     'completed_ts': now or self.now,
     'costs_usd': [],
     'cost_saved_usd': 0.1,
     'created_ts': new_ts,
     'deduped_from': deduped_from,
     'durations': [0.1],
     'exit_codes': [0],
     'failure': False,
     'id': task_id,
     'internal_failure': False,
     # Only this value is updated to 'now', the rest uses the previous run
     # timestamps.
     'modified_ts': new_ts,
     'name': u'yay',
     'outputs_ref': None,
     # A deduped task cannot be deduped against.
     'properties_hash': None,
     'server_versions': [u'v1a'],
     'started_ts': now or self.now,
     'state': State.COMPLETED,
     'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Raoul'],
     'try_number': 0,
     'user': u'Raoul',
   }
   self.assertEqual(expected, result_summary_duped.to_dict())
   # A deduped request is expected to have no run result at all.
   self.assertEqual([], run_results_duped)
Beispiel #18
0
 def test_bot_reap_task(self):
   """Checks that a bot with matching dimensions reaps the scheduled request.

   The reaped request must equal the scheduled one, the run result must carry
   the bot id, and the stored TaskToRun must have its queue_number cleared.
   """
   request = task_request.make_request(
       _gen_request(properties=dict(dimensions={u'OS': u'Windows-3.1.1'})),
       True)
   task_scheduler.schedule_request(request)
   bot_dimensions = {
     u'OS': [u'Windows', u'Windows-3.1.1'],
     u'hostname': u'localhost',
     u'foo': u'bar',
   }
   reaped_request, run_result = task_scheduler.bot_reap_task(
       bot_dimensions, 'localhost', 'abc')
   self.assertEqual(request, reaped_request)
   self.assertEqual('localhost', run_result.bot_id)
   stored_to_run = task_to_run.TaskToRun.query().get()
   self.assertEqual(None, stored_to_run.queue_number)
Beispiel #19
0
 def test_bot_reap_task(self):
   """Checks that a bot with matching dimensions reaps the scheduled request.

   The reaped request must equal the scheduled one, the run result must carry
   the bot id, and the stored TaskToRun must have its queue_number cleared.
   """
   request = task_request.make_request(
       _gen_request(properties=dict(dimensions={u'OS': u'Windows-3.1.1'})),
       True)
   task_scheduler.schedule_request(request)
   bot_dimensions = {
     u'OS': [u'Windows', u'Windows-3.1.1'],
     u'hostname': u'localhost',
     u'foo': u'bar',
   }
   reaped_request, run_result = task_scheduler.bot_reap_task(
       bot_dimensions, 'localhost', 'abc')
   self.assertEqual(request, reaped_request)
   self.assertEqual('localhost', run_result.bot_id)
   stored_to_run = task_to_run.TaskToRun.query().get()
   self.assertEqual(None, stored_to_run.queue_number)
Beispiel #20
0
 def test_cancel_task_running(self):
   """Checks that cancel_task() on a reaped task fails and leaves it RUNNING.

   cancel_task() is expected to return ok=False and was_running=True, and no
   PubSub call may be recorded even though the request has a pubsub_topic.
   """
   data = _gen_request(
       properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
       pubsub_topic='projects/abc/topics/def')
   pub_sub_calls = self.mock_pub_sub()
   request = task_request.make_request(data, True)
   summary_key = task_scheduler.schedule_request(request).key
   # Neither value returned by the reap is inspected by this test.
   _reaped_request, _run_result = task_scheduler.bot_reap_task(
       {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')

   ok, was_running = task_scheduler.cancel_task(summary_key)
   self.assertEqual(False, ok)
   self.assertEqual(True, was_running)
   stored_summary = summary_key.get()
   self.assertEqual(task_result.State.RUNNING, stored_summary.state)
   self.assertEqual(0, len(pub_sub_calls)) # no notifications
Beispiel #21
0
  def test_task_parent_isolated(self):
    """Checks that a child task is recorded on its isolated parent task.

    A parent request using 'inputs_ref' is run to completion, then a second
    request is scheduled with parent_task_id set to the parent's task id. The
    child's task id must be listed in children_task_ids on both the parent run
    result and the parent result summary.
    """
    parent_properties = {
      'commands': None,
      'dimensions': {u'OS': u'Windows-3.1.1'},
      'inputs_ref': {
        'isolated': '1' * 40,
        'isolatedserver': 'http://localhost:1',
        'namespace': 'default-gzip',
      },
    }
    parent_request = task_request.make_request(
        _gen_request(properties=parent_properties), True)
    task_scheduler.schedule_request(parent_request)
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    reaped_request, parent_run = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(parent_request, reaped_request)
    self.assertEqual('localhost', parent_run.bot_id)
    stored_to_run = task_to_run.TaskToRun.query().get()
    self.assertEqual(None, stored_to_run.queue_number)
    # It's important to terminate the task with success.
    update_outcome = task_scheduler.bot_update_task(
        parent_run.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False,
        0.1, None)
    self.assertEqual((True, True), update_outcome)

    parent_id = parent_run.task_id
    child_data = _gen_request(
        parent_task_id=parent_id,
        properties={'dimensions':{u'OS': u'Windows-3.1.1'}})
    child_request = task_request.make_request(child_data, True)
    child_summary = task_scheduler.schedule_request(child_request)
    self.assertEqual([], child_summary.children_task_ids)
    self.assertEqual(parent_id, child_request.parent_task_id)

    # Resolve both parent entities from the packed parent task id.
    parent_run_key = task_pack.unpack_run_result_key(parent_id)
    parent_summary_key = task_pack.run_result_key_to_result_summary_key(
        parent_run_key)
    expected_children = [child_summary.task_id]
    self.assertEqual(
        expected_children, parent_run_key.get().children_task_ids)
    self.assertEqual(
        expected_children, parent_summary_key.get().children_task_ids)
Beispiel #22
0
  def test_task_parent_isolated(self):
    """Checks that a child task is recorded on its isolated parent task.

    A parent request using 'inputs_ref' is run to completion, then a second
    request is scheduled with parent_task_id set to the parent's task id. The
    child's task id must be listed in children_task_ids on both the parent run
    result and the parent result summary.
    """
    parent_properties = {
      'commands': None,
      'dimensions': {u'OS': u'Windows-3.1.1'},
      'inputs_ref': {
        'isolated': '1' * 40,
        'isolatedserver': 'http://localhost:1',
        'namespace': 'default-gzip',
      },
    }
    parent_request = task_request.make_request(
        _gen_request(properties=parent_properties), True)
    task_scheduler.schedule_request(parent_request)
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    reaped_request, parent_run = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(parent_request, reaped_request)
    self.assertEqual('localhost', parent_run.bot_id)
    stored_to_run = task_to_run.TaskToRun.query().get()
    self.assertEqual(None, stored_to_run.queue_number)
    # It's important to terminate the task with success.
    update_outcome = task_scheduler.bot_update_task(
        parent_run.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False,
        0.1, None)
    self.assertEqual((True, True), update_outcome)

    parent_id = parent_run.task_id
    child_data = _gen_request(
        parent_task_id=parent_id,
        properties={'dimensions':{u'OS': u'Windows-3.1.1'}})
    child_request = task_request.make_request(child_data, True)
    child_summary = task_scheduler.schedule_request(child_request)
    self.assertEqual([], child_summary.children_task_ids)
    self.assertEqual(parent_id, child_request.parent_task_id)

    # Resolve both parent entities from the packed parent task id.
    parent_run_key = task_pack.unpack_run_result_key(parent_id)
    parent_summary_key = task_pack.run_result_key_to_result_summary_key(
        parent_run_key)
    expected_children = [child_summary.task_id]
    self.assertEqual(
        expected_children, parent_run_key.get().children_task_ids)
    self.assertEqual(
        expected_children, parent_summary_key.get().children_task_ids)
Beispiel #23
0
 def test_cron_handle_bot_died_ignored_expired(self):
     """Checks cron_handle_bot_died() once the request itself has expired.

     The request is created with scheduling_expiration_secs=600 and the clock
     is mocked with a 601 second offset before the cron handler runs, which is
     expected to return (1, 0, 0).
     """
     self.mock(random, 'getrandbits', lambda _: 0x88)
     request = task_request.make_request(
         _gen_request_data(
             properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
             scheduling_expiration_secs=600))
     task_scheduler.schedule_request(request)
     bot_dimensions = {
         u'OS': [u'Windows', u'Windows-3.1.1'],
         u'hostname': u'localhost',
         u'foo': u'bar',
     }
     _, run_result = task_scheduler.bot_reap_task(
         bot_dimensions, 'localhost', 'abc')
     self.assertEqual(1, run_result.try_number)
     self.assertEqual(task_result.State.RUNNING, run_result.state)

     self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 601)
     cron_report = task_scheduler.cron_handle_bot_died()
     self.assertEqual((1, 0, 0), cron_report)
Beispiel #24
0
 def test_cron_handle_bot_died_ignored_expired(self):
   """Checks cron_handle_bot_died() once the reaped request has expired.

   The request is created with scheduling_expiration_secs=600 and the clock
   is mocked with an extra offset of 601 (presumably seconds, i.e. just past
   the expiration -- confirm against mock_now), after which the cron job is
   expected to return (1, 0, 0).
   """
   # Make random.getrandbits() deterministic for the whole test.
   self.mock(random, 'getrandbits', lambda _: 0x88)
   request = task_request.make_request(
       _gen_request_data(
           properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
           scheduling_expiration_secs=600))
   task_scheduler.schedule_request(request)

   # A bot advertising a matching OS dimension picks the task up.
   reaping_bot = {
     u'OS': [u'Windows', u'Windows-3.1.1'],
     u'hostname': u'localhost',
     u'foo': u'bar',
   }
   _request, run_result = task_scheduler.bot_reap_task(
       reaping_bot, 'localhost', 'abc')
   self.assertEqual(1, run_result.try_number)
   self.assertEqual(task_result.State.RUNNING, run_result.state)

   # Move the clock past both the ping tolerance and the 600 configured
   # above, then let the cron job process the silent bot.
   self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 601)
   expected_outcome = (1, 0, 0)
   self.assertEqual(expected_outcome, task_scheduler.cron_handle_bot_died())
Beispiel #25
0
 def test_cron_handle_bot_died_ignored_expired(self):
   """Checks cron_handle_bot_died() once the reaped request has expired.

   The request expires 600 seconds after its creation and the clock is mocked
   with an extra offset of 601 (presumably seconds -- confirm against
   mock_now). cron_handle_bot_died('f.local') is then expected to return
   (['1d69b9f088008811'], 0, 0); the listed id is presumably the one of the
   run result that was reaped.
   """
   # Make random.getrandbits() deterministic for the whole test.
   self.mock(random, 'getrandbits', lambda _: 0x88)
   created = utils.utcnow()
   expires = created + datetime.timedelta(seconds=600)
   request = task_request.make_request(
       _gen_request(
           properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
           created_ts=created,
           expiration_ts=expires),
       True)
   task_scheduler.schedule_request(request)

   # A bot advertising a matching OS dimension picks the task up.
   reaping_bot = {
     u'OS': [u'Windows', u'Windows-3.1.1'],
     u'hostname': u'localhost',
     u'foo': u'bar',
   }
   _request, run_result = task_scheduler.bot_reap_task(
       reaping_bot, 'localhost', 'abc')
   self.assertEqual(1, run_result.try_number)
   self.assertEqual(task_result.State.RUNNING, run_result.state)

   # Move the clock past both the ping tolerance and the request expiration,
   # then let the cron job process the silent bot.
   self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 601)
   expected_outcome = (['1d69b9f088008811'], 0, 0)
   self.assertEqual(
       expected_outcome, task_scheduler.cron_handle_bot_died('f.local'))
Beispiel #26
0
 def _task_ran_successfully(self):
   """Schedules an idempotent task and has bot 'localhost' finish it.

   Returns:
     unicode(run_result.task_id) for the run that was reaped and updated.
   """
   request = task_request.make_request(
       _gen_request(
           properties=dict(
               dimensions={u'OS': u'Windows-3.1.1'}, idempotent=True)),
       True)
   task_scheduler.schedule_request(request)

   reaping_bot = {
     u'OS': [u'Windows', u'Windows-3.1.1'],
     u'hostname': u'localhost',
     u'foo': u'bar',
   }
   reaped_request, run_result = task_scheduler.bot_reap_task(
       reaping_bot, 'localhost', 'abc')
   self.assertEqual(request, reaped_request)
   self.assertEqual('localhost', run_result.bot_id)

   # Once reaped, the stored TaskToRun must not carry a queue number anymore.
   to_run = task_to_run.TaskToRun.query().get()
   self.assertEqual(None, to_run.queue_number)

   # It's important to terminate the task with success.
   update_outcome = task_scheduler.bot_update_task(
       run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1,
       None)
   self.assertEqual((True, True), update_outcome)
   return unicode(run_result.task_id)
Beispiel #27
0
 def _task_ran_successfully(self):
     """Schedules an idempotent task and has bot 'localhost' finish it.

     Returns:
       unicode(run_result.key_packed) for the run that was reaped and updated.
     """
     request = task_request.make_request(
         _gen_request_data(
             properties=dict(
                 dimensions={u'OS': u'Windows-3.1.1'}, idempotent=True)))
     task_scheduler.schedule_request(request)

     reaping_bot = {
         u'OS': [u'Windows', u'Windows-3.1.1'],
         u'hostname': u'localhost',
         u'foo': u'bar',
     }
     reaped_request, run_result = task_scheduler.bot_reap_task(
         reaping_bot, 'localhost', 'abc')
     self.assertEqual(request, reaped_request)
     self.assertEqual('localhost', run_result.bot_id)

     # Once reaped, the stored TaskToRun must not carry a queue number
     # anymore.
     to_run = task_to_run.TaskToRun.query().get()
     self.assertEqual(None, to_run.queue_number)

     # It's important to terminate the task with success.
     update_outcome = task_scheduler.bot_update_task(
         run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1)
     self.assertEqual((True, True), update_outcome)
     return unicode(run_result.key_packed)
Beispiel #28
0
  def test_cron_handle_bot_died(self):
    """Covers a first try lost to BOT_DIED followed by a successful retry.

    Steps:
      - 'localhost' reaps the task, the clock is moved past
        BOT_PING_TOLERANCE and cron_handle_bot_died() returns (0, 1, 0).
      - The first run result is BOT_DIED while the summary is PENDING again.
      - 'localhost-second' reaps try number 2 and completes it.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    request = task_request.make_request(
        _gen_request_data(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
            scheduling_expiration_secs=600))
    task_scheduler.schedule_request(request)
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    _request, first_run = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(1, first_run.try_number)
    self.assertEqual(task_result.State.RUNNING, first_run.state)

    died_ts = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
    self.assertEqual((0, 1, 0), task_scheduler.cron_handle_bot_died())

    # Reload the first run result: it must be flagged as BOT_DIED.
    dead_run = {
      'abandoned_ts': died_ts,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'cost_usd': 0.,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008811',
      'internal_failure': True,
      'modified_ts': died_ts,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': task_result.State.BOT_DIED,
      'try_number': 1,
    }
    self.assertEqual(dead_run, first_run.key.get().to_dict())

    # Reload the summary: the task is pending again.
    pending_summary = {
      'abandoned_ts': None,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'costs_usd': [0.],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': died_ts,
      'name': u'Request name',
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': None,
      'state': task_result.State.PENDING,
      'try_number': 1,
      'user': u'Jesus',
    }
    self.assertEqual(
        pending_summary, first_run.result_summary_key.get().to_dict())

    # Task was retried.
    done_ts = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 2)
    _request, second_run = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost-second', 'abc')
    logging.info('%s', [t.to_dict() for t in task_to_run.TaskToRun.query()])
    self.assertEqual(2, second_run.try_number)
    update_outcome = task_scheduler.bot_update_task(
        second_run.key, 'localhost-second', 'Foo1', 0, 0, 0.1, False, False,
        0.1)
    self.assertEqual((True, True), update_outcome)

    # Only the fields listed below differ from the pending summary checked
    # above; every other value is expected to be unchanged.
    completed_summary = dict(pending_summary)
    completed_summary.update({
      'bot_id': u'localhost-second',
      'completed_ts': done_ts,
      'costs_usd': [0., 0.1],
      'durations': [0.1],
      'exit_codes': [0],
      'modified_ts': done_ts,
      'started_ts': done_ts,
      'state': task_result.State.COMPLETED,
      'try_number': 2,
    })
    self.assertEqual(
        completed_summary, second_run.result_summary_key.get().to_dict())
    self.assertEqual(0.1, second_run.key.get().cost_usd)
Beispiel #29
0
  def test_exit_code_failure(self):
    """A task updated with a non-zero result ends COMPLETED with failure set.

    The single bot_update_task() call passes 1 where the other tests pass 0
    (presumably the exit code, since exit_codes == [1] is expected). Both the
    result summary and the only run result must then be State.COMPLETED with
    failure=True and internal_failure=False.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    request = task_request.make_request(
        _gen_request(properties=dict(dimensions={u'OS': u'Windows-3.1.1'})),
        True)
    task_scheduler.schedule_request(request)
    bot_dimensions = {'OS': 'Windows-3.1.1'}
    reaped_request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(request, reaped_request)
    update_outcome = task_scheduler.bot_update_task(
        run_result.key, 'localhost', 'Foo1', 0, 1, 0.1, False, False, 0.1,
        None)
    self.assertEqual((True, True), update_outcome)
    result_summary, run_results = get_results(request.key)

    # Values expected in both the result summary and the run result.
    common = {
      'abandoned_ts': None,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': self.now,
      'durations': [0.1],
      'exit_codes': [1],
      'failure': True,
      'internal_failure': False,
      'modified_ts': self.now,
      'outputs_ref': None,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': State.COMPLETED,
      'try_number': 1,
    }

    # The summary adds the request level fields and a list of costs.
    expected_summary = dict(common)
    expected_summary.update({
      'costs_usd': [0.1],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'id': '1d69b9f088008810',
      'name': u'Request name',
      'properties_hash': None,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'user': u'Jesus',
    })
    self.assertEqual(expected_summary, result_summary.to_dict())

    # The run result only adds its own id and a scalar cost.
    expected_run = dict(common)
    expected_run.update({'cost_usd': 0.1, 'id': '1d69b9f088008811'})
    self.assertEqual([expected_run], [t.to_dict() for t in run_results])
Beispiel #30
0
  def test_exit_code_failure(self):
    """A task updated with a non-zero result ends COMPLETED with failure set.

    The single bot_update_task() call passes 1 where the other tests pass 0
    (presumably the exit code, since exit_codes == [1] is expected). Both the
    result summary and the only run result must then be State.COMPLETED with
    failure=True and internal_failure=False.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    request = task_request.make_request(
        _gen_request(properties=dict(dimensions={u'OS': u'Windows-3.1.1'})),
        True)
    task_scheduler.schedule_request(request)
    bot_dimensions = {'OS': 'Windows-3.1.1'}
    reaped_request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(request, reaped_request)
    update_outcome = task_scheduler.bot_update_task(
        run_result.key, 'localhost', 'Foo1', 0, 1, 0.1, False, False, 0.1,
        None)
    self.assertEqual((True, True), update_outcome)
    result_summary, run_results = get_results(request.key)

    # Values expected in both the result summary and the run result.
    common = {
      'abandoned_ts': None,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': self.now,
      'durations': [0.1],
      'exit_codes': [1],
      'failure': True,
      'internal_failure': False,
      'modified_ts': self.now,
      'outputs_ref': None,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': State.COMPLETED,
      'try_number': 1,
    }

    # The summary adds the request level fields and a list of costs.
    expected_summary = dict(common)
    expected_summary.update({
      'costs_usd': [0.1],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'id': '1d69b9f088008810',
      'name': u'Request name',
      'properties_hash': None,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'user': u'Jesus',
    })
    self.assertEqual(expected_summary, result_summary.to_dict())

    # The run result only adds its own id and a scalar cost.
    expected_run = dict(common)
    expected_run.update({'cost_usd': 0.1, 'id': '1d69b9f088008811'})
    self.assertEqual([expected_run], [t.to_dict() for t in run_results])
Beispiel #31
0
  def test_get_results(self):
    """Follows get_results() through pending, running and completed states.

    The expected summary (and, once it exists, the expected run result) is
    built once and then updated with only the fields that change at each
    stage: after scheduling, after a bot reaped the task 60 seconds later
    and after the bot reported completion 120 seconds after creation.
    """
    # TODO(maruel): Split in more focused tests.
    self.mock(random, 'getrandbits', lambda _: 0x88)
    created_ts = self.now
    self.mock_now(created_ts)
    request = task_request.make_request(
        _gen_request(properties=dict(dimensions={u'OS': u'Windows-3.1.1'})),
        True)
    task_scheduler.schedule_request(request)

    # The TaskRequest was enqueued, the TaskResultSummary was created but no
    # TaskRunResult exist yet since the task was not scheduled on any bot.
    result_summary, run_results = get_results(request.key)
    expected_summary = {
      'abandoned_ts': None,
      'bot_dimensions': None,
      'bot_id': None,
      'bot_version': None,
      'children_task_ids': [],
      'completed_ts': None,
      'costs_usd': [],
      'cost_saved_usd': None,
      'created_ts': created_ts,
      'deduped_from': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': created_ts,
      'name': u'Request name',
      'outputs_ref': None,
      'properties_hash': None,
      'server_versions': [],
      'started_ts': None,
      'state': State.PENDING,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'try_number': None,
      'user': u'Jesus',
    }
    self.assertEqual(expected_summary, result_summary.to_dict())
    self.assertEqual([], run_results)

    # A bot reaps the TaskToRun.
    reaped_ts = self.now + datetime.timedelta(seconds=60)
    self.mock_now(reaped_ts)
    bot_dimensions = {u'OS': u'Windows-3.1.1'}
    reaped_request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(request, reaped_request)
    self.assertTrue(run_result)
    result_summary, run_results = get_results(request.key)
    # The summary now describes the bot and the running first try;
    # 'created_ts' is still the time the TaskRequest was created.
    expected_summary.update({
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'costs_usd': [0.],
      'modified_ts': reaped_ts,
      'server_versions': [u'v1a'],
      'started_ts': reaped_ts,
      'state': State.RUNNING,
      'try_number': 1,
    })
    self.assertEqual(expected_summary, result_summary.to_dict())
    expected_run = {
      'abandoned_ts': None,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'cost_usd': 0.,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008811',
      'internal_failure': False,
      'modified_ts': reaped_ts,
      'outputs_ref': None,
      'server_versions': [u'v1a'],
      'started_ts': reaped_ts,
      'state': State.RUNNING,
      'try_number': 1,
    }
    self.assertEqual([expected_run], [i.to_dict() for i in run_results])

    # The bot completes the task.
    done_ts = self.now + datetime.timedelta(seconds=120)
    self.mock_now(done_ts)
    first_update = task_scheduler.bot_update_task(
        run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1,
        None)
    self.assertEqual((True, True), first_update)
    # A second update for the same run is expected to return (True, False).
    second_update = task_scheduler.bot_update_task(
        run_result.key, 'localhost', 'Bar22', 0, 0, 0.2, False, False, 0.1,
        None)
    self.assertEqual((True, False), second_update)
    result_summary, run_results = get_results(request.key)

    # Completion changes the same fields in the summary and the run result;
    # only the cost is stored under a different key and shape.
    completion = {
      'completed_ts': done_ts,
      'durations': [0.1],
      'exit_codes': [0],
      'modified_ts': done_ts,
      'state': State.COMPLETED,
    }
    expected_summary.update(completion)
    expected_summary['costs_usd'] = [0.1]
    self.assertEqual(expected_summary, result_summary.to_dict())
    expected_run.update(completion)
    expected_run['cost_usd'] = 0.1
    self.assertEqual([expected_run], [t.to_dict() for t in run_results])
Beispiel #32
0
    def test_cron_handle_bot_died_same_bot_denied(self):
        """The bot whose run was flagged BOT_DIED is not handed the retry.

        After cron_handle_bot_died() returned (0, 1, 0) the first run result
        is BOT_DIED and the summary is PENDING again. When bot 'localhost'
        polls once more, bot_reap_task() must return (None, None).
        """
        self.mock(random, 'getrandbits', lambda _: 0x88)
        request = task_request.make_request(
            _gen_request_data(
                properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
                scheduling_expiration_secs=600))
        task_scheduler.schedule_request(request)
        bot_dimensions = {
            u'OS': [u'Windows', u'Windows-3.1.1'],
            u'hostname': u'localhost',
            u'foo': u'bar',
        }
        _request, run_result = task_scheduler.bot_reap_task(
            bot_dimensions, 'localhost', 'abc')
        self.assertEqual(1, run_result.try_number)
        self.assertEqual(task_result.State.RUNNING, run_result.state)
        died_ts = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
        self.assertEqual((0, 1, 0), task_scheduler.cron_handle_bot_died())

        # Values expected in both the run result and the result summary.
        common = {
            'bot_id': u'localhost',
            'bot_version': u'abc',
            'children_task_ids': [],
            'completed_ts': None,
            'durations': [],
            'exit_codes': [],
            'failure': False,
            'modified_ts': died_ts,
            'server_versions': [u'v1a'],
            'try_number': 1,
        }

        # Refresh the run result: it was abandoned as an internal failure.
        expected_run = dict(common)
        expected_run.update({
            'abandoned_ts': died_ts,
            'cost_usd': 0.,
            'id': '1d69b9f088008811',
            'internal_failure': True,
            'started_ts': self.now,
            'state': task_result.State.BOT_DIED,
        })
        self.assertEqual(expected_run, run_result.key.get().to_dict())

        # Refresh the summary: the task is pending again.
        expected_summary = dict(common)
        expected_summary.update({
            'abandoned_ts': None,
            'costs_usd': [0.],
            'cost_saved_usd': None,
            'created_ts': self.now,
            'deduped_from': None,
            'id': '1d69b9f088008810',
            'internal_failure': False,
            'name': u'Request name',
            'properties_hash': None,
            'started_ts': None,
            'state': task_result.State.PENDING,
            'user': u'Jesus',
        })
        self.assertEqual(
            expected_summary, run_result.result_summary_key.get().to_dict())

        # Task was retried but the same bot polls again, it's denied the task.
        self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 2)
        denied_request, denied_run = task_scheduler.bot_reap_task(
            bot_dimensions, 'localhost', 'abc')
        self.assertEqual(None, denied_request)
        self.assertEqual(None, denied_run)
        logging.info(
            '%s', [t.to_dict() for t in task_to_run.TaskToRun.query()])
Beispiel #33
0
  def test_cron_handle_bot_died(self):
    """Covers a first try lost to BOT_DIED followed by a successful retry.

    Steps:
      - 'localhost' reaps the task, the clock is moved past
        BOT_PING_TOLERANCE and cron_handle_bot_died() returns (0, 1, 0).
      - The first run result is BOT_DIED while the summary is PENDING again.
      - 'localhost-second' reaps try number 2 and completes it.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    created = utils.utcnow()
    expires = created + datetime.timedelta(seconds=600)
    request = task_request.make_request(
        _gen_request(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
            created_ts=created,
            expiration_ts=expires),
        True)
    task_scheduler.schedule_request(request)
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    _request, first_run = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(1, first_run.try_number)
    self.assertEqual(task_result.State.RUNNING, first_run.state)

    died_ts = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
    self.assertEqual((0, 1, 0), task_scheduler.cron_handle_bot_died())

    # Reload the first run result: it must be flagged as BOT_DIED.
    dead_run = {
      'abandoned_ts': died_ts,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'cost_usd': 0.,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008811',
      'internal_failure': True,
      'modified_ts': died_ts,
      'outputs_ref': None,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': task_result.State.BOT_DIED,
      'try_number': 1,
    }
    self.assertEqual(dead_run, first_run.key.get().to_dict())

    # Reload the summary: the task is pending again.
    pending_summary = {
      'abandoned_ts': None,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'costs_usd': [0.],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': died_ts,
      'name': u'Request name',
      'outputs_ref': None,
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': None,
      'state': task_result.State.PENDING,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'try_number': 1,
      'user': u'Jesus',
    }
    self.assertEqual(
        pending_summary, first_run.result_summary_key.get().to_dict())

    # Task was retried.
    done_ts = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 2)
    _request, second_run = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost-second', 'abc')
    logging.info('%s', [t.to_dict() for t in task_to_run.TaskToRun.query()])
    self.assertEqual(2, second_run.try_number)
    update_outcome = task_scheduler.bot_update_task(
        second_run.key, 'localhost-second', 'Foo1', 0, 0, 0.1, False, False,
        0.1, None)
    self.assertEqual((True, True), update_outcome)

    # Only the fields listed below differ from the pending summary checked
    # above; every other value is expected to be unchanged.
    completed_summary = dict(pending_summary)
    completed_summary.update({
      'bot_id': u'localhost-second',
      'completed_ts': done_ts,
      'costs_usd': [0., 0.1],
      'durations': [0.1],
      'exit_codes': [0],
      'modified_ts': done_ts,
      'started_ts': done_ts,
      'state': task_result.State.COMPLETED,
      'try_number': 2,
    })
    self.assertEqual(
        completed_summary, second_run.result_summary_key.get().to_dict())
    self.assertEqual(0.1, second_run.key.get().cost_usd)
Beispiel #34
0
    def post(self):
        """Handles a polling request.

        Be very permissive on missing values. This can happen because of
        errors on the bot, *we don't want to deny them the capacity to
        update*, so that the bot code is eventually fixed and the bot
        self-update to this working code.

        It makes recovery of the fleet in case of catastrophic failure much
        easier.

        At most one command is sent back, chosen in this order: update (the
        bot version differs from the expected one), sleep (the bot is
        quarantined), restart (should_restart_bot() asks for it), then
        terminate or run when a task was reaped, or sleep when none was. A
        bot event is recorded before each command. A DeadlineExceededError
        aborts the request with HTTP 500.
        """
        (_request, bot_id, version, state, dimensions,
         quarantined_msg) = self._process()
        # Defaults to 0 when the bot state carries no 'sleep_streak' entry.
        sleep_streak = state.get('sleep_streak', 0)
        # A non-empty quarantine message is what marks the bot as quarantined.
        quarantined = bool(quarantined_msg)

        # Note bot existence at two places, one for stats at 1 minute resolution,
        # the other for the list of known bots.
        action = 'bot_inactive' if quarantined else 'bot_active'
        stats.add_entry(action=action, bot_id=bot_id, dimensions=dimensions)

        # Local shortcut: records a bot event with the values gathered above so
        # that each call site only has to name the event type and the task.
        def bot_event(event_type, task_id=None, task_name=None):
            bot_management.bot_event(event_type=event_type,
                                     bot_id=bot_id,
                                     external_ip=self.request.remote_addr,
                                     dimensions=dimensions,
                                     state=state,
                                     version=version,
                                     quarantined=quarantined,
                                     task_id=task_id,
                                     task_name=task_name,
                                     message=quarantined_msg)

        # Bot version is host-specific because the host URL is embedded in
        # swarming_bot.zip
        expected_version = bot_code.get_bot_version(self.request.host_url)
        if version != expected_version:
            # The version check comes first, so even a quarantined bot is
            # told to update.
            bot_event('request_update')
            self._cmd_update(expected_version)
            return
        if quarantined:
            bot_event('request_sleep')
            self._cmd_sleep(sleep_streak, quarantined)
            return

        #
        # At that point, the bot should be in relatively good shape since it's
        # running the right version. It is still possible that invalid code was
        # pushed to the server, so be diligent about it.
        #

        # Bot may need a reboot if it is running for too long. We do not reboot
        # quarantined bots.
        needs_restart, restart_message = bot_management.should_restart_bot(
            bot_id, state)
        if needs_restart:
            bot_event('request_restart')
            self._cmd_restart(restart_message)
            return

        # The bot is in good shape. Try to grab a task.
        try:
            # This is a fairly complex function call, exceptions are expected.
            request, run_result = task_scheduler.bot_reap_task(
                dimensions, bot_id, version)
            if not request:
                # No task found, tell it to sleep a bit.
                bot_event('request_sleep')
                self._cmd_sleep(sleep_streak, quarantined)
                return

            try:
                # This part is tricky since it intentionally runs a transaction after
                # another one.
                if request.properties.is_terminate:
                    bot_event('bot_terminate', task_id=run_result.task_id)
                    self._cmd_terminate(run_result.task_id)
                else:
                    bot_event('request_task',
                              task_id=run_result.task_id,
                              task_name=request.name)
                    self._cmd_run(request, run_result.key, bot_id)
            # NOTE(review): bare except, but nothing is swallowed: whatever
            # was raised is logged and then re-raised unchanged.
            except:
                logging.exception('Dang, exception after reaping')
                raise
        except runtime.DeadlineExceededError:
            # If the timeout happened before a task was assigned there is no
            # problem. If the timeout occurred after a task was assigned, that
            # task will time out (BOT_DIED), since the bot didn't get the
            # details required to run it, and it will automatically get retried
            # (TODO) when the task times out.
            # TODO(maruel): Note the task if possible and hand it out on next poll.
            # https://code.google.com/p/swarming/issues/detail?id=130
            self.abort(500, 'Deadline')
Beispiel #35
0
  def test_cron_handle_bot_died_same_bot_denied(self):
    """The bot whose run was flagged BOT_DIED is not handed the retry.

    After cron_handle_bot_died('f.local') returned ([], 1, 0) the first run
    result is BOT_DIED and the summary is PENDING again. When bot 'localhost'
    polls once more, bot_reap_task() must return (None, None).
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    created = utils.utcnow()
    expires = created + datetime.timedelta(seconds=600)
    request = task_request.make_request(
        _gen_request(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
            created_ts=created,
            expiration_ts=expires),
        True)
    task_scheduler.schedule_request(request)
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    _request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(1, run_result.try_number)
    self.assertEqual(task_result.State.RUNNING, run_result.state)
    died_ts = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
    self.assertEqual(
        ([], 1, 0), task_scheduler.cron_handle_bot_died('f.local'))

    # Values expected in both the run result and the result summary.
    common = {
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'modified_ts': died_ts,
      'outputs_ref': None,
      'server_versions': [u'v1a'],
      'try_number': 1,
    }

    # Refresh the run result: it was abandoned as an internal failure.
    expected_run = dict(common)
    expected_run.update({
      'abandoned_ts': died_ts,
      'cost_usd': 0.,
      'id': '1d69b9f088008811',
      'internal_failure': True,
      'started_ts': self.now,
      'state': task_result.State.BOT_DIED,
    })
    self.assertEqual(expected_run, run_result.key.get().to_dict())

    # Refresh the summary: the task is pending again.
    expected_summary = dict(common)
    expected_summary.update({
      'abandoned_ts': None,
      'costs_usd': [0.],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'name': u'Request name',
      'properties_hash': None,
      'started_ts': None,
      'state': task_result.State.PENDING,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'user': u'Jesus',
    })
    self.assertEqual(
        expected_summary, run_result.result_summary_key.get().to_dict())

    # Task was retried but the same bot polls again, it's denied the task.
    self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 2)
    denied_request, denied_run = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(None, denied_request)
    self.assertEqual(None, denied_run)
    logging.info('%s', [t.to_dict() for t in task_to_run.TaskToRun.query()])
Beispiel #36
0
    def test_get_results(self):
        # Walks a single task through PENDING -> RUNNING -> COMPLETED. After
        # each transition, what get_results() returns is compared against the
        # expected result summary and the expected list of run results.
        # TODO(maruel): Split in more focused tests.
        self.mock(random, 'getrandbits', lambda _: 0x88)
        created_ts = self.now
        self.mock_now(created_ts)
        request = task_request.make_request(
            _gen_request_data(
                properties={'dimensions': {u'OS': u'Windows-3.1.1'}}))
        # The value returned by schedule_request() is not needed here.
        task_scheduler.schedule_request(request)

        # PENDING: the TaskRequest was enqueued and the TaskResultSummary was
        # created, but no TaskRunResult exists yet since the task was not
        # scheduled on any bot.
        summary, runs = get_results(request.key)
        expected_summary = {
            'abandoned_ts': None,
            'bot_id': None,
            'bot_version': None,
            'children_task_ids': [],
            'completed_ts': None,
            'costs_usd': [],
            'cost_saved_usd': None,
            'created_ts': created_ts,  # Time the TaskRequest was created.
            'deduped_from': None,
            'durations': [],
            'exit_codes': [],
            'failure': False,
            'id': '1d69b9f088008810',
            'internal_failure': False,
            'modified_ts': created_ts,
            'name': u'Request name',
            'properties_hash': None,
            'server_versions': [],
            'started_ts': None,
            'state': State.PENDING,
            'try_number': None,
            'user': u'Jesus',
        }
        self.assertEqual(expected_summary, summary.to_dict())
        self.assertEqual([], runs)

        # RUNNING: a bot reaps the TaskToRun one minute later.
        reaped_ts = self.now + datetime.timedelta(seconds=60)
        self.mock_now(reaped_ts)
        reaped_request, run_result = task_scheduler.bot_reap_task(
            {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')
        self.assertEqual(request, reaped_request)
        self.assertTrue(run_result)
        summary, runs = get_results(request.key)
        # Only the fields listed here differ from the PENDING summary.
        expected_summary.update({
            'bot_id': u'localhost',
            'bot_version': u'abc',
            'costs_usd': [0.],
            'modified_ts': reaped_ts,
            'server_versions': [u'v1a'],
            'started_ts': reaped_ts,
            'state': State.RUNNING,
            'try_number': 1,
        })
        self.assertEqual(expected_summary, summary.to_dict())
        expected_run = {
            'abandoned_ts': None,
            'bot_id': u'localhost',
            'bot_version': u'abc',
            'children_task_ids': [],
            'completed_ts': None,
            'cost_usd': 0.,
            'durations': [],
            'exit_codes': [],
            'failure': False,
            'id': '1d69b9f088008811',
            'internal_failure': False,
            'modified_ts': reaped_ts,
            'server_versions': [u'v1a'],
            'started_ts': reaped_ts,
            'state': State.RUNNING,
            'try_number': 1,
        }
        self.assertEqual([expected_run], [entry.to_dict() for entry in runs])

        # COMPLETED: the bot sends two updates, two minutes after creation.
        done_ts = self.now + datetime.timedelta(seconds=120)
        self.mock_now(done_ts)
        first_update = task_scheduler.bot_update_task(
            run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1)
        self.assertEqual((True, True), first_update)
        second_update = task_scheduler.bot_update_task(
            run_result.key, 'localhost', 'Bar22', 0, 0, 0.2, False, False,
            0.1)
        self.assertEqual((True, False), second_update)
        summary, runs = get_results(request.key)
        # Fields that change identically on the summary and on the run result.
        completion = {
            'completed_ts': done_ts,
            'durations': [0.1, 0.2],
            'exit_codes': [0, 0],
            'modified_ts': done_ts,
            'state': State.COMPLETED,
        }
        # The summary keeps one cost per try, the run result a single cost.
        expected_summary.update(completion, costs_usd=[0.1])
        expected_run.update(completion, cost_usd=0.1)
        self.assertEqual(expected_summary, summary.to_dict())
        self.assertEqual([expected_run], [entry.to_dict() for entry in runs])
Beispiel #37
0
    def post(self):
        """Handles a polling request.

        Missing values are tolerated on purpose. They can be caused by errors
        on the bot, and *we don't want to deny them the capacity to update*:
        that is how the bot code eventually gets fixed and how the bot
        self-updates to working code. It makes recovery of the fleet in case
        of catastrophic failure much easier.

        The first case that applies decides the reply:
          - the server settings force bots to sleep: sleep;
          - the bot version differs from the expected one: update;
          - the bot is quarantined: sleep;
          - the bot reports a stale 'bot_group_cfg_version': restart;
          - the bot reports 'maintenance' in its state: sleep;
          - no task could be reaped: sleep;
          - otherwise: terminate or run, depending on the reaped task.
        """
        logging.debug('Request started')
        server_settings = config.settings()
        if server_settings.force_bots_to_sleep_and_not_run_task:
            # Skip every other check and just sleep. The bot is told it is
            # quarantined so it knows it won't be running anything anyway.
            # The large streak makes it sleep for 60s.
            self._cmd_sleep(1000, True)
            return

        poll = self._process()
        streak = poll.state.get('sleep_streak', 0)
        is_quarantined = bool(poll.quarantined_msg)

        def record_event(event_type, task_id=None, task_name=None):
            # Registers a bot event carrying the details of this poll; called
            # right before each reply sent below.
            bot_management.bot_event(
                event_type=event_type,
                bot_id=poll.bot_id,
                external_ip=self.request.remote_addr,
                authenticated_as=auth.get_peer_identity().to_bytes(),
                dimensions=poll.dimensions,
                state=poll.state,
                version=poll.version,
                quarantined=is_quarantined,
                maintenance_msg=poll.maintenance_msg,
                task_id=task_id,
                task_name=task_name,
                message=poll.quarantined_msg)

        # The bot version is host-specific because the host URL is embedded
        # in swarming_bot.zip.
        logging.debug('Fetching bot code version')
        server_version, _ = bot_code.get_bot_version(self.request.host_url)
        if poll.version != server_version:
            record_event('request_update')
            self._cmd_update(server_version)
            return

        if is_quarantined:
            record_event('request_sleep')
            self._cmd_sleep(streak, is_quarantined)
            return

        # When the server-side per-bot config has changed, this particular
        # bot must restart so it picks up the new config in /handshake. Only
        # bots that already know about server-side per-bot configs are
        # checked; such bots send the 'bot_group_cfg_version' state attribute.
        reported_cfg_version = poll.state.get('bot_group_cfg_version')
        if (reported_cfg_version and
                reported_cfg_version != poll.bot_group_cfg.version):
            record_event('request_restart')
            self._cmd_bot_restart('Restarting to pick up new bots.cfg config')
            return

        #
        # From here on the bot should be in relatively good shape since it
        # runs the right version. Invalid code may still have been pushed to
        # the server, so stay diligent.
        #

        # A bot advertising the state key 'maintenance' gets no task until
        # that key is removed.
        #
        # This is a hack because it is not listed in the DB as a separate
        # state, which hinders system monitoring. See bot_management.BotInfo.
        # In practice, ts_mon_metrics.py can look at
        # BotInfo.get('maintenance') to determine if a bot is in maintenance
        # or idle.
        if poll.state.get('maintenance'):
            record_event('request_sleep')
            # Tell the bot it's considered quarantined.
            self._cmd_sleep(streak, True)
            return

        # The bot is in good shape. Try to grab a task.
        try:
            # This is a fairly complex function call, exceptions are expected.
            reaped = task_scheduler.bot_reap_task(
                poll.dimensions, poll.version, poll.lease_expiration_ts)
            task_req, secret_bytes, run_result = reaped
            if not task_req:
                # No task found, tell the bot to sleep a bit.
                record_event('request_sleep')
                self._cmd_sleep(streak, is_quarantined)
                return

            try:
                # This part is tricky since it intentionally runs a
                # transaction after another one.
                active_slice = task_req.task_slice(
                    run_result.current_task_slice)
                if active_slice.properties.is_terminate:
                    record_event('bot_terminate', task_id=run_result.task_id)
                    self._cmd_terminate(run_result.task_id)
                else:
                    record_event(
                        'request_task',
                        task_id=run_result.task_id,
                        task_name=task_req.name)
                    self._cmd_run(
                        task_req, secret_bytes, run_result, poll.bot_id,
                        poll.bot_group_cfg)
            except:
                # Deliberately catches everything: log, then let it propagate.
                logging.exception('Dang, exception after reaping')
                raise
        except runtime.DeadlineExceededError:
            # A timeout before a task was assigned is harmless. A timeout
            # after a task was assigned means that task will time out
            # (BOT_DIED), since the bot didn't get the details required to
            # run it, and it will automatically get retried (TODO) when the
            # task times out.
            # TODO(maruel): Note the task if possible and hand it out on next
            # poll.
            # https://code.google.com/p/swarming/issues/detail?id=130
            self.abort(500, 'Deadline')
Beispiel #38
0
    def post(self):
        """Handles a polling request.

        Missing values are tolerated on purpose. They can be caused by errors
        on the bot, and *we don't want to deny them the capacity to update*:
        that is how the bot code eventually gets fixed and how the bot
        self-updates to working code. It makes recovery of the fleet in case
        of catastrophic failure much easier.

        The first case that applies decides the reply:
          - the bot version differs from the expected one: update;
          - the bot is quarantined: sleep;
          - bot_management.should_restart_bot() asks for it: restart;
          - no task could be reaped: sleep;
          - otherwise: terminate or run, depending on the reaped task.
        """
        _, bot_id, version, state, dimensions, quarantined_msg = (
            self._process())
        streak = state.get("sleep_streak", 0)
        is_quarantined = bool(quarantined_msg)

        # The bot's existence is noted in two places: here for the stats at
        # 1 minute resolution, and through the bot events below for the list
        # of known bots.
        if is_quarantined:
            stats_action = "bot_inactive"
        else:
            stats_action = "bot_active"
        stats.add_entry(
            action=stats_action, bot_id=bot_id, dimensions=dimensions)

        def record_event(event_type, task_id=None, task_name=None):
            # Registers a bot event carrying the details of this poll; called
            # right before each reply sent below.
            bot_management.bot_event(
                event_type=event_type,
                bot_id=bot_id,
                external_ip=self.request.remote_addr,
                dimensions=dimensions,
                state=state,
                version=version,
                quarantined=is_quarantined,
                task_id=task_id,
                task_name=task_name,
                message=quarantined_msg,
            )

        # The bot version is host-specific because the host URL is embedded
        # in swarming_bot.zip.
        server_version = bot_code.get_bot_version(self.request.host_url)
        if version != server_version:
            record_event("request_update")
            self._cmd_update(server_version)
            return

        if is_quarantined:
            record_event("request_sleep")
            self._cmd_sleep(streak, is_quarantined)
            return

        #
        # From here on the bot should be in relatively good shape since it
        # runs the right version. Invalid code may still have been pushed to
        # the server, so stay diligent.
        #

        # The bot may need a reboot if it has been running for too long.
        # Quarantined bots are never rebooted: they already returned above.
        restart_verdict = bot_management.should_restart_bot(bot_id, state)
        needs_restart, restart_message = restart_verdict
        if needs_restart:
            record_event("request_restart")
            self._cmd_restart(restart_message)
            return

        # The bot is in good shape. Try to grab a task.
        try:
            # This is a fairly complex function call, exceptions are expected.
            reaped = task_scheduler.bot_reap_task(dimensions, bot_id, version)
            task_req, run_result = reaped
            if not task_req:
                # No task found, tell the bot to sleep a bit.
                record_event("request_sleep")
                self._cmd_sleep(streak, is_quarantined)
                return

            try:
                # This part is tricky since it intentionally runs a
                # transaction after another one.
                if task_req.properties.is_terminate:
                    record_event("bot_terminate", task_id=run_result.task_id)
                    self._cmd_terminate(run_result.task_id)
                else:
                    record_event(
                        "request_task",
                        task_id=run_result.task_id,
                        task_name=task_req.name)
                    self._cmd_run(task_req, run_result.key, bot_id)
            except:
                # Deliberately catches everything: log, then let it propagate.
                logging.exception("Dang, exception after reaping")
                raise
        except runtime.DeadlineExceededError:
            # A timeout before a task was assigned is harmless. A timeout
            # after a task was assigned means that task will time out
            # (BOT_DIED), since the bot didn't get the details required to
            # run it, and it will automatically get retried (TODO) when the
            # task times out.
            # TODO(maruel): Note the task if possible and hand it out on next
            # poll.
            # https://code.google.com/p/swarming/issues/detail?id=130
            self.abort(500, "Deadline")