Example #1
0
  def test_bot_update_pubsub_error(self):
    """Checks bot_update_task() when PubSub publishing fails, then works."""
    # Schedule a task that carries a PubSub topic.
    request_data = _gen_request(
        properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
        pubsub_topic='projects/abc/topics/def')
    task_scheduler.schedule_request(
        task_request.make_request(request_data, True))

    # Have a matching bot reap it.
    dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    _request, run_result = task_scheduler.bot_reap_task(
        dimensions, 'localhost', 'abc')
    self.assertEqual('localhost', run_result.bot_id)

    def finish_task():
      # The very same terminating update is sent on both attempts.
      return task_scheduler.bot_update_task(
          run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1,
          None)

    # First attempt: publishing is mocked to fail and so does the update.
    self.mock_pub_sub(publish_successful=False)
    outcome = finish_task()
    self.assertEqual((False, False), outcome)

    # The bot retries; publishing now works and one notification goes out.
    calls = self.mock_pub_sub(publish_successful=True)
    outcome = finish_task()
    self.assertEqual((True, True), outcome)
    self.assertEqual(1, len(calls))
Example #2
0
 def test_bot_update_task_new_overwrite(self):
     """Sends two overlapping output chunks and checks what gets stored."""
     run_result = _quick_reap()
     # (output, output_chunk_start) pairs; the second chunk starts inside the
     # first one.
     chunks = (('hi', 0), ('hey', 1))
     for text, start in chunks:
         status = task_scheduler.bot_update_task(
             run_result.key, 'localhost', text, start, None, None, False,
             False, 0.1)
         self.assertEqual((True, False), status)
     stored = run_result.key.get()
     self.assertEqual(['hhey'], list(stored.get_outputs()))
Example #3
0
 def test_bot_update_task(self):
   """Sends two consecutive output chunks and checks the joined output."""
   run_result = _quick_reap()
   # Each entry: (expected return value, output, output_chunk_start).
   updates = (
     ((True, True), 'hi', 0),
     ((True, False), 'hey', 2),
   )
   for wanted, text, start in updates:
     got = task_scheduler.bot_update_task(
         run_result.key, 'localhost', text, start, 0, 0.1, False, False, 0.1)
     self.assertEqual(wanted, got)
   outputs = list(run_result.key.get().get_outputs())
   self.assertEqual(['hihey'], outputs)
Example #4
0
 def test_bot_update_task_new_overwrite(self):
   """Writes 'hi' at offset 0, then 'hey' at offset 1; expects 'hhey'."""
   run_result = _quick_reap()

   def send(text, start):
     # No exit code nor duration is reported with these updates.
     return task_scheduler.bot_update_task(
         run_result.key, 'localhost', text, start, None, None, False, False,
         0.1, None)

   first = send('hi', 0)
   self.assertEqual((True, False), first)
   second = send('hey', 1)
   self.assertEqual((True, False), second)

   entity = run_result.key.get()
   self.assertEqual(['hhey'], list(entity.get_outputs()))
Example #5
0
  def _bot_update_timeouts(self, hard, io):
    """Completes one task with the given timeout flags and checks the results.

    Arguments:
      hard: value forwarded to bot_update_task() in the hard timeout position.
      io: value forwarded to bot_update_task() in the I/O timeout position.

    Both the result summary and the run result are expected to be stored as
    State.TIMED_OUT with 'failure' set.
    """
    self.mock(random, 'getrandbits', lambda _: 0x88)
    request = task_request.make_request(
        _gen_request_data(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'})))
    result_summary = task_scheduler.schedule_request(request)
    _request, run_result = task_scheduler.bot_reap_task(
        {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')
    status = task_scheduler.bot_update_task(
        run_result.key, 'localhost', 'hi', 0, 0, 0.1, hard, io, 0.1)
    self.assertEqual((True, True), status)

    def shared_fields():
      # Fields expected to be identical on the summary and on the run result.
      return {
        'abandoned_ts': None,
        'bot_id': u'localhost',
        'bot_version': u'abc',
        'children_task_ids': [],
        'completed_ts': self.now,
        'durations': [0.1],
        'exit_codes': [0],
        'failure': True,
        'internal_failure': False,
        'modified_ts': self.now,
        'server_versions': [u'v1a'],
        'started_ts': self.now,
        'state': State.TIMED_OUT,
        'try_number': 1,
      }

    # The summary additionally carries the request level information.
    expected_summary = shared_fields()
    expected_summary.update({
      'costs_usd': [0.1],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'id': '1d69b9f088008810',
      'name': u'Request name',
      'properties_hash': None,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'user': u'Jesus',
    })
    self.assertEqual(expected_summary, result_summary.key.get().to_dict())

    # The run result has its own id and a scalar cost.
    expected_run = shared_fields()
    expected_run.update({
      'cost_usd': 0.1,
      'id': '1d69b9f088008811',
    })
    self.assertEqual(expected_run, run_result.key.get().to_dict())
Example #6
0
    def _bot_update_timeouts(self, hard, io):
        """Completes one task with the given timeout flags, checks the results.

        Arguments:
          hard: value forwarded to bot_update_task() in the hard timeout
              position.
          io: value forwarded to bot_update_task() in the I/O timeout
              position.

        Both the result summary and the run result are expected to be stored
        as State.TIMED_OUT with 'failure' set.
        """
        self.mock(random, 'getrandbits', lambda _: 0x88)
        request_data = _gen_request_data(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'}))
        result_summary = task_scheduler.schedule_request(
            task_request.make_request(request_data))
        _request, run_result = task_scheduler.bot_reap_task(
            {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')
        status = task_scheduler.bot_update_task(
            run_result.key, 'localhost', 'hi', 0, 0, 0.1, hard, io, 0.1)
        self.assertEqual((True, True), status)

        def shared_fields():
            # Fields expected to match on the summary and on the run result.
            return {
                'abandoned_ts': None,
                'bot_id': u'localhost',
                'bot_version': u'abc',
                'children_task_ids': [],
                'completed_ts': self.now,
                'durations': [0.1],
                'exit_codes': [0],
                'failure': True,
                'internal_failure': False,
                'modified_ts': self.now,
                'server_versions': [u'v1a'],
                'started_ts': self.now,
                'state': State.TIMED_OUT,
                'try_number': 1,
            }

        # The summary additionally carries the request level information.
        expected_summary = shared_fields()
        expected_summary.update({
            'costs_usd': [0.1],
            'cost_saved_usd': None,
            'created_ts': self.now,
            'deduped_from': None,
            'id': '1d69b9f088008810',
            'name': u'Request name',
            'properties_hash': None,
            'user': u'Jesus',
        })
        self.assertEqual(
            expected_summary, result_summary.key.get().to_dict())

        # The run result has its own id and a scalar cost.
        expected_run = shared_fields()
        expected_run.update({
            'cost_usd': 0.1,
            'id': '1d69b9f088008811',
        })
        self.assertEqual(expected_run, run_result.key.get().to_dict())
Example #7
0
  def test_bot_update_exception(self):
    """bot_update_task() returns (False, False) when ndb.put_multi raises."""
    def failing_put_multi(*_args):
      raise datastore_utils.CommitError('Sorry!')

    run_result = _quick_reap()
    # The mock is installed only after the task was reaped.
    self.mock(ndb, 'put_multi', failing_put_multi)
    status = task_scheduler.bot_update_task(
        run_result.key, 'localhost', 'hi', 0, 0, 0.1, False, False, 0.1)
    self.assertEqual((False, False), status)
Example #8
0
    def test_bot_update_exception(self):
        """Checks the return value when the datastore commit fails."""
        run_result = _quick_reap()

        def raise_commit_error(*_ignored):
            # Stands in for ndb.put_multi and always fails.
            raise datastore_utils.CommitError('Sorry!')

        self.mock(ndb, 'put_multi', raise_commit_error)
        expected = (False, False)
        actual = task_scheduler.bot_update_task(
            run_result.key, 'localhost', 'hi', 0, 0, 0.1, False, False, 0.1)
        self.assertEqual(expected, actual)
Example #9
0
  def test_task_parent_isolated(self):
    """Checks that a child task is listed on its parent's result entities."""
    # Parent task: described by an isolated input, without commands.
    parent_properties = {
      'commands': None,
      'dimensions': {u'OS': u'Windows-3.1.1'},
      'inputs_ref': {
        'isolated': '1' * 40,
        'isolatedserver': 'http://localhost:1',
        'namespace': 'default-gzip',
      },
    }
    parent_request = task_request.make_request(
        _gen_request(properties=parent_properties), True)
    task_scheduler.schedule_request(parent_request)

    dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    reaped_request, run_result = task_scheduler.bot_reap_task(
        dimensions, 'localhost', 'abc')
    self.assertEqual(parent_request, reaped_request)
    self.assertEqual('localhost', run_result.bot_id)
    to_run = task_to_run.TaskToRun.query().get()
    self.assertEqual(None, to_run.queue_number)
    # It's important that the parent terminates with success.
    status = task_scheduler.bot_update_task(
        run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1,
        None)
    self.assertEqual((True, True), status)

    # Child task, pointing back at the parent run.
    parent_id = run_result.task_id
    child_data = _gen_request(
        parent_task_id=parent_id,
        properties={'dimensions':{u'OS': u'Windows-3.1.1'}})
    child_request = task_request.make_request(child_data, True)
    child_summary = task_scheduler.schedule_request(child_request)
    self.assertEqual([], child_summary.children_task_ids)
    self.assertEqual(parent_id, child_request.parent_task_id)

    # Both parent entities must now reference the child.
    run_key = task_pack.unpack_run_result_key(parent_id)
    summary_key = task_pack.run_result_key_to_result_summary_key(run_key)
    children = [child_summary.task_id]
    self.assertEqual(children, run_key.get().children_task_ids)
    self.assertEqual(children, summary_key.get().children_task_ids)
Example #10
0
  def test_task_parent_isolated(self):
    """Runs an isolated parent task, then schedules a child linked to it."""
    isolated_input = {
      'isolated': '1' * 40,
      'isolatedserver': 'http://localhost:1',
      'namespace': 'default-gzip',
    }
    parent_data = _gen_request(
        properties={
          'commands': None,
          'dimensions': {u'OS': u'Windows-3.1.1'},
          'inputs_ref': isolated_input,
        })
    parent = task_request.make_request(parent_data, True)
    task_scheduler.schedule_request(parent)

    # A bot whose dimensions match the request takes the task.
    bot = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    reaped, parent_run = task_scheduler.bot_reap_task(bot, 'localhost', 'abc')
    self.assertEqual(parent, reaped)
    self.assertEqual('localhost', parent_run.bot_id)
    self.assertEqual(None, task_to_run.TaskToRun.query().get().queue_number)

    # It's important that the parent task ends with success.
    update = task_scheduler.bot_update_task(
        parent_run.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1,
        None)
    self.assertEqual((True, True), update)

    # Schedule the child with the parent's task id.
    parent_id = parent_run.task_id
    child = task_request.make_request(
        _gen_request(
            parent_task_id=parent_id,
            properties={'dimensions':{u'OS': u'Windows-3.1.1'}}),
        True)
    child_summary = task_scheduler.schedule_request(child)
    self.assertEqual([], child_summary.children_task_ids)
    self.assertEqual(parent_id, child.parent_task_id)

    # The child id shows up on the parent run result and on its summary.
    parent_run_key = task_pack.unpack_run_result_key(parent_id)
    parent_summary_key = task_pack.run_result_key_to_result_summary_key(
        parent_run_key)
    wanted = [child_summary.task_id]
    for key in (parent_run_key, parent_summary_key):
      self.assertEqual(wanted, key.get().children_task_ids)
Example #11
0
 def _task_ran_successfully(self):
     """Completes an idempotent task and returns its packed run result key.

     The value returned is unicode(run_result.key_packed).
     """
     properties = dict(
         dimensions={u'OS': u'Windows-3.1.1'}, idempotent=True)
     request = task_request.make_request(
         _gen_request_data(properties=properties))
     task_scheduler.schedule_request(request)

     # A bot with matching dimensions reaps the task.
     dimensions = {
         u'OS': [u'Windows', u'Windows-3.1.1'],
         u'hostname': u'localhost',
         u'foo': u'bar',
     }
     reaped_request, run_result = task_scheduler.bot_reap_task(
         dimensions, 'localhost', 'abc')
     self.assertEqual(request, reaped_request)
     self.assertEqual('localhost', run_result.bot_id)
     to_run = task_to_run.TaskToRun.query().get()
     self.assertEqual(None, to_run.queue_number)

     # It's important that the task terminates with success.
     status = task_scheduler.bot_update_task(
         run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1)
     self.assertEqual((True, True), status)
     return unicode(run_result.key_packed)
Example #12
0
 def _task_ran_successfully(self):
   """Completes an idempotent task and returns unicode(run_result.task_id)."""
   request_data = _gen_request(
       properties=dict(dimensions={u'OS': u'Windows-3.1.1'}, idempotent=True))
   scheduled = task_request.make_request(request_data, True)
   task_scheduler.schedule_request(scheduled)

   bot = {
     u'OS': [u'Windows', u'Windows-3.1.1'],
     u'hostname': u'localhost',
     u'foo': u'bar',
   }
   reaped, run_result = task_scheduler.bot_reap_task(bot, 'localhost', 'abc')
   self.assertEqual(scheduled, reaped)
   self.assertEqual('localhost', run_result.bot_id)
   self.assertEqual(None, task_to_run.TaskToRun.query().get().queue_number)

   # It's important that the task terminates with success.
   outcome = task_scheduler.bot_update_task(
       run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False, 0.1,
       None)
   self.assertEqual((True, True), outcome)
   return unicode(run_result.task_id)
Example #13
0
    def post(self, task_id=None):
        """Handles a task update sent by a bot.

        Validates the request body and the bot identity, converts the reported
        stats, output, outputs_ref and CIPD pins into task_result/task_request
        objects, forwards them to task_scheduler.bot_update_task(), records a
        bot event and replies with the 'ok' and 'must_stop' flags.

        Arguments:
          task_id: not read; it is overwritten with request['task_id'] once the
              body has been validated.
        """
        # Unlike handshake and poll, we do not accept invalid keys here. This code
        # path is much more strict.
        request = self.parse_body()
        msg = log_unexpected_subset_keys(self.ACCEPTED_KEYS,
                                         self.REQUIRED_KEYS, request,
                                         self.request, 'bot', 'keys')
        if msg:
            self.abort_with_error(400, error=msg)

        bot_id = request['id']
        task_id = request['task_id']

        # machine_type stays None when no bot info entity exists for this bot.
        machine_type = None
        bot_info = bot_management.get_info_key(bot_id).get()
        if bot_info:
            machine_type = bot_info.machine_type

        # Make sure bot self-reported ID matches the authentication token. Raises
        # auth.AuthorizationError if not.
        bot_auth.validate_bot_id_and_fetch_config(bot_id, machine_type)

        # Optional fields; all default to None except cost_usd which defaults
        # to 0.
        bot_overhead = request.get('bot_overhead')
        cipd_pins = request.get('cipd_pins')
        cipd_stats = request.get('cipd_stats')
        cost_usd = request.get('cost_usd', 0)
        duration = request.get('duration')
        exit_code = request.get('exit_code')
        hard_timeout = request.get('hard_timeout')
        io_timeout = request.get('io_timeout')
        isolated_stats = request.get('isolated_stats')
        output = request.get('output')
        output_chunk_start = request.get('output_chunk_start')
        outputs_ref = request.get('outputs_ref')

        # PerformanceStats is only built below when bot_overhead is set, so
        # stats reported without it are rejected with HTTP 400.
        if (isolated_stats or cipd_stats) and bot_overhead is None:
            ereporter2.log_request(request=self.request,
                                   source='server',
                                   category='task_failure',
                                   message='Failed to update task: %s' %
                                   task_id)
            self.abort_with_error(
                400,
                error=
                'isolated_stats and cipd_stats require bot_overhead to be set'
                '\nbot_overhead: %s\nisolate_stats: %s' %
                (bot_overhead, isolated_stats))

        run_result_key = task_pack.unpack_run_result_key(task_id)
        performance_stats = None
        if bot_overhead is not None:
            performance_stats = task_result.PerformanceStats(
                bot_overhead=bot_overhead)
            if isolated_stats:
                # Missing or empty sections are treated as empty dicts.
                download = isolated_stats.get('download') or {}
                upload = isolated_stats.get('upload') or {}

                # Returns the base64-decoded value of d[k], or None when the
                # key is missing or its value is falsy.
                def unpack_base64(d, k):
                    x = d.get(k)
                    if x:
                        return base64.b64decode(x)

                performance_stats.isolated_download = task_result.OperationStats(
                    duration=download.get('duration'),
                    initial_number_items=download.get('initial_number_items'),
                    initial_size=download.get('initial_size'),
                    items_cold=unpack_base64(download, 'items_cold'),
                    items_hot=unpack_base64(download, 'items_hot'))
                performance_stats.isolated_upload = task_result.OperationStats(
                    duration=upload.get('duration'),
                    items_cold=unpack_base64(upload, 'items_cold'),
                    items_hot=unpack_base64(upload, 'items_hot'))
            if cipd_stats:
                performance_stats.package_installation = task_result.OperationStats(
                    duration=cipd_stats.get('duration'))

        # The output is base64-decoded here; decoding failures are logged and
        # do not abort the request.
        if output is not None:
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as e:
                logging.error('Failed to decode output\n%s\n%r', e, output)
                output = output.encode('ascii', 'replace')
            except TypeError as e:
                # Save the output as-is instead. The error will be logged in ereporter2
                # and returning a HTTP 500 would only force the bot to stay in a retry
                # loop.
                logging.error('Failed to decode output\n%s\n%r', e, output)
        if outputs_ref:
            outputs_ref = task_request.FilesRef(**outputs_ref)

        if cipd_pins:
            cipd_pins = task_result.CipdPins(
                client_package=task_request.CipdPackage(
                    **cipd_pins['client_package']),
                packages=[
                    task_request.CipdPackage(**args)
                    for args in cipd_pins['packages']
                ])

        try:
            state = task_scheduler.bot_update_task(
                run_result_key=run_result_key,
                bot_id=bot_id,
                output=output,
                output_chunk_start=output_chunk_start,
                exit_code=exit_code,
                duration=duration,
                hard_timeout=hard_timeout,
                io_timeout=io_timeout,
                cost_usd=cost_usd,
                outputs_ref=outputs_ref,
                cipd_pins=cipd_pins,
                performance_stats=performance_stats)
            # A falsy state is reported to the bot as HTTP 500 with a retry
            # message.
            if not state:
                logging.info('Failed to update, please retry')
                self.abort_with_error(500,
                                      error='Failed to update, please retry')

            # Map the resulting task state to the bot event type.
            if state in (task_result.State.COMPLETED,
                         task_result.State.TIMED_OUT):
                action = 'task_completed'
            elif state == task_result.State.KILLED:
                action = 'task_killed'
            else:
                assert state in (task_result.State.BOT_DIED,
                                 task_result.State.RUNNING), state
                action = 'task_update'
            bot_management.bot_event(
                event_type=action,
                bot_id=bot_id,
                external_ip=self.request.remote_addr,
                authenticated_as=auth.get_peer_identity().to_bytes(),
                dimensions=None,
                state=None,
                version=None,
                quarantined=None,
                maintenance_msg=None,
                task_id=task_id,
                task_name=None)
        except ValueError as e:
            ereporter2.log_request(request=self.request,
                                   source='server',
                                   category='task_failure',
                                   message='Failed to update task: %s' % e)
            self.abort_with_error(400, error=str(e))
        except webob.exc.HTTPException:
            # Re-raised untouched so the generic handler below does not turn
            # it into a 500; presumably this is what abort_with_error() raises
            # -- TODO confirm.
            raise
        except Exception as e:
            logging.exception('Internal error: %s', e)
            self.abort_with_error(500, error=str(e))
        # must_stop is True only when the task state is KILLED.
        self.send_response({
            'must_stop': state == task_result.State.KILLED,
            'ok': True
        })
Example #14
0
    def post(self, task_id=None):
        """Handles a task update sent by a bot.

        Validates the request body, base64-decodes the output when present,
        forwards the update to task_scheduler.bot_update_task(), records a
        'task_completed' or 'task_update' bot event and replies {'ok': True}.

        Arguments:
          task_id: not read; it is overwritten with request['task_id'] once the
              body has been validated.
        """
        # Unlike handshake and poll, we do not accept invalid keys here. This code
        # path is much more strict.
        request = self.parse_body()
        msg = log_unexpected_subset_keys(self.ACCEPTED_KEYS,
                                         self.REQUIRED_KEYS, request,
                                         self.request, 'bot', 'keys')
        if msg:
            self.abort_with_error(400, error=msg)

        # These three keys are read unconditionally.
        bot_id = request['id']
        cost_usd = request['cost_usd']
        task_id = request['task_id']

        # The remaining keys are optional and default to None.
        duration = request.get('duration')
        exit_code = request.get('exit_code')
        hard_timeout = request.get('hard_timeout')
        io_timeout = request.get('io_timeout')
        output = request.get('output')
        output_chunk_start = request.get('output_chunk_start')
        outputs_ref = request.get('outputs_ref')

        run_result_key = task_pack.unpack_run_result_key(task_id)
        # The output is base64-decoded here; decoding failures are logged and
        # do not abort the request.
        if output is not None:
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as e:
                logging.error('Failed to decode output\n%s\n%r', e, output)
                output = output.encode('ascii', 'replace')
            except TypeError as e:
                # Save the output as-is instead. The error will be logged in ereporter2
                # and returning a HTTP 500 would only force the bot to stay in a retry
                # loop.
                logging.error('Failed to decode output\n%s\n%r', e, output)

        try:
            success, completed = task_scheduler.bot_update_task(
                run_result_key, bot_id, output, output_chunk_start, exit_code,
                duration, hard_timeout, io_timeout, cost_usd, outputs_ref)
            # An unsuccessful update is reported to the bot as HTTP 500 with a
            # retry message.
            if not success:
                logging.info('Failed to update, please retry')
                self.abort_with_error(500,
                                      error='Failed to update, please retry')

            action = 'task_completed' if completed else 'task_update'
            bot_management.bot_event(event_type=action,
                                     bot_id=bot_id,
                                     external_ip=self.request.remote_addr,
                                     dimensions=None,
                                     state=None,
                                     version=None,
                                     quarantined=None,
                                     task_id=task_id,
                                     task_name=None)
        except ValueError as e:
            ereporter2.log_request(request=self.request,
                                   source='server',
                                   category='task_failure',
                                   message='Failed to update task: %s' % e)
            self.abort_with_error(400, error=str(e))
        except webob.exc.HTTPException:
            # Re-raised untouched so the generic handler below does not turn
            # it into a 500; presumably this is what abort_with_error() raises
            # -- TODO confirm.
            raise
        except Exception as e:
            logging.exception('Internal error: %s', e)
            self.abort_with_error(500, error=str(e))

        # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
        # reboots itself to abort the task abruptly. It is useful when a task hangs
        # and the timeout was set too long or the task was superseded by a newer
        # task with more recent executable (e.g. a new Try Server job on a newer
        # patchset on Rietveld).
        self.send_response({'ok': True})
Example #15
0
    def test_get_results(self):
        """Follows one task through PENDING, RUNNING and COMPLETED.

        After each step get_results() is called and both the result summary
        and the list of run results are compared against literal dicts.
        """
        # TODO(maruel): Split in more focused tests.
        self.mock(random, 'getrandbits', lambda _: 0x88)
        created_ts = self.now
        self.mock_now(created_ts)
        data = _gen_request_data(properties=dict(
            dimensions={u'OS': u'Windows-3.1.1'}))
        request = task_request.make_request(data)
        _result_summary = task_scheduler.schedule_request(request)

        # The TaskRequest was enqueued, the TaskResultSummary was created but no
        # TaskRunResult exist yet since the task was not scheduled on any bot.
        result_summary, run_results = get_results(request.key)
        expected = {
            'abandoned_ts': None,
            'bot_id': None,
            'bot_version': None,
            'children_task_ids': [],
            'completed_ts': None,
            'costs_usd': [],
            'cost_saved_usd': None,
            'created_ts': created_ts,
            'deduped_from': None,
            'durations': [],
            'exit_codes': [],
            'failure': False,
            'id': '1d69b9f088008810',
            'internal_failure': False,
            'modified_ts': created_ts,
            'name': u'Request name',
            'properties_hash': None,
            'server_versions': [],
            'started_ts': None,
            'state': State.PENDING,
            'try_number': None,
            'user': u'Jesus',
        }
        self.assertEqual(expected, result_summary.to_dict())
        self.assertEqual([], run_results)

        # A bot reaps the TaskToRun.
        # The mocked clock is moved 60 seconds past self.now for this step.
        reaped_ts = self.now + datetime.timedelta(seconds=60)
        self.mock_now(reaped_ts)
        reaped_request, run_result = task_scheduler.bot_reap_task(
            {'OS': 'Windows-3.1.1'}, 'localhost', 'abc')
        self.assertEqual(request, reaped_request)
        self.assertTrue(run_result)
        result_summary, run_results = get_results(request.key)
        expected = {
            'abandoned_ts': None,
            'bot_id': u'localhost',
            'bot_version': u'abc',
            'children_task_ids': [],
            'completed_ts': None,
            'costs_usd': [0.],
            'cost_saved_usd': None,
            'created_ts': created_ts,  # Time the TaskRequest was created.
            'deduped_from': None,
            'durations': [],
            'exit_codes': [],
            'failure': False,
            'id': '1d69b9f088008810',
            'internal_failure': False,
            'modified_ts': reaped_ts,
            'name': u'Request name',
            'properties_hash': None,
            'server_versions': [u'v1a'],
            'started_ts': reaped_ts,
            'state': State.RUNNING,
            'try_number': 1,
            'user': u'Jesus',
        }
        self.assertEqual(expected, result_summary.to_dict())
        # Exactly one run result is expected at this point.
        expected = [
            {
                'abandoned_ts': None,
                'bot_id': u'localhost',
                'bot_version': u'abc',
                'children_task_ids': [],
                'completed_ts': None,
                'cost_usd': 0.,
                'durations': [],
                'exit_codes': [],
                'failure': False,
                'id': '1d69b9f088008811',
                'internal_failure': False,
                'modified_ts': reaped_ts,
                'server_versions': [u'v1a'],
                'started_ts': reaped_ts,
                'state': State.RUNNING,
                'try_number': 1,
            },
        ]
        self.assertEqual(expected, [i.to_dict() for i in run_results])

        # The bot completes the task.
        # The mocked clock is moved 120 seconds past self.now for this step.
        done_ts = self.now + datetime.timedelta(seconds=120)
        self.mock_now(done_ts)
        self.assertEqual(
            (True, True),
            task_scheduler.bot_update_task(run_result.key, 'localhost', 'Foo1',
                                           0, 0, 0.1, False, False, 0.1))
        # A second update is expected to return (True, False); the dicts below
        # expect two durations and two exit codes afterwards.
        self.assertEqual(
            (True, False),
            task_scheduler.bot_update_task(run_result.key, 'localhost',
                                           'Bar22', 0, 0, 0.2, False, False,
                                           0.1))
        result_summary, run_results = get_results(request.key)
        expected = {
            'abandoned_ts': None,
            'bot_id': u'localhost',
            'bot_version': u'abc',
            'children_task_ids': [],
            'completed_ts': done_ts,
            'costs_usd': [0.1],
            'cost_saved_usd': None,
            'created_ts': created_ts,
            'deduped_from': None,
            'durations': [0.1, 0.2],
            'exit_codes': [0, 0],
            'failure': False,
            'id': '1d69b9f088008810',
            'internal_failure': False,
            'modified_ts': done_ts,
            'name': u'Request name',
            'properties_hash': None,
            'server_versions': [u'v1a'],
            'started_ts': reaped_ts,
            'state': State.COMPLETED,
            'try_number': 1,
            'user': u'Jesus',
        }
        self.assertEqual(expected, result_summary.to_dict())
        expected = [
            {
                'abandoned_ts': None,
                'bot_id': u'localhost',
                'bot_version': u'abc',
                'children_task_ids': [],
                'completed_ts': done_ts,
                'cost_usd': 0.1,
                'durations': [0.1, 0.2],
                'exit_codes': [0, 0],
                'failure': False,
                'id': '1d69b9f088008811',
                'internal_failure': False,
                'modified_ts': done_ts,
                'server_versions': [u'v1a'],
                'started_ts': reaped_ts,
                'state': State.COMPLETED,
                'try_number': 1,
            },
        ]
        self.assertEqual(expected, [t.to_dict() for t in run_results])
Example #16
0
  def test_exit_code_failure(self):
    """A task ending with exit code 1 is COMPLETED and flagged as a failure."""
    self.mock(random, 'getrandbits', lambda _: 0x88)
    request_data = _gen_request(
        properties=dict(dimensions={u'OS': u'Windows-3.1.1'}))
    request = task_request.make_request(request_data, True)
    task_scheduler.schedule_request(request)

    bot_dimensions = {'OS': 'Windows-3.1.1'}
    reaped_request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(request, reaped_request)
    # The bot reports a non-zero exit code.
    status = task_scheduler.bot_update_task(
        run_result.key, 'localhost', 'Foo1', 0, 1, 0.1, False, False, 0.1,
        None)
    self.assertEqual((True, True), status)
    result_summary, run_results = get_results(request.key)

    def shared_fields():
      # Fields expected to be identical on the summary and on the run result.
      return {
        'abandoned_ts': None,
        'bot_dimensions': bot_dimensions,
        'bot_id': u'localhost',
        'bot_version': u'abc',
        'children_task_ids': [],
        'completed_ts': self.now,
        'durations': [0.1],
        'exit_codes': [1],
        'failure': True,
        'internal_failure': False,
        'modified_ts': self.now,
        'outputs_ref': None,
        'server_versions': [u'v1a'],
        'started_ts': self.now,
        'state': State.COMPLETED,
        'try_number': 1,
      }

    # The summary additionally carries the request level information.
    expected_summary = shared_fields()
    expected_summary.update({
      'costs_usd': [0.1],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'id': '1d69b9f088008810',
      'name': u'Request name',
      'properties_hash': None,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'user': u'Jesus',
    })
    self.assertEqual(expected_summary, result_summary.to_dict())

    # A single run result is expected, with its own id and a scalar cost.
    expected_run = shared_fields()
    expected_run.update({
      'cost_usd': 0.1,
      'id': '1d69b9f088008811',
    })
    self.assertEqual([expected_run], [t.to_dict() for t in run_results])
Example #17
0
  def test_get_results(self):
    # Follows a single task through PENDING -> RUNNING -> COMPLETED and, at
    # each step, compares the dicts of the TaskResultSummary and of the
    # TaskRunResult list returned by get_results() with hand-written
    # expectations.
    # TODO(maruel): Split in more focused tests.
    # Pin the random bits so the ids asserted below are stable (presumably
    # getrandbits feeds the generated task id -- TODO confirm).
    self.mock(random, 'getrandbits', lambda _: 0x88)
    created_ts = self.now
    self.mock_now(created_ts)
    data = _gen_request(
        properties=dict(dimensions={u'OS': u'Windows-3.1.1'}))
    request = task_request.make_request(data, True)
    _result_summary = task_scheduler.schedule_request(request)

    # The TaskRequest was enqueued, the TaskResultSummary was created but no
    # TaskRunResult exist yet since the task was not scheduled on any bot.
    result_summary, run_results = get_results(request.key)
    expected = {
      'abandoned_ts': None,
      'bot_dimensions': None,
      'bot_id': None,
      'bot_version': None,
      'children_task_ids': [],
      'completed_ts': None,
      'costs_usd': [],
      'cost_saved_usd': None,
      'created_ts': created_ts,
      'deduped_from': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': created_ts,
      'name': u'Request name',
      'outputs_ref': None,
      'properties_hash': None,
      'server_versions': [],
      'started_ts': None,
      'state': State.PENDING,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'try_number': None,
      'user': u'Jesus',
    }
    self.assertEqual(expected, result_summary.to_dict())
    self.assertEqual([], run_results)

    # A bot reaps the TaskToRun.
    # The mocked clock is moved 60s forward first, so started_ts and
    # modified_ts below are expected to equal reaped_ts while created_ts keeps
    # the original time.
    reaped_ts = self.now + datetime.timedelta(seconds=60)
    self.mock_now(reaped_ts)
    bot_dimensions = {u'OS': u'Windows-3.1.1'}
    reaped_request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(request, reaped_request)
    self.assertTrue(run_result)
    result_summary, run_results = get_results(request.key)
    expected = {
      'abandoned_ts': None,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'costs_usd': [0.],
      'cost_saved_usd': None,
      'created_ts': created_ts,  # Time the TaskRequest was created.
      'deduped_from': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': reaped_ts,
      'name': u'Request name',
      'outputs_ref': None,
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': reaped_ts,
      'state': State.RUNNING,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'try_number': 1,
      'user': u'Jesus',
    }
    self.assertEqual(expected, result_summary.to_dict())
    # Exactly one TaskRunResult is expected now; its id differs from the
    # summary id only by the last digit.
    expected = [
      {
        'abandoned_ts': None,
        'bot_dimensions': bot_dimensions,
        'bot_id': u'localhost',
        'bot_version': u'abc',
        'children_task_ids': [],
        'completed_ts': None,
        'cost_usd': 0.,
        'durations': [],
        'exit_codes': [],
        'failure': False,
        'id': '1d69b9f088008811',
        'internal_failure': False,
        'modified_ts': reaped_ts,
        'outputs_ref': None,
        'server_versions': [u'v1a'],
        'started_ts': reaped_ts,
        'state': State.RUNNING,
        'try_number': 1,
      },
    ]
    self.assertEqual(expected, [i.to_dict() for i in run_results])

    # The bot completes the task.
    done_ts = self.now + datetime.timedelta(seconds=120)
    self.mock_now(done_ts)
    # NOTE(review): the positional arguments after the bot id look like output,
    # output_chunk_start, exit_code, duration, hard_timeout, io_timeout,
    # cost_usd, outputs_ref -- confirm against task_scheduler.bot_update_task.
    self.assertEqual(
        (True, True),
        task_scheduler.bot_update_task(
            run_result.key, 'localhost', 'Foo1', 0, 0, 0.1, False, False,
            0.1, None))
    # A second update on the same run result is expected to return
    # (True, False); the dicts below still carry the duration (0.1) of the
    # first update, not the 0.2 passed here.
    self.assertEqual(
        (True, False),
        task_scheduler.bot_update_task(
            run_result.key, 'localhost', 'Bar22', 0, 0, 0.2, False, False, 0.1,
            None))
    result_summary, run_results = get_results(request.key)
    expected = {
      'abandoned_ts': None,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': done_ts,
      'costs_usd': [0.1],
      'cost_saved_usd': None,
      'created_ts': created_ts,
      'deduped_from': None,
      'durations': [0.1],
      'exit_codes': [0],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': done_ts,
      'name': u'Request name',
      'outputs_ref': None,
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': reaped_ts,
      'state': State.COMPLETED,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'try_number': 1,
      'user': u'Jesus',
    }
    self.assertEqual(expected, result_summary.to_dict())
    expected = [
      {
        'abandoned_ts': None,
        'bot_dimensions': bot_dimensions,
        'bot_id': u'localhost',
        'bot_version': u'abc',
        'children_task_ids': [],
        'completed_ts': done_ts,
        'cost_usd': 0.1,
        'durations': [0.1],
        'exit_codes': [0],
        'failure': False,
        'id': '1d69b9f088008811',
        'internal_failure': False,
        'modified_ts': done_ts,
        'outputs_ref': None,
        'server_versions': [u'v1a'],
        'started_ts': reaped_ts,
        'state': State.COMPLETED,
        'try_number': 1,
      },
    ]
    self.assertEqual(expected, [t.to_dict() for t in run_results])
Example #18
0
    def post(self, task_id=None):
        """Processes a task update sent by a bot.

        The body is validated strictly: unlike handshake and poll, invalid keys
        are not accepted here. The optional output and isolated stats are
        decoded, the update is handed to task_scheduler.bot_update_task() and
        a bot event is recorded. The task_id argument is overwritten by the
        'task_id' value found in the request body.

        Replies {'ok': True} on success; aborts with HTTP 400 on a malformed
        request or a ValueError from the scheduler, and with HTTP 500 when the
        update must be retried or fails unexpectedly.
        """
        request = self.parse_body()
        key_error = log_unexpected_subset_keys(
            self.ACCEPTED_KEYS, self.REQUIRED_KEYS, request, self.request,
            'bot', 'keys')
        if key_error:
            self.abort_with_error(400, error=key_error)

        # Mandatory keys.
        bot_id = request['id']
        cost_usd = request['cost_usd']
        task_id = request['task_id']

        # Optional keys; each is None when absent from the body.
        bot_overhead = request.get('bot_overhead')
        duration = request.get('duration')
        exit_code = request.get('exit_code')
        hard_timeout = request.get('hard_timeout')
        io_timeout = request.get('io_timeout')
        isolated_stats = request.get('isolated_stats')
        output = request.get('output')
        output_chunk_start = request.get('output_chunk_start')
        outputs_ref = request.get('outputs_ref')

        # bot_overhead and isolated_stats are only accepted together.
        has_stats = bool(isolated_stats)
        has_overhead = bot_overhead is not None
        if has_stats != has_overhead:
            ereporter2.log_request(
                request=self.request,
                source='server',
                category='task_failure',
                message='Failed to update task: %s' % task_id)
            self.abort_with_error(
                400,
                error=(
                    'Both bot_overhead and isolated_stats must be set '
                    'simultaneously\nbot_overhead: %s\nisolated_stats: %s' %
                    (bot_overhead, isolated_stats)))

        run_result_key = task_pack.unpack_run_result_key(task_id)

        performance_stats = None
        if isolated_stats:
            download = isolated_stats['download']
            upload = isolated_stats['upload']
            # The items_cold / items_hot fields arrive base64 encoded.
            download_op = task_result.IsolatedOperation(
                duration=download['duration'],
                initial_number_items=download['initial_number_items'],
                initial_size=download['initial_size'],
                items_cold=base64.b64decode(download['items_cold']),
                items_hot=base64.b64decode(download['items_hot']))
            upload_op = task_result.IsolatedOperation(
                duration=upload['duration'],
                items_cold=base64.b64decode(upload['items_cold']),
                items_hot=base64.b64decode(upload['items_hot']))
            performance_stats = task_result.PerformanceStats(
                bot_overhead=bot_overhead,
                isolated_download=download_op,
                isolated_upload=upload_op)

        if output is not None:
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as err:
                logging.error('Failed to decode output\n%s\n%r', err, output)
                output = output.encode('ascii', 'replace')
            except TypeError as err:
                # Save the output as-is instead. The error will be logged in
                # ereporter2 and returning a HTTP 500 would only force the bot
                # to stay in a retry loop.
                logging.error('Failed to decode output\n%s\n%r', err, output)

        if outputs_ref:
            outputs_ref = task_request.FilesRef(**outputs_ref)

        try:
            new_state = task_scheduler.bot_update_task(
                run_result_key=run_result_key,
                bot_id=bot_id,
                output=output,
                output_chunk_start=output_chunk_start,
                exit_code=exit_code,
                duration=duration,
                hard_timeout=hard_timeout,
                io_timeout=io_timeout,
                cost_usd=cost_usd,
                outputs_ref=outputs_ref,
                performance_stats=performance_stats)
            if not new_state:
                # A falsy state means the update did not go through.
                logging.info('Failed to update, please retry')
                self.abort_with_error(
                    500, error='Failed to update, please retry')

            finished_states = (
                task_result.State.COMPLETED, task_result.State.TIMED_OUT)
            if new_state not in finished_states:
                still_going_states = (
                    task_result.State.BOT_DIED, task_result.State.RUNNING)
                assert new_state in still_going_states, new_state
                event_type = 'task_update'
            else:
                event_type = 'task_completed'

            bot_management.bot_event(
                event_type=event_type,
                bot_id=bot_id,
                external_ip=self.request.remote_addr,
                dimensions=None,
                state=None,
                version=None,
                quarantined=None,
                task_id=task_id,
                task_name=None)
        except ValueError as err:
            ereporter2.log_request(
                request=self.request,
                source='server',
                category='task_failure',
                message='Failed to update task: %s' % err)
            self.abort_with_error(400, error=str(err))
        except webob.exc.HTTPException:
            # Let the aborts issued inside the try block through untouched.
            raise
        except Exception as err:
            logging.exception('Internal error: %s', err)
            self.abort_with_error(500, error=str(err))

        # TODO(maruel): When a task is canceled, reply with 'DIE' so that the
        # bot reboots itself to abort the task abruptly. It is useful when a
        # task hangs and the timeout was set too long or the task was
        # superseded by a newer task with more recent executable (e.g. a new
        # Try Server job on a newer patchset on Rietveld).
        self.send_response({'ok': True})
Example #19
0
  def test_cron_handle_bot_died(self):
    # A running task whose bot stopped pinging is flagged BOT_DIED by
    # cron_handle_bot_died(), its summary goes back to PENDING, and a second
    # bot completes it as try number 2. The PubSub notification is expected
    # only once, after the task completes.
    pub_sub_calls = self.mock_pub_sub()

    # Test first retry, then success.
    self.mock(random, 'getrandbits', lambda _: 0x88)
    now = utils.utcnow()
    data = _gen_request(
        properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
        created_ts=now,
        expiration_ts=now+datetime.timedelta(seconds=600),
        pubsub_topic='projects/abc/topics/def')
    request = task_request.make_request(data, True)
    _result_summary = task_scheduler.schedule_request(request)
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    _request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(1, run_result.try_number)
    self.assertEqual(task_result.State.RUNNING, run_result.state)
    # Move the mocked clock past BOT_PING_TOLERANCE; mock_now() returns the new
    # time, which is what the abandoned_ts/modified_ts expectations use.
    # NOTE(review): the second argument presumably adds seconds -- confirm.
    now_1 = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
    # NOTE(review): the meaning of each element of the returned tuple is not
    # visible here; check cron_handle_bot_died() before changing this.
    self.assertEqual(([], 1, 0), task_scheduler.cron_handle_bot_died('f.local'))

    # Refresh and compare:
    # First the TaskRunResult of try 1: BOT_DIED with internal_failure set.
    expected = {
      'abandoned_ts': now_1,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'cost_usd': 0.,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008811',
      'internal_failure': True,
      'modified_ts': now_1,
      'outputs_ref': None,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': task_result.State.BOT_DIED,
      'try_number': 1,
    }
    self.assertEqual(expected, run_result.key.get().to_dict())
    # Then the summary: PENDING again with started_ts cleared, while bot_id and
    # try_number still reflect the first try.
    expected = {
      'abandoned_ts': None,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'costs_usd': [0.],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': now_1,
      'name': u'Request name',
      'outputs_ref': None,
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': None,
      'state': task_result.State.PENDING,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'try_number': 1,
      'user': u'Jesus',
    }
    self.assertEqual(expected, run_result.result_summary_key.get().to_dict())

    # No PubSub notifications yet.
    self.assertEqual(0, len(pub_sub_calls))

    # Task was retried.
    now_2 = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 2)
    _request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost-second', 'abc')
    logging.info('%s', [t.to_dict() for t in task_to_run.TaskToRun.query()])
    self.assertEqual(2, run_result.try_number)
    self.assertEqual(
        (True, True),
        task_scheduler.bot_update_task(
            run_result.key, 'localhost-second', 'Foo1', 0, 0, 0.1, False, False,
            0.1, None))
    # The summary now describes try 2 on the second bot; costs_usd holds one
    # entry per try (0. for the dead one, 0.1 for the successful one).
    expected = {
      'abandoned_ts': None,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost-second',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': now_2,
      'costs_usd': [0., 0.1],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'durations': [0.1],
      'exit_codes': [0],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': now_2,
      'name': u'Request name',
      'outputs_ref': None,
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': now_2,
      'state': task_result.State.COMPLETED,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'try_number': 2,
      'user': u'Jesus',
    }
    self.assertEqual(expected, run_result.result_summary_key.get().to_dict())
    self.assertEqual(0.1, run_result.key.get().cost_usd)

    # PubSub notification is sent.
    self.assertEqual(1, len(pub_sub_calls))
Example #20
0
  def test_cron_handle_bot_died(self):
    # A running task whose bot stopped pinging is flagged BOT_DIED by
    # cron_handle_bot_died(), its summary goes back to PENDING, and a second
    # bot completes it as try number 2.
    # Test first retry, then success.
    self.mock(random, 'getrandbits', lambda _: 0x88)
    data = _gen_request_data(
        properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
        scheduling_expiration_secs=600)
    request = task_request.make_request(data)
    _result_summary = task_scheduler.schedule_request(request)
    bot_dimensions = {
      u'OS': [u'Windows', u'Windows-3.1.1'],
      u'hostname': u'localhost',
      u'foo': u'bar',
    }
    _request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(1, run_result.try_number)
    self.assertEqual(task_result.State.RUNNING, run_result.state)
    # Move the mocked clock past BOT_PING_TOLERANCE; mock_now() returns the new
    # time, which is what the abandoned_ts/modified_ts expectations use.
    # NOTE(review): the second argument presumably adds seconds -- confirm.
    now_1 = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
    # NOTE(review): the meaning of each element of the returned tuple is not
    # visible here; check cron_handle_bot_died() before changing this.
    self.assertEqual((0, 1, 0), task_scheduler.cron_handle_bot_died())

    # Refresh and compare:
    # First the TaskRunResult of try 1: BOT_DIED with internal_failure set.
    expected = {
      'abandoned_ts': now_1,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'cost_usd': 0.,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008811',
      'internal_failure': True,
      'modified_ts': now_1,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': task_result.State.BOT_DIED,
      'try_number': 1,
    }
    self.assertEqual(expected, run_result.key.get().to_dict())
    # Then the summary: PENDING again with started_ts cleared, while bot_id and
    # try_number still reflect the first try.
    expected = {
      'abandoned_ts': None,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': None,
      'costs_usd': [0.],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'durations': [],
      'exit_codes': [],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': now_1,
      'name': u'Request name',
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': None,
      'state': task_result.State.PENDING,
      'try_number': 1,
      'user': u'Jesus',
    }
    self.assertEqual(expected, run_result.result_summary_key.get().to_dict())

    # Task was retried.
    now_2 = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 2)
    _request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost-second', 'abc')
    logging.info('%s', [t.to_dict() for t in task_to_run.TaskToRun.query()])
    self.assertEqual(2, run_result.try_number)
    self.assertEqual(
        (True, True),
        task_scheduler.bot_update_task(
            run_result.key, 'localhost-second', 'Foo1', 0, 0, 0.1, False, False,
            0.1))
    # The summary now describes try 2 on the second bot; costs_usd holds one
    # entry per try (0. for the dead one, 0.1 for the successful one).
    expected = {
      'abandoned_ts': None,
      'bot_id': u'localhost-second',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': now_2,
      'costs_usd': [0., 0.1],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'durations': [0.1],
      'exit_codes': [0],
      'failure': False,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': now_2,
      'name': u'Request name',
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': now_2,
      'state': task_result.State.COMPLETED,
      'try_number': 2,
      'user': u'Jesus',
    }
    self.assertEqual(expected, run_result.result_summary_key.get().to_dict())
    self.assertEqual(0.1, run_result.key.get().cost_usd)
Example #21
0
  def test_exit_code_failure(self):
    # A task that the bot reports with a non-zero exit code is expected to end
    # up COMPLETED with failure=True (and internal_failure=False) in both the
    # result summary and the single run result.
    self.mock(random, 'getrandbits', lambda _: 0x88)
    data = _gen_request(
        properties=dict(dimensions={u'OS': u'Windows-3.1.1'}))
    request = task_request.make_request(data, True)
    _result_summary = task_scheduler.schedule_request(request)
    bot_dimensions = {'OS': 'Windows-3.1.1'}
    reaped_request, run_result = task_scheduler.bot_reap_task(
        bot_dimensions, 'localhost', 'abc')
    self.assertEqual(request, reaped_request)
    # NOTE(review): the positional arguments after the bot id look like output,
    # output_chunk_start, exit_code (1 here), duration, hard_timeout,
    # io_timeout, cost_usd, outputs_ref -- confirm against
    # task_scheduler.bot_update_task.
    self.assertEqual(
        (True, True),
        task_scheduler.bot_update_task(
            run_result.key, 'localhost', 'Foo1', 0, 1, 0.1, False, False, 0.1,
            None))
    result_summary, run_results = get_results(request.key)

    # The clock is never advanced in this test, so every timestamp is self.now.
    expected = {
      'abandoned_ts': None,
      'bot_dimensions': bot_dimensions,
      'bot_id': u'localhost',
      'bot_version': u'abc',
      'children_task_ids': [],
      'completed_ts': self.now,
      'costs_usd': [0.1],
      'cost_saved_usd': None,
      'created_ts': self.now,
      'deduped_from': None,
      'durations': [0.1],
      'exit_codes': [1],
      'failure': True,
      'id': '1d69b9f088008810',
      'internal_failure': False,
      'modified_ts': self.now,
      'name': u'Request name',
      'outputs_ref': None,
      'properties_hash': None,
      'server_versions': [u'v1a'],
      'started_ts': self.now,
      'state': State.COMPLETED,
      'try_number': 1,
      'tags': [u'OS:Windows-3.1.1', u'priority:50', u'tag:1', u'user:Jesus'],
      'user': u'Jesus',
    }
    self.assertEqual(expected, result_summary.to_dict())

    expected = [
      {
        'abandoned_ts': None,
        'bot_dimensions': bot_dimensions,
        'bot_id': u'localhost',
        'bot_version': u'abc',
        'children_task_ids': [],
        'completed_ts': self.now,
        'cost_usd': 0.1,
        'durations': [0.1],
        'exit_codes': [1],
        'failure': True,
        'id': '1d69b9f088008811',
        'internal_failure': False,
        'modified_ts': self.now,
        'outputs_ref': None,
        'server_versions': [u'v1a'],
        'started_ts': self.now,
        'state': State.COMPLETED,
        'try_number': 1,
      },
    ]
    self.assertEqual(expected, [t.to_dict() for t in run_results])
Example #22
0
    def test_cron_handle_bot_died(self):
        # A running task whose bot stopped pinging is flagged BOT_DIED by
        # cron_handle_bot_died(), its summary goes back to PENDING, and a
        # second bot completes it as try number 2.
        # Test first retry, then success.
        self.mock(random, 'getrandbits', lambda _: 0x88)
        data = _gen_request_data(
            properties=dict(dimensions={u'OS': u'Windows-3.1.1'}),
            scheduling_expiration_secs=600)
        request = task_request.make_request(data)
        _result_summary = task_scheduler.schedule_request(request)
        bot_dimensions = {
            u'OS': [u'Windows', u'Windows-3.1.1'],
            u'hostname': u'localhost',
            u'foo': u'bar',
        }
        _request, run_result = task_scheduler.bot_reap_task(
            bot_dimensions, 'localhost', 'abc')
        self.assertEqual(1, run_result.try_number)
        self.assertEqual(task_result.State.RUNNING, run_result.state)
        # Move the mocked clock past BOT_PING_TOLERANCE; mock_now() returns the
        # new time, which is what the abandoned_ts/modified_ts expectations
        # use. NOTE(review): the second argument presumably adds seconds --
        # confirm.
        now_1 = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 1)
        # NOTE(review): the meaning of each element of the returned tuple is
        # not visible here; check cron_handle_bot_died() before changing this.
        self.assertEqual((0, 1, 0), task_scheduler.cron_handle_bot_died())

        # Refresh and compare:
        # First the TaskRunResult of try 1: BOT_DIED with internal_failure set.
        expected = {
            'abandoned_ts': now_1,
            'bot_id': u'localhost',
            'bot_version': u'abc',
            'children_task_ids': [],
            'completed_ts': None,
            'cost_usd': 0.,
            'durations': [],
            'exit_codes': [],
            'failure': False,
            'id': '1d69b9f088008811',
            'internal_failure': True,
            'modified_ts': now_1,
            'server_versions': [u'v1a'],
            'started_ts': self.now,
            'state': task_result.State.BOT_DIED,
            'try_number': 1,
        }
        self.assertEqual(expected, run_result.key.get().to_dict())
        # Then the summary: PENDING again with started_ts cleared, while bot_id
        # and try_number still reflect the first try.
        expected = {
            'abandoned_ts': None,
            'bot_id': u'localhost',
            'bot_version': u'abc',
            'children_task_ids': [],
            'completed_ts': None,
            'costs_usd': [0.],
            'cost_saved_usd': None,
            'created_ts': self.now,
            'deduped_from': None,
            'durations': [],
            'exit_codes': [],
            'failure': False,
            'id': '1d69b9f088008810',
            'internal_failure': False,
            'modified_ts': now_1,
            'name': u'Request name',
            'properties_hash': None,
            'server_versions': [u'v1a'],
            'started_ts': None,
            'state': task_result.State.PENDING,
            'try_number': 1,
            'user': u'Jesus',
        }
        self.assertEqual(expected,
                         run_result.result_summary_key.get().to_dict())

        # Task was retried.
        now_2 = self.mock_now(self.now + task_result.BOT_PING_TOLERANCE, 2)
        _request, run_result = task_scheduler.bot_reap_task(
            bot_dimensions, 'localhost-second', 'abc')
        logging.info('%s',
                     [t.to_dict() for t in task_to_run.TaskToRun.query()])
        self.assertEqual(2, run_result.try_number)
        self.assertEqual(
            (True, True),
            task_scheduler.bot_update_task(run_result.key, 'localhost-second',
                                           'Foo1', 0, 0, 0.1, False, False,
                                           0.1))
        # The summary now describes try 2 on the second bot; costs_usd holds
        # one entry per try (0. for the dead one, 0.1 for the successful one).
        expected = {
            'abandoned_ts': None,
            'bot_id': u'localhost-second',
            'bot_version': u'abc',
            'children_task_ids': [],
            'completed_ts': now_2,
            'costs_usd': [0., 0.1],
            'cost_saved_usd': None,
            'created_ts': self.now,
            'deduped_from': None,
            'durations': [0.1],
            'exit_codes': [0],
            'failure': False,
            'id': '1d69b9f088008810',
            'internal_failure': False,
            'modified_ts': now_2,
            'name': u'Request name',
            'properties_hash': None,
            'server_versions': [u'v1a'],
            'started_ts': now_2,
            'state': task_result.State.COMPLETED,
            'try_number': 2,
            'user': u'Jesus',
        }
        self.assertEqual(expected,
                         run_result.result_summary_key.get().to_dict())
        self.assertEqual(0.1, run_result.key.get().cost_usd)
Example #23
0
    def post(self, task_id=None):
        """Processes a task update sent by a bot.

        The body is validated strictly: unlike handshake and poll, invalid keys
        are not accepted here. The optional base64 output is decoded, the update
        is handed to task_scheduler.bot_update_task() and, when it succeeded, a
        bot event ("task_completed" or "task_update") is recorded. The task_id
        argument is overwritten by the "task_id" value of the request body.

        Replies {"ok": True} on success; aborts with HTTP 400 on a malformed
        request or a ValueError from the scheduler, and with HTTP 500 when the
        update must be retried or fails unexpectedly.
        """
        request = self.parse_body()
        msg = log_unexpected_subset_keys(self.ACCEPTED_KEYS, self.REQUIRED_KEYS, request, self.request, "bot", "keys")
        if msg:
            self.abort_with_error(400, error=msg)

        # Mandatory keys.
        bot_id = request["id"]
        cost_usd = request["cost_usd"]
        task_id = request["task_id"]

        # Optional keys; each is None when absent from the body.
        duration = request.get("duration")
        exit_code = request.get("exit_code")
        hard_timeout = request.get("hard_timeout")
        io_timeout = request.get("io_timeout")
        output = request.get("output")
        output_chunk_start = request.get("output_chunk_start")
        outputs_ref = request.get("outputs_ref")

        run_result_key = task_pack.unpack_run_result_key(task_id)
        if output is not None:
            try:
                output = base64.b64decode(output)
            except UnicodeEncodeError as e:
                logging.error("Failed to decode output\n%s\n%r", e, output)
                output = output.encode("ascii", "replace")
            except TypeError as e:
                # Save the output as-is instead. The error will be logged in ereporter2
                # and returning a HTTP 500 would only force the bot to stay in a retry
                # loop.
                logging.error("Failed to decode output\n%s\n%r", e, output)

        try:
            success, completed = task_scheduler.bot_update_task(
                run_result_key,
                bot_id,
                output,
                output_chunk_start,
                exit_code,
                duration,
                hard_timeout,
                io_timeout,
                cost_usd,
                outputs_ref,
            )
            if success:
                action = "task_completed" if completed else "task_update"
                bot_management.bot_event(
                    event_type=action,
                    bot_id=bot_id,
                    external_ip=self.request.remote_addr,
                    dimensions=None,
                    state=None,
                    version=None,
                    quarantined=None,
                    task_id=task_id,
                    task_name=None,
                )
        except ValueError as e:
            ereporter2.log_request(
                request=self.request, source="server", category="task_failure", message="Failed to update task: %s" % e
            )
            self.abort_with_error(400, error=str(e))
        except Exception as e:
            # Record the traceback; otherwise the only trace of the failure is
            # the error string sent back to the bot.
            logging.exception("Internal error: %s", e)
            self.abort_with_error(500, error=str(e))
        else:
            if not success:
                # This abort is issued outside of the try body on purpose: the
                # exception it raises must not be caught by the generic handler
                # above, which would replace the retry message with str(e).
                logging.info("Failed to update, please retry")
                self.abort_with_error(500, error="Failed to update, please retry")

        # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
        # reboots itself to abort the task abruptly. It is useful when a task hangs
        # and the timeout was set too long or the task was superseded by a newer
        # task with more recent executable (e.g. a new Try Server job on a newer
        # patchset on Rietveld).
        self.send_response({"ok": True})
Example #24
0
  def post(self, task_id=None):
    """Processes a task update sent by a bot.

    The body is validated strictly: unlike handshake and poll, invalid keys are
    not accepted here. The optional base64 output is decoded, the update is
    handed to task_scheduler.bot_update_task() and a bot event
    ('task_completed' or 'task_update') is recorded. The task_id argument is
    overwritten by the 'task_id' value of the request body.

    Replies {'ok': True} on success; aborts with HTTP 400 on a malformed request
    or a ValueError from the scheduler, and with HTTP 500 when the update must
    be retried or fails unexpectedly.
    """
    request = self.parse_body()
    key_error = log_unexpected_subset_keys(
        self.ACCEPTED_KEYS,
        self.REQUIRED_KEYS,
        request,
        self.request,
        'bot',
        'keys')
    if key_error:
      self.abort_with_error(400, error=key_error)

    # Mandatory keys.
    bot_id = request['id']
    cost_usd = request['cost_usd']
    task_id = request['task_id']

    # Optional keys; each is None when absent from the body.
    duration = request.get('duration')
    exit_code = request.get('exit_code')
    hard_timeout = request.get('hard_timeout')
    io_timeout = request.get('io_timeout')
    output = request.get('output')
    output_chunk_start = request.get('output_chunk_start')
    outputs_ref = request.get('outputs_ref')

    run_result_key = task_pack.unpack_run_result_key(task_id)

    if output is not None:
      try:
        output = base64.b64decode(output)
      except UnicodeEncodeError as err:
        logging.error('Failed to decode output\n%s\n%r', err, output)
        output = output.encode('ascii', 'replace')
      except TypeError as err:
        # Save the output as-is instead. The error will be logged in ereporter2
        # and returning a HTTP 500 would only force the bot to stay in a retry
        # loop.
        logging.error('Failed to decode output\n%s\n%r', err, output)

    try:
      success, completed = task_scheduler.bot_update_task(
          run_result_key,
          bot_id,
          output,
          output_chunk_start,
          exit_code,
          duration,
          hard_timeout,
          io_timeout,
          cost_usd,
          outputs_ref)
      if not success:
        logging.info('Failed to update, please retry')
        self.abort_with_error(500, error='Failed to update, please retry')

      if completed:
        event_type = 'task_completed'
      else:
        event_type = 'task_update'
      bot_management.bot_event(
          event_type=event_type,
          bot_id=bot_id,
          external_ip=self.request.remote_addr,
          dimensions=None,
          state=None,
          version=None,
          quarantined=None,
          task_id=task_id,
          task_name=None)
    except ValueError as err:
      ereporter2.log_request(
          request=self.request,
          source='server',
          category='task_failure',
          message='Failed to update task: %s' % err)
      self.abort_with_error(400, error=str(err))
    except webob.exc.HTTPException:
      # Let the aborts issued inside the try block through untouched.
      raise
    except Exception as err:
      logging.exception('Internal error: %s', err)
      self.abort_with_error(500, error=str(err))

    # TODO(maruel): When a task is canceled, reply with 'DIE' so that the bot
    # reboots itself to abort the task abruptly. It is useful when a task hangs
    # and the timeout was set too long or the task was superseded by a newer
    # task with more recent executable (e.g. a new Try Server job on a newer
    # patchset on Rietveld).
    self.send_response({'ok': True})