예제 #1
0
    def test_yield_expired_task_to_run(self):
        # Checks that a TaskToRun created at self.now with a 60 second slice
        # expiration is reported by yield_expired_task_to_run() only after the
        # mocked clock has been moved past that delay.
        task_slices = [{
            'expiration_secs': 60,
            'properties': _gen_properties(),
        }]
        self._gen_new_task_to_run_slices(
            1, created_ts=self.now, task_slices=task_slices)
        bot_dimensions = {u'id': [u'bot1'], u'pool': [u'default']}

        # Initial state: nothing is handed to this bot and nothing is expired.
        dispatchable = _yield_next_available_task_to_dispatch(
            bot_dimensions, None)
        self.assertEqual(0, len(dispatchable))
        expired = list(task_to_run.yield_expired_task_to_run())
        self.assertEqual(0, len(expired))

        # All tasks are now expired. Note that they may still have
        # .queue_number set because the cron job wasn't run; the intent
        # recorded here is that yield_next_available_task_to_dispatch() still
        # yields them so that task_scheduler can expire them "inline" instead
        # of waiting for a cron job.
        # NOTE(review): the assertion below expects 0 entries for this bot,
        # which does not obviously match "still yielded" -- confirm which one
        # reflects the intended behavior.
        self.mock_now(self.now, 61)
        dispatchable = _yield_next_available_task_to_dispatch(
            bot_dimensions, None)
        self.assertEqual(0, len(dispatchable))
        expired = list(task_to_run.yield_expired_task_to_run())
        self.assertEqual(1, len(expired))
예제 #2
0
    def test_yield_expired_task_to_run(self):
        # A single TaskToRun with a 60 second scheduling expiration: it must be
        # dispatchable and not expired at first.
        _gen_new_task_to_run(scheduling_expiration_secs=60)
        available = _yield_next_available_task_to_dispatch({})
        self.assertEqual(1, len(available))
        expired = list(task_to_run.yield_expired_task_to_run())
        self.assertEqual(0, len(expired))

        # All tasks are now expired. Even though .queue_number may still be set
        # because the cron job wasn't run, the task must no longer be yielded
        # by yield_next_available_task_to_dispatch().
        self.mock_now(self.now, 61)
        available = _yield_next_available_task_to_dispatch({})
        self.assertEqual(0, len(available))
        expired = list(task_to_run.yield_expired_task_to_run())
        self.assertEqual(1, len(expired))
예제 #3
0
    def test_yield_expired_task_to_run(self):
        # Phase 1: the freshly created TaskToRun (60 second scheduling
        # expiration) is available for dispatch and is not reported as expired.
        _gen_new_task_to_run(scheduling_expiration_secs=60)
        to_dispatch = _yield_next_available_task_to_dispatch({})
        self.assertEqual(1, len(to_dispatch))
        already_expired = list(task_to_run.yield_expired_task_to_run())
        self.assertEqual(0, len(already_expired))

        # Phase 2: all tasks are now expired. They may still have
        # .queue_number set because the cron job wasn't run, yet they must not
        # be yielded by yield_next_available_task_to_dispatch() anymore.
        self.mock_now(self.now, 61)
        to_dispatch = _yield_next_available_task_to_dispatch({})
        self.assertEqual(0, len(to_dispatch))
        already_expired = list(task_to_run.yield_expired_task_to_run())
        self.assertEqual(1, len(already_expired))
예제 #4
0
def cron_abort_expired_task_to_run():
  """Aborts expired TaskToRun requests to execute a TaskRequest on a bot.

  A TaskToRun can end up expired for three reasons:
  - Task requests come in faster than they are completed, e.g. there are not
    enough bots to run all the tasks at the current rate. That's normal
    overflow and must be handled accordingly.
  - No connected bot satisfies the requested dimensions. This is trickier: it
    is either a typo in the dimensions, or the bots all died and the admins
    must reconnect them.
  - The server has internal failures that prevent it from distributing the
    tasks or from properly receiving results from the bots.

  The summary line is logged from a `finally` clause, so it is emitted even
  when the loop is interrupted by an exception.

  Returns:
    Number of TaskToRun entities for which _expire_task() succeeded.
  """
  expired_count = 0
  ignored_count = 0
  try:
    for expired in task_to_run.yield_expired_task_to_run():
      request = expired.request_key.get()
      if not _expire_task(expired.key, request):
        # It's not a big deal, the bot will continue running.
        ignored_count += 1
        continue
      expired_count += 1
      summary_key = task_pack.request_key_to_result_summary_key(request.key)
      stats.add_task_entry(
          'task_request_expired',
          summary_key,
          dimensions=request.properties.dimensions,
          user=request.user)
  finally:
    # TODO(maruel): Use stats_framework.
    logging.info('Killed %d task, skipped %d', expired_count, ignored_count)
  return expired_count
예제 #5
0
  def test_yield_expired_task_to_run(self):
    # Creates four TaskToRun of different ages and checks which of them
    # yield_expired_task_to_run() reports.
    def one_slice():
      # A fresh single-slice list with a 60 second expiration.
      return [{'expiration_secs': 60, 'properties': _gen_properties()}]

    # task_to_run_1: still active
    self._gen_new_task_to_run_slices(
        1, created_ts=self.now, task_slices=one_slice())
    # task_to_run_2: just reached the expiration time
    _, to_run_2 = self._gen_new_task_to_run_slices(
        0,
        created_ts=self.now - datetime.timedelta(seconds=61),
        task_slices=one_slice())
    # task_to_run_3: already passed the expiration time 1 day ago
    _, to_run_3 = self._gen_new_task_to_run_slices(
        0,
        created_ts=self.now - datetime.timedelta(days=1),
        task_slices=one_slice())
    # task_to_run_4: already passed the expiration time a long time ago
    self._gen_new_task_to_run_slices(
        0,
        created_ts=self.now - datetime.timedelta(weeks=4),
        task_slices=one_slice())

    bot_dimensions = {u'id': [u'bot1'], u'pool': [u'default']}
    dispatchable = _yield_next_available_task_to_dispatch(bot_dimensions)
    self.assertEqual(0, len(dispatchable))

    expired_task_to_runs = list(task_to_run.yield_expired_task_to_run())

    def by_expiration(entity):
      # Both lists are ordered by expiration_ts so that the comparison does
      # not depend on the order in which entities are yielded.
      return entity.expiration_ts

    # Only to_run_2 and to_run_3 should be yielded.
    expected = [to_run_2, to_run_3]
    self.assertEqual(
        sorted(expected, key=by_expiration),
        sorted(expired_task_to_runs, key=by_expiration))
예제 #6
0
def cron_abort_expired_task_to_run():
  """Aborts expired TaskToRun requests to execute a TaskRequest on a bot.

  Three situations lead to expired TaskToRun entities:
  - The incoming rate of task requests is higher than the completion rate,
    e.g. there are not enough bots for the current load. That's normal
    overflow and must be handled accordingly.
  - No connected bot satisfies the requested dimensions. This is trickier: it
    is either a typo in the dimensions, or the bots all died and the admins
    must reconnect them.
  - The server has internal failures that prevent it from distributing the
    tasks or from properly receiving results from the bots.

  The counters are logged in a `finally` clause so the summary is written
  even if iterating or expiring raises.

  Returns:
    How many TaskToRun entities _expire_task() reported as expired.
  """
  num_killed = 0
  num_skipped = 0
  try:
    for entry in task_to_run.yield_expired_task_to_run():
      request = entry.request_key.get()
      was_expired = _expire_task(entry.key, request)
      if was_expired:
        num_killed += 1
        result_summary_key = task_pack.request_key_to_result_summary_key(
            request.key)
        stats.add_task_entry(
            'task_request_expired',
            result_summary_key,
            dimensions=request.properties.dimensions,
            user=request.user)
        continue
      # It's not a big deal, the bot will continue running.
      num_skipped += 1
  finally:
    # TODO(maruel): Use stats_framework.
    logging.info('Killed %d task, skipped %d', num_killed, num_skipped)
  return num_killed
예제 #7
0
  def test_yield_expired_task_to_run(self):
    # A TaskToRun created at the current time that expires 60 seconds later:
    # at first it is dispatchable and not reported as expired.
    now = utils.utcnow()
    expiration_ts = now + datetime.timedelta(seconds=60)
    _gen_new_task_to_run(created_ts=now, expiration_ts=expiration_ts)
    available = _yield_next_available_task_to_dispatch({})
    self.assertEqual(1, len(available))
    expired = list(task_to_run.yield_expired_task_to_run())
    self.assertEqual(0, len(expired))

    # All tasks are now expired. Even though .queue_number may still be set
    # because the cron job wasn't run, the task must no longer be yielded by
    # yield_next_available_task_to_dispatch().
    self.mock_now(self.now, 61)
    available = _yield_next_available_task_to_dispatch({})
    self.assertEqual(0, len(available))
    expired = list(task_to_run.yield_expired_task_to_run())
    self.assertEqual(1, len(expired))
예제 #8
0
  def test_yield_expired_task_to_run(self):
    # A TaskToRun created at the current time that expires 60 seconds later:
    # a bot in the default pool can pick it up and it is not expired yet.
    now = utils.utcnow()
    expiration_ts = now + datetime.timedelta(seconds=60)
    _gen_new_task_to_run(created_ts=now, expiration_ts=expiration_ts)
    available = _yield_next_available_task_to_dispatch(
        {u'pool': u'default'}, None)
    self.assertEqual(1, len(available))
    expired = list(task_to_run.yield_expired_task_to_run())
    self.assertEqual(0, len(expired))

    # All tasks are now expired. Even though .queue_number may still be set
    # because the cron job wasn't run, the task must no longer be yielded by
    # yield_next_available_task_to_dispatch().
    self.mock_now(self.now, 61)
    available = _yield_next_available_task_to_dispatch({}, None)
    self.assertEqual(0, len(available))
    expired = list(task_to_run.yield_expired_task_to_run())
    self.assertEqual(1, len(expired))
    def test_yield_expired_task_to_run(self):
        # Creates four TaskToRun of different ages and checks which of them
        # yield_expired_task_to_run() reports.
        #
        # There's a cut off at 2019-09-01, so the default self.now on Jan 2nd
        # doesn't work when looking 4 weeks ago.
        # The time fields are written as plain decimal literals: zero-prefixed
        # forms such as `03` are octal literals in Python 2 and a SyntaxError
        # in Python 3.
        self.now = datetime.datetime(2019, 10, 10, 3, 4, 5, 6)
        self.mock_now(self.now, 0)

        # task_to_run_1: still active
        self._gen_new_task_to_run_slices(
            1,
            created_ts=self.now,
            task_slices=[{
                'expiration_secs': 60,
                'properties': _gen_properties()
            }])
        # task_to_run_2: just reached the expiration time
        _, to_run_2 = self._gen_new_task_to_run_slices(
            0,
            created_ts=self.now - datetime.timedelta(seconds=61),
            task_slices=[{
                'expiration_secs': 60,
                'properties': _gen_properties()
            }])
        # task_to_run_3: already passed the expiration time 1 day ago
        _, to_run_3 = self._gen_new_task_to_run_slices(
            0,
            created_ts=self.now - datetime.timedelta(days=1),
            task_slices=[{
                'expiration_secs': 60,
                'properties': _gen_properties()
            }])
        # task_to_run_4: already passed the expiration time a long time ago
        self._gen_new_task_to_run_slices(
            0,
            created_ts=self.now - datetime.timedelta(weeks=4),
            task_slices=[{
                'expiration_secs': 60,
                'properties': _gen_properties()
            }])

        bot_dimensions = {u'id': [u'bot1'], u'pool': [u'default']}

        self.assertEqual(
            0, len(_yield_next_available_task_to_dispatch(bot_dimensions)))

        expired_task_to_runs = list(task_to_run.yield_expired_task_to_run())

        # Only to_run_2 and to_run_3 should be yielded. task_to_run_4 is too
        # old and is ignored.
        expected = [to_run_2, to_run_3]
        # Both sides are sorted by expiration_ts so the comparison does not
        # depend on the order in which entities are yielded.
        sort_key = lambda x: x.expiration_ts
        self.assertEqual(sorted(expected, key=sort_key),
                         sorted(expired_task_to_runs, key=sort_key))
예제 #10
0
def cron_abort_expired_task_to_run(host):
  """Aborts expired TaskToRun requests to execute a TaskRequest on a bot.

  A TaskToRun can end up expired for three reasons:
  - Task requests come in faster than they are completed, e.g. there are not
    enough bots to run all the tasks at the current rate. That's normal
    overflow and must be handled accordingly.
  - No connected bot satisfies the requested dimensions. This is trickier: it
    is either a typo in the dimensions, or the bots all died and the admins
    must reconnect them.
  - The server has internal failures that prevent it from distributing the
    tasks or from properly receiving results from the bots.

  Logging happens in a `finally` clause, so whatever was processed is reported
  even when the loop is interrupted by an exception.

  Args:
    host: prefix used to build the '<host>/user/task/<task_id>' lines of the
        warning logged for aborted tasks.

  Returns:
    Tuple (aborted, reenqueued) of lists of packed task ids.
  """
  aborted_requests = []
  reenqueued_requests = []
  skipped = 0
  try:
    for expired in task_to_run.yield_expired_task_to_run():
      request = expired.request_key.get()
      summary, next_slice = _expire_task(expired.key, request)
      if next_slice:
        # Expiring a TaskToRun for TaskSlice may reenqueue a new TaskToRun.
        reenqueued_requests.append(request)
        continue
      if summary:
        aborted_requests.append(request)
        continue
      # It's not a big deal, the bot will continue running.
      skipped += 1
  finally:
    if aborted_requests:
      report_lines = [
          '  %s/user/task/%s  %s' % (
              host, req.task_id, req.task_slice(0).properties.dimensions)
          for req in aborted_requests
      ]
      logging.warning(
          'EXPIRED!\n%d tasks:\n%s',
          len(aborted_requests),
          '\n'.join(report_lines))
    logging.info(
        'Reenqueued %d tasks, killed %d, skipped %d',
        len(reenqueued_requests), len(aborted_requests), skipped)
  # These are returned primarily for unit testing verification.
  aborted_ids = [req.task_id for req in aborted_requests]
  reenqueued_ids = [req.task_id for req in reenqueued_requests]
  return aborted_ids, reenqueued_ids
예제 #11
0
def cron_abort_expired_task_to_run(host):
    """Aborts expired TaskToRun requests to execute a TaskRequest on a bot.

    A TaskToRun can end up expired for three reasons:
    - Task requests come in faster than they are completed, e.g. there are
      not enough bots to run all the tasks at the current rate. That's normal
      overflow and must be handled accordingly.
    - No connected bot satisfies the requested dimensions. This is trickier:
      it is either a typo in the dimensions, or the bots all died and the
      admins must reconnect them.
    - The server has internal failures that prevent it from distributing the
      tasks or from properly receiving results from the bots.

    Logging happens in a `finally` clause, so whatever was processed is
    reported even when the loop is interrupted by an exception.

    Args:
      host: prefix used to build the '<host>/user/task/<task_id>' lines of
          the warning logged for aborted tasks.

    Returns:
      Packed tasks ids of aborted tasks.
    """
    aborted_requests = []
    skipped = 0
    try:
        for expired in task_to_run.yield_expired_task_to_run():
            request = expired.request_key.get()
            if not _expire_task(expired.key, request):
                # It's not a big deal, the bot will continue running.
                skipped += 1
                continue
            # TODO(maruel): Know which try it is.
            aborted_requests.append(request)
            job_fields = ts_mon_metrics.extract_job_fields(request.tags)
            ts_mon_metrics.tasks_expired.increment(fields=job_fields)
            summary_key = task_pack.request_key_to_result_summary_key(
                request.key)
            stats.add_task_entry(
                'task_request_expired',
                summary_key,
                dimensions=request.properties.dimensions,
                user=request.user)
    finally:
        if aborted_requests:
            report_lines = [
                '  %s/user/task/%s  %s' %
                (host, req.task_id, req.properties.dimensions)
                for req in aborted_requests
            ]
            logging.warning(
                'EXPIRED!\n%d tasks:\n%s', len(aborted_requests),
                '\n'.join(report_lines))
        # TODO(maruel): Use stats_framework.
        logging.info(
            'Killed %d task, skipped %d', len(aborted_requests), skipped)
    return [req.task_id for req in aborted_requests]
예제 #12
0
def cron_abort_expired_task_to_run(host):
    """Aborts expired TaskToRun requests to execute a TaskRequest on a bot.

    A TaskToRun can end up expired for three reasons:
    - Task requests come in faster than they are completed, e.g. there are
      not enough bots to run all the tasks at the current rate. That's normal
      overflow and must be handled accordingly.
    - No connected bot satisfies the requested dimensions. This is trickier:
      it is either a typo in the dimensions, or the bots all died and the
      admins must reconnect them.
    - The server has internal failures that prevent it from distributing the
      tasks or from properly receiving results from the bots.

    Logging happens in a `finally` clause, so whatever was processed is
    reported even when the loop is interrupted by an exception.

    Args:
      host: host name used to build the 'https://<host>/user/task/<task_id>'
          lines of the error logged for aborted tasks.

    Returns:
      Packed tasks ids of aborted tasks.
    """
    aborted_ids = []
    skipped = 0
    try:
        for expired in task_to_run.yield_expired_task_to_run():
            request = expired.request_key.get()
            if not _expire_task(expired.key, request):
                # It's not a big deal, the bot will continue running.
                skipped += 1
                continue
            # TODO(maruel): Know which try it is.
            aborted_ids.append(request.task_id)
            summary_key = task_pack.request_key_to_result_summary_key(
                request.key)
            stats.add_task_entry(
                "task_request_expired",
                summary_key,
                dimensions=request.properties.dimensions,
                user=request.user,
            )
    finally:
        if aborted_ids:
            task_urls = [
                "  https://%s/user/task/%s" % (host, task_id)
                for task_id in aborted_ids
            ]
            logging.error(
                "EXPIRED!\n%d tasks:\n%s",
                len(aborted_ids),
                "\n".join(task_urls),
            )
        # TODO(maruel): Use stats_framework.
        logging.info("Killed %d task, skipped %d", len(aborted_ids), skipped)
    return aborted_ids