Beispiel #1
0
    def testGetIsolatedDataForStepNotOnlyFailure(self, mock_fn):
        """Tests that isolated data is returned for every task of a step,
        not only the failed ones, when only_failure=False."""
        master_name = 'm'
        builder_name = 'b'
        build_number = 223
        step_name = 'unit_tests'

        # Feed the mocked task listing with the sample build-step data.
        mock_fn.return_value = [
            SwarmingTaskData(item) for item in _SAMPLE_BUILD_STEP_DATA
        ]

        data = swarming.GetIsolatedDataForStep(
            master_name, builder_name, build_number, step_name, None,
            only_failure=False)

        isolated_server = waterfall_config.GetSwarmingSettings().get(
            'isolated_server')
        expected_data = [
            {
                'digest': 'isolatedhashunittests',
                'namespace': 'default-gzip',
                'isolatedserver': isolated_server,
            },
            {
                'digest': 'isolatedhashunittests1',
                'namespace': 'default-gzip',
                'isolatedserver': isolated_server,
            },
        ]
        self.assertEqual(sorted(expected_data), sorted(data))
Beispiel #2
0
    def testGetIsolatedDataForStepNotOnlyFailure(self):
        """Tests that isolated data for all tasks (not only failures) is
        collected when only_failure=False."""
        master_name = 'm'
        builder_name = 'b'
        build_number = 223
        step_name = 'unit_tests'
        self.http_client._SetResponseForGetRequestSwarmingList(
            master_name, builder_name, build_number, step_name)

        data = swarming_util.GetIsolatedDataForStep(
            master_name, builder_name, build_number, step_name,
            self.http_client, only_failure=False)

        isolated_server = waterfall_config.GetSwarmingSettings().get(
            'isolated_server')
        expected_data = [
            {
                'digest': 'isolatedhashunittests',
                'namespace': 'default-gzip',
                'isolatedserver': isolated_server,
            },
            {
                'digest': 'isolatedhashunittests1',
                'namespace': 'default-gzip',
                'isolatedserver': isolated_server,
            },
        ]
        self.assertEqual(sorted(expected_data), sorted(data))
Beispiel #3
0
def _BotsAvailableForTask(step_metadata):
    """Check if there are available bots for a swarming task's dimensions.

  Args:
    step_metadata (dict): Info about a step to determine the bot's
        dimensions to query Swarming with about bot availability.

  Returns:
    (bool): Whether or not there are enough bots available to trigger the task
        immediately.
  """
    if not step_metadata:
        return False

    swarming_settings = waterfall_config.GetSwarmingSettings()
    minimum_available_bots = swarming_settings.get(
        'minimum_number_of_available_bots',
        flake_constants.DEFAULT_MINIMUM_NUMBER_AVAILABLE_BOTS)
    minimum_available_rate = swarming_settings.get(
        'minimum_percentage_of_available_bots',
        flake_constants.DEFAULT_MINIMUM_PERCENTAGE_AVAILABLE_BOTS)

    # NOTE(review): FinditHttpClient is passed as a class here, while other
    # call sites pass an instance (FinditHttpClient()) -- confirm GetBotCounts
    # accepts the class itself.
    bot_counts = swarming_util.GetBotCounts(
        swarming.SwarmingHost(), step_metadata.get('dimensions'),
        FinditHttpClient)

    # Fall back to -1 so a missing/zero total yields a negative rate
    # (treated as "not available") rather than a ZeroDivisionError.
    total_count = bot_counts.count or -1
    available_count = bot_counts.available or 0
    available_rate = float(available_count) / total_count

    return (available_count > minimum_available_bots
            and available_rate > minimum_available_rate)
Beispiel #4
0
def TriggerSwarmingTask(run_swarming_task_parameters, runner_id):
    """Triggers a swarming rerun for the given tests in a given build.

    Args:
        run_swarming_task_parameters: Parameters object carrying the build
            key, step name and the tests to rerun.
        runner_id: Identifier of the runner triggering the task.

    Returns:
        The id of the triggered Swarming task, or None if triggering failed.
    """
    master_name, builder_name, build_number = (
        run_swarming_task_parameters.build_key.GetParts())
    step_name = run_swarming_task_parameters.step_name
    tests = run_swarming_task_parameters.tests
    assert tests, 'No tests to trigger swarming task for.'

    http_client = FinditHttpClient()

    # 1. Retrieve Swarming task parameters from a given Swarming task id.
    ref_task_id, ref_request = swarming.GetReferredSwarmingTaskRequestInfo(
        master_name, builder_name, build_number, step_name, http_client)

    # 2. Update/Overwrite parameters for the re-run.
    iterations_to_rerun = waterfall_config.GetSwarmingSettings().get(
        'iterations_to_rerun')
    new_request = CreateNewSwarmingTaskRequest(
        runner_id, ref_task_id, ref_request, master_name, builder_name,
        build_number, step_name, tests, iterations_to_rerun)

    # 3. Trigger a new Swarming task to re-run the failed tests.
    task_id, _ = swarming_util.TriggerSwarmingTask(
        swarming.SwarmingHost(), new_request, http_client)

    if task_id:  # pragma: no branch.
        # 4. Update swarming task.
        OnSwarmingTaskTriggered(master_name, builder_name, build_number,
                                step_name, tests, task_id, iterations_to_rerun,
                                new_request)

    return task_id
Beispiel #5
0
def TriggerSwarmingTask(request, http_client):
  """Triggers a new Swarming task for the given request.

  The Swarming task priority will be overwritten, and extra tags might be added.
  Args:
    request (SwarmingTaskRequest): A Swarming task request.
    http_client (RetryHttpClient): An http client with automatic retry.
  """
  swarming_settings = waterfall_config.GetSwarmingSettings()

  # In Swarming, the smaller the value, the higher the priority. CQ runs at
  # priority 30; clamp ours to at least 100 so we stay well below CQ for now.
  # Later we might use a higher priority -- a lower value here.
  request.priority = max(100, swarming_settings.get('default_request_priority'))
  request.expiration_secs = (
      swarming_settings.get('request_expiration_hours') * 60 * 60)

  request.tags.extend(['findit:1', 'project:Chromium', 'purpose:post-commit'])

  url = 'https://%s/_ah/api/swarming/v1/tasks/new' % swarming_settings.get(
      'server_host')
  response_data, error = _SendRequestToServer(
      url, http_client, request.Serialize())

  if error:
    return None, error
  return json.loads(response_data)['task_id'], None
Beispiel #6
0
    def testRetrieveShardedTestResultsFromIsolatedServer(self):
        """Tests collecting and merging test results across three shards."""
        swarming_settings = waterfall_config.GetSwarmingSettings()
        isolated_server = swarming_settings.get('isolated_server')
        isolated_storage_url = swarming_settings.get('isolated_storage_url')

        isolated_data = [{
            'digest': 'shard%d_isolated' % index,
            'namespace': 'default-gzip',
            'isolatedserver': isolated_server,
        } for index in (1, 2, 3)]

        # Wire up the canned responses for each shard's isolated fetch.
        for shard in ('shard1', 'shard2', 'shard3'):
            self.http_client._SetResponseForPostRequest('%s_isolated' % shard)
            self.http_client._SetResponseForPostRequest('%s_url' % shard)
            self.http_client._SetResponseForGetRequestIsolated(
                'https://%s/default-gzip/%s' % (isolated_storage_url, shard),
                shard)

        result = swarming_util.RetrieveShardedTestResultsFromIsolatedServer(
            isolated_data, self.http_client)

        expected_results_file = os.path.join(os.path.dirname(__file__), 'data',
                                             'expected_collect_results')
        with open(expected_results_file, 'r') as f:
            expected_result = json.loads(f.read())

        self.assertEqual(expected_result, result)
Beispiel #7
0
def GetSwarmingTaskResultById(task_id, http_client):
  """Gets swarming result, checks state and returns outputs ref if needed."""
  result_url = 'https://%s/_ah/api/swarming/v1/task/%s/result' % (
      waterfall_config.GetSwarmingSettings().get('server_host'), task_id)

  data, error = _SendRequestToServer(result_url, http_client)

  # On failure, return an empty dict (not None) alongside the error.
  json_data = json.loads(data) if not error else {}
  return json_data, error
    def _BotsAvailableForTask(self, step_metadata):
        """Check if there are available bots for this task's dimensions."""
        if not step_metadata:
            return False

        settings = waterfall_config.GetSwarmingSettings()
        minimum_count = settings.get(
            'minimum_number_of_available_bots', _MINIMUM_NUMBER_BOT)
        minimum_rate = settings.get(
            'minimum_percentage_of_available_bots', _MINIMUM_PERCENT_BOT)

        bot_counts = swarming_util.GetSwarmingBotCounts(
            step_metadata.get('dimensions'), HttpClientAppengine())

        # -1 fallback keeps the division defined and makes the rate negative
        # (i.e. "not available") when the total count is missing or zero.
        total = bot_counts.get('count') or -1
        available = bot_counts.get('available', 0)
        rate = float(available) / total

        return available > minimum_count and rate > minimum_rate
Beispiel #9
0
    def testRetrieveShardedTestResultsFromIsolatedServerSingleShard(self):
        """Tests retrieving results when there is only a single shard."""
        isolated_data = [{
            'digest': 'shard1_isolated',
            'namespace': 'default-gzip',
            'isolatedserver':
                waterfall_config.GetSwarmingSettings().get('isolated_server'),
        }]
        self.http_client._SetResponseForPostRequest('shard1_isolated')
        self.http_client._SetResponseForPostRequest('shard1_url')
        storage_url = waterfall_config.GetSwarmingSettings().get(
            'isolated_storage_url')
        self.http_client._SetResponseForGetRequestIsolated(
            'https://%s/default-gzip/shard1' % storage_url, 'shard1')

        result = swarming_util.RetrieveShardedTestResultsFromIsolatedServer(
            isolated_data, self.http_client)

        # A single shard should be returned as-is (decompressed + parsed).
        expected_result = json.loads(
            zlib.decompress(self.http_client._GetData('isolated', 'shard1')))
        self.assertEqual(expected_result, result)
Beispiel #10
0
def GetSwarmingTaskRequest(task_id, http_client):
  """Returns an instance of SwarmingTaskRequest representing the given task."""
  url = 'https://%s/_ah/api/swarming/v1/task/%s/request' % (
      waterfall_config.GetSwarmingSettings().get('server_host'), task_id)
  content, error = _SendRequestToServer(url, http_client)

  # TODO(lijeffrey): Handle/report error in calling functions.
  if error:
    return None
  return SwarmingTaskRequest.Deserialize(json.loads(content))
Beispiel #11
0
    def testRetrieveShardedTestResultsFromIsolatedServerFailed(self):
        """Tests that None is returned when the isolated fetch fails."""
        # No canned responses are registered, so the fetch will fail.
        isolated_data = [{
            'digest': 'shard1_isolated',
            'namespace': 'default-gzip',
            'isolatedserver':
                waterfall_config.GetSwarmingSettings().get('isolated_server'),
        }]

        result = swarming_util.RetrieveShardedTestResultsFromIsolatedServer(
            isolated_data, self.http_client)
        self.assertIsNone(result)
Beispiel #12
0
    def testDownloadTestResults(self):
        """Tests downloading test results for one isolated shard."""
        isolated_data = {
            'digest': 'shard1_isolated',
            'namespace': 'default-gzip',
            'isolatedserver':
                waterfall_config.GetSwarmingSettings().get('isolated_server'),
        }
        storage_url = waterfall_config.GetSwarmingSettings().get(
            'isolated_storage_url')
        self.http_client._SetResponseForPostRequest('shard1_isolated')
        self.http_client._SetResponseForPostRequest('shard1_url')
        self.http_client._SetResponseForGetRequestIsolated(
            'https://%s/default-gzip/shard1' % storage_url, 'shard1')

        result, error = swarming_util._DownloadTestResults(
            isolated_data, self.http_client)

        expected_result = json.loads(
            zlib.decompress(self.http_client._GetData('isolated', 'shard1')))
        self.assertEqual(expected_result, result)
        self.assertIsNone(error)
Beispiel #13
0
    def testGetSwarmingTaskFailureLog(self):
        """Tests fetching a failure log referenced by a task's outputs_ref."""
        outputs_ref = {
            'isolatedserver':
                waterfall_config.GetSwarmingSettings().get('isolated_server'),
            'namespace': 'default-gzip',
            'isolated': 'shard1_isolated',
        }
        self.http_client._SetResponseForPostRequest('shard1_isolated')
        self.http_client._SetResponseForPostRequest('shard1_url')
        storage_url = waterfall_config.GetSwarmingSettings().get(
            'isolated_storage_url')
        self.http_client._SetResponseForGetRequestIsolated(
            'https://%s/default-gzip/shard1' % storage_url, 'shard1')

        result, error = swarming_util.GetSwarmingTaskFailureLog(
            outputs_ref, self.http_client)

        expected_result = json.loads(
            zlib.decompress(self.http_client._GetData('isolated', 'shard1')))
        self.assertEqual(expected_result, result)
        self.assertIsNone(error)
Beispiel #14
0
    def testDownloadTestResultsFailedForFileUrl(self):
        """Tests that a missing file-url response yields an error."""
        isolated_data = {
            'digest': 'shard1_isolated',
            'namespace': 'default-gzip',
            'isolatedserver':
                waterfall_config.GetSwarmingSettings().get('isolated_server'),
        }
        # Only the first POST is stubbed; the follow-up file-url request
        # has no canned response and should fail.
        self.http_client._SetResponseForPostRequest('shard1_isolated')

        result, error = swarming_util._DownloadTestResults(
            isolated_data, self.http_client)

        self.assertIsNone(result)
        self.assertIsNotNone(error)
Beispiel #15
0
    def testDownloadTestResultsFailedForSecondHash(self):
        """Tests that an unknown digest produces an error result."""
        isolated_data = {
            'digest': 'not found',
            'namespace': 'default-gzip',
            'isolatedserver':
                waterfall_config.GetSwarmingSettings().get('isolated_server'),
        }

        result, error = swarming_util._DownloadTestResults(
            isolated_data, self.http_client)

        self.assertIsNone(result)
        self.assertIsNotNone(error)
Beispiel #16
0
    def testGetIsolatedOutputForTask(self):
        """Tests retrieving a task's isolated output by task id."""
        task_id = '2944afa502297110'
        self.http_client._SetResponseForGetRequestSwarmingResult(task_id)
        self.http_client._SetResponseForPostRequest('shard1_isolated')
        self.http_client._SetResponseForPostRequest('shard1_url')
        storage_url = waterfall_config.GetSwarmingSettings().get(
            'isolated_storage_url')
        self.http_client._SetResponseForGetRequestIsolated(
            'https://%s/default-gzip/shard1' % storage_url, 'shard1')

        result = swarming_util.GetIsolatedOutputForTask(
            task_id, self.http_client)

        expected_result = json.loads(
            zlib.decompress(self.http_client._GetData('isolated', 'shard1')))
        self.assertEqual(expected_result, result)
Beispiel #17
0
def ListSwarmingTasksDataByTags(
    master_name, builder_name, build_number, http_client,
    additional_tag_filters=None):
  """Downloads tasks data from swarming server.

  Args:
    master_name(str): Value of the master tag.
    builder_name(str): Value of the buildername tag.
    build_number(int): Value of the buildnumber tag.
    http_client(RetryHttpClient): The http client to send HTTPs requests.
    additional_tag_filters(dict): More tag filters to be added.
  """
  base_url = ('https://%s/_ah/api/swarming/v1/tasks/'
              'list?tags=%s&tags=%s&tags=%s') % (
                  waterfall_config.GetSwarmingSettings().get('server_host'),
                  urllib.quote('master:%s' % master_name),
                  urllib.quote('buildername:%s' % builder_name),
                  urllib.quote('buildnumber:%d' % build_number))
  for tag_name, tag_value in (additional_tag_filters or {}).iteritems():
    base_url += '&tags=%s' % urllib.quote('%s:%s' % (tag_name, tag_value))

  items = []
  cursor = None

  # Follow the server's cursor-based pagination until it runs out.
  while True:
    url = base_url if not cursor else (
        base_url + '&cursor=%s' % urllib.quote(cursor))
    new_data, _ = _SendRequestToServer(url, http_client)

    # TODO(lijeffrey): handle error in calling functions.
    if not new_data:
      break

    new_data_json = json.loads(new_data)
    items.extend(new_data_json.get('items') or [])

    cursor = new_data_json.get('cursor')
    if not cursor:
      break

  return items
Beispiel #18
0
 def check_task_completion():
   # Handles the outcome of one polling iteration for a swarming task:
   #   - task finished with data: persist timestamps and complete the pipeline;
   #   - overall deadline passed: record a timeout error and complete;
   #   - otherwise: stash the current params so the next callback resumes.
   if task_completed and data is not None:
     # Keep any already-recorded times; fall back to the swarming payload.
     task.created_time = (task.created_time or
                          self._ConvertDateTime(data.get('created_ts')))
     task.started_time = (task.started_time or
                          self._ConvertDateTime(data.get('started_ts')))
     task.completed_time = (task.completed_time or
                            self._ConvertDateTime(data.get('completed_ts')))
     task.put()
     pipeline_result = self._GetPipelineResult(
         step_name, step_name_no_platform, task)
     self.complete(pipeline_result)
   elif time.time() > deadline:  # pragma: no cover
     # Timeout.
     # Updates status as ERROR.
     task.status = analysis_status.ERROR
     task.error = {
         'code': swarming_util.TIMED_OUT,
         'message': 'Process swarming task result timed out'
     }
     task.put()
     timeout_hours = waterfall_config.GetSwarmingSettings().get(
         'task_timeout_hours')
     logging.error('Swarming task timed out after %d hours.' % timeout_hours)
     pipeline_result = self._GetPipelineResult(
         step_name, step_name_no_platform, task)
     self.complete(pipeline_result)
   else:
     # Task still in progress: save the polling state needed to resume on
     # the next callback invocation.
     self.last_params = {
         'task_id': task_id,
         'step_name': step_name,
         'call_args': call_args,
         'deadline': deadline,
         'server_query_interval_seconds': server_query_interval_seconds,
         'task_started': task_started,
         'task_completed': task_completed,
         'step_name_no_platform': step_name_no_platform,
     }
     # Update the stored callback url with possibly modified params.
     new_callback_url = self.get_callback_url(callback_params=json.dumps(
         self.last_params))
     if task.callback_url != new_callback_url:  # pragma: no cover
       task.callback_url = new_callback_url
       task.put()
 def testGetSwarmingSettings(self):
     # Verifies the complete set of swarming settings returned by
     # waterfall_config.GetSwarmingSettings().
     self.assertEqual(
         {
             'server_host': 'chromium-swarm.appspot.com',
             'default_request_priority': 150,
             'request_expiration_hours': 20,
             'server_query_interval_seconds': 60,
             'task_timeout_hours': 23,
             'isolated_server': 'https://isolateserver.appspot.com',
             'isolated_storage_url': 'isolateserver.storage.googleapis.com',
             'iterations_to_rerun': 10,
             'get_swarming_task_id_timeout_seconds': 300,
             'get_swarming_task_id_wait_seconds': 10,
             'server_retry_timeout_hours': 2,
             'maximum_server_contact_retry_interval_seconds': 5 * 60,
             'should_retry_server': False,
             'minimum_number_of_available_bots': 5,
             'minimum_percentage_of_available_bots': 0.1,
         }, waterfall_config.GetSwarmingSettings())
Beispiel #20
0
    def testGetSwarmingTaskResultById(self):
        """Tests fetching a completed swarming task result by task id."""
        task_id = '2944afa502297110'
        self.http_client._SetResponseForGetRequestSwarmingResult(task_id)

        data, error = swarming_util.GetSwarmingTaskResultById(
            task_id, self.http_client)

        expected_outputs_ref = {
            'isolatedserver':
                waterfall_config.GetSwarmingSettings().get('isolated_server'),
            'namespace': 'default-gzip',
            'isolated': 'shard1_isolated',
        }
        self.assertEqual('COMPLETED', data['state'])
        self.assertEqual(expected_outputs_ref, data['outputs_ref'])
        self.assertIsNone(error)
Beispiel #21
0
def GetTaskIdFromSwarmingTaskEntity(urlsafe_task_key):
  """Gets swarming task id from SwarmingTask. Waits and polls if needed."""
  settings = waterfall_config.GetSwarmingSettings()
  wait_seconds = settings.get('get_swarming_task_id_wait_seconds')
  deadline = time.time() + settings.get(
      'get_swarming_task_id_timeout_seconds')

  while time.time() < deadline:
    swarming_task = ndb.Key(urlsafe=urlsafe_task_key).get()
    if not swarming_task:
      raise Exception('Swarming task was deleted unexpectedly!')
    if swarming_task.task_id:
      return swarming_task.task_id
    # Wait for the existing pipeline to start the Swarming task.
    time.sleep(wait_seconds)

  raise Exception('Timed out waiting for task_id.')
Beispiel #22
0
    def _GetSwarmingTaskId(self, *args):
        """Polls the swarming task entity until it has a task_id.

        Raises if the entity disappears or the configured timeout elapses.
        """
        settings = waterfall_config.GetSwarmingSettings()
        wait_seconds = settings.get('get_swarming_task_id_wait_seconds')
        deadline = time.time() + settings.get(
            'get_swarming_task_id_timeout_seconds')

        while time.time() < deadline:
            swarming_task = self._GetSwarmingTask(*args)
            if not swarming_task:  # pragma: no cover. Pipeline will retry.
                raise Exception('Swarming task was deleted unexpectedly!')
            if swarming_task.task_id:
                return swarming_task.task_id
            # Wait for the existing pipeline to start the Swarming task.
            time.sleep(wait_seconds)

        raise Exception('Time out!')  # pragma: no cover. Pipeline will retry.
Beispiel #23
0
def TriggerSwarmingTask(request, http_client):
    """Triggers a new Swarming task for the given request.

  The Swarming task priority will be overwritten, and extra tags might be added.
  Args:
    request (SwarmingTaskRequest): A Swarming task request.
    http_client (RetryHttpClient): An http client with automatic retry.
  """
    swarming_settings = waterfall_config.GetSwarmingSettings()

    # In Swarming, the smaller the value, the higher the priority. CQ runs at
    # priority 30; clamp ours to at least 100 to stay well below CQ for now.
    # Later we might use a higher priority -- a lower value here.
    request.priority = str(
        max(100, swarming_settings.get('default_request_priority')))
    request.expiration_secs = str(
        swarming_settings.get('request_expiration_hours') * 60 * 60)

    request.tags.extend(
        ['findit:1', 'project:Chromium', 'purpose:post-commit'])

    return swarming_util.TriggerSwarmingTask(SwarmingHost(), request,
                                             http_client)
Beispiel #24
0
def GetSwarmingBotCounts(dimensions, http_client):
  """Gets number of swarming bots for certain dimensions.

  Args:
    dimensions (dict): A dict of dimensions.
    http_client (HttpClient): The httpclient object with which to make the
      server calls.
  Returns:
    bot_counts (dict): Dict of numbers of available swarming bots.
  """
  if not dimensions:
    return {}

  # Url looks like 'https://chromium-swarm.appspot.com/_ah/api/swarming/v1/bots
  # /count?dimensions=os:Windows-7-SP1&dimensions=cpu:x86-64'
  url = 'https://%s/_ah/api/swarming/v1/bots/count?dimensions=%s' % (
      waterfall_config.GetSwarmingSettings().get('server_host'),
      '&dimensions='.join(
          '%s:%s' % (k, v) for k, v in dimensions.iteritems()))

  content, error = _SendRequestToServer(url, http_client)
  if error or not content:
    return {}

  content_data = json.loads(content)
  bot_counts = {
      key: int(content_data.get(key, 0))
      for key in ('busy', 'count', 'dead', 'quarantined')
  }
  # 'available' is derived: total minus every unavailable category.
  bot_counts['available'] = (bot_counts['count'] - bot_counts['busy'] -
                             bot_counts['dead'] - bot_counts['quarantined'])

  return bot_counts
Beispiel #25
0
def _GenerateSwarmingTasksData(failure_result_map):
  """Collects info for all related swarming tasks.

  Args:
    failure_result_map (dict): Maps a step name either to a dict (presumably
        test name -> build key, for swarming test failures) or directly to a
        build key string (non-swarming failures) -- verify against callers.

  Returns: A dict as below:
      {
          'step1': {
              'swarming_tasks': {
                  'm/b/121': {
                      'task_info': {
                          'status': 'Completed',
                          'task_id': 'task1',
                          'task_url': ('https://chromium-swarm.appspot.com/user'
                                       '/task/task1')
                      },
                      'all_tests': ['test2', 'test3', 'test4'],
                      'reliable_tests': ['test2'],
                      'flaky_tests': ['test3', 'test4']
                  }
              }
          },
          'step2': {
              'swarming_tasks': {
                  'm/b/121': {
                      'task_info': {
                          'status': 'Pending'
                      },
                      'all_tests': ['test1']
                  }
              }
          },
          'step3': {
              'swarming_tasks': {
                  'm/b/121': {
                      'task_info': {
                          'status': 'No swarming rerun found'
                      },
                      'all_tests': ['test1']
                  }
              }
          }
      }
  """

  # Nested structure: step_name -> 'swarming_tasks' -> build key -> info dict.
  tasks_info = defaultdict(lambda: defaultdict(lambda: defaultdict(dict)))

  swarming_server = waterfall_config.GetSwarmingSettings()['server_host']

  for step_name, failure in failure_result_map.iteritems():
    step_tasks_info = tasks_info[step_name]['swarming_tasks']

    if isinstance(failure, dict):
      # Only swarming test failures have swarming re-runs.
      # Multiple tests may map to the same build key; deduplicate.
      swarming_task_keys = set(failure.values())

      for key in swarming_task_keys:
        task_dict = step_tasks_info[key]
        referred_build_keys = BaseBuildModel.GetBuildInfoFromBuildKey(key)
        task = WfSwarmingTask.Get(*referred_build_keys, step_name=step_name)
        all_tests = _GetAllTestsForASwarmingTask(key, failure)
        task_dict['all_tests'] = all_tests
        if not task:  # In case task got manually removed from data store.
          task_info = {'status': result_status.NO_SWARMING_TASK_FOUND}
        else:
          task_info = {'status': task.status}

          # Get the step name without platform.
          # This value should have been saved in task.parameters;
          # in case of no such value saved, split the step_name.
          task_dict['ref_name'] = (
              step_name.split()[0]
              if not task.parameters or not task.parameters.get('ref_name') else
              task.parameters['ref_name'])

          if task.task_id:  # Swarming rerun has started.
            task_info['task_id'] = task.task_id
            task_info['task_url'] = 'https://%s/user/task/%s' % (
                swarming_server, task.task_id)
          if task.classified_tests:
            # Swarming rerun has completed.
            # Use its result to get reliable and flaky tests.
            # If task has not completed, there will be no try job yet,
            # the result will be grouped in unclassified failures temporarily.
            # Only report tests that belong to this task's test set.
            reliable_tests = task.classified_tests.get('reliable_tests', [])
            task_dict['reliable_tests'] = [
                test for test in reliable_tests if test in all_tests
            ]
            flaky_tests = task.classified_tests.get('flaky_tests', [])
            task_dict['flaky_tests'] = [
                test for test in flaky_tests if test in all_tests
            ]

        task_dict['task_info'] = task_info
    else:
      # Non-swarming failure: no rerun exists; record a placeholder status.
      step_tasks_info[failure] = {
          'task_info': {
              'status': result_status.NON_SWARMING_NO_RERUN
          }
      }

  return tasks_info
Beispiel #26
0
def _SendRequestToServer(url, http_client, post_data=None):
  """Sends GET/POST request to arbitrary url and returns response content.

  Because the Swarming and Isolated servers that _SendRequestToServer tries to
  contact are prone to outages, exceptions trying to reach them may occur thus
  this method should retry. We want to monitor and document these occurrences
  even if the request eventually succeeds after retrying, with the last error
  encountered being the one that is reported.

  Args:
    url (str): The url to send the request to.
    http_client (HttpClient): The httpclient object with which to make the
      server calls.
    post_data (dict): Data/params to send with the request, if any. When
      present the request is a POST with a JSON body, otherwise a GET.

  Returns:
    content (dict), error (dict): The content from the server and the last
    error encountered trying to retrieve it. On total failure content is None;
    on success after retries, content is returned together with the last error
    so callers can monitor transient failures.
  """
  headers = {'Authorization': 'Bearer ' + auth_util.GetAuthToken()}
  swarming_settings = waterfall_config.GetSwarmingSettings()
  should_retry = swarming_settings.get('should_retry_server')
  timeout_seconds = (
      swarming_settings.get('server_retry_timeout_hours') * 60 * 60)
  maximum_retry_interval = swarming_settings.get(
      'maximum_server_contact_retry_interval_seconds')
  deadline = time.time() + timeout_seconds
  retry_backoff = 60
  tries = 1
  # Last error seen across all attempts; reported even on eventual success.
  error = None

  if post_data:
    post_data = json.dumps(post_data, sort_keys=True, separators=(',', ':'))
    headers['Content-Type'] = 'application/json; charset=UTF-8'
    headers['Content-Length'] = len(post_data)

  while True:
    # Track this attempt's outcome separately from |error|. Previously |error|
    # was never reset between attempts, so after one transient exception a
    # later 200 response could never be returned and the loop could only exit
    # via the deadline.
    attempt_error = None
    status_code = None
    content = None
    try:
      if post_data:
        status_code, content = http_client.Post(url, post_data, headers=headers)
      else:
        status_code, content = http_client.Get(url, headers=headers)
    except ConnectionClosedError as e:
      attempt_error = {
          'code': URLFETCH_CONNECTION_CLOSED_ERROR,
          'message': e.message
      }
      _OnConnectionFailed(url, 'ConnectionClosedError')
    except DeadlineExceededError as e:
      attempt_error = {
          'code': URLFETCH_DEADLINE_EXCEEDED_ERROR,
          'message': e.message
      }
      _OnConnectionFailed(url, 'DeadlineExceededError')
    except DownloadError as e:
      attempt_error = {
          'code': URLFETCH_DOWNLOAD_ERROR,
          'message': e.message
      }
      _OnConnectionFailed(url, 'DownloadError')
    except Exception as e:  # pragma: no cover
      logging.error(
          'An unknown exception occurred that need to be monitored: %s',
          e.message)
      attempt_error = {
          'code': UNKNOWN,
          'message': e.message
      }
      _OnConnectionFailed(url, 'Unknown Exception')

    if attempt_error:
      error = attempt_error
      logging.error(error['message'])
    elif status_code != 200:
      # The retry upon 50x (501 excluded) is automatically handled in the
      # underlying http_client.
      # By default, it retries 5 times with exponential backoff.
      error = {
          'code': EXCEEDED_MAX_RETRIES_ERROR,
          'message': 'Max retries exceeded trying to reach %s' % url
      }
      logging.error(error['message'])
    else:
      # Even if the call is successful, still return the last error encountered.
      return content, error

    if should_retry and time.time() < deadline:  # pragma: no cover
      # Wait, then retry if applicable.
      wait_time = _GetBackoffSeconds(
          retry_backoff, tries, maximum_retry_interval)
      logging.info('Retrying connection to %s in %d seconds', url, wait_time)
      time.sleep(wait_time)
      tries += 1
    else:
      if should_retry:
        # Indicate in the error that the retry timeout was reached.
        error['retry_timeout'] = True
      break

  logging.error('Failed to get an adequate response from %s. No data could be '
                'retrieved', url)
  return None, error
Beispiel #27
0
  def run(self, master_name, builder_name, build_number, step_name,
          task_id=None, *args):
    """Monitors a swarming task.

    Looks up the swarming task entity for the given build step, records an
    error if no task id exists, and otherwise registers a callback with the
    swarming server (plus a delayed cleanup callback) to track the task's
    progress. Special sentinel task ids (NO_TASK / NO_TASK_EXCEPTION) mark the
    analysis as skipped instead of monitoring anything.

    Args:
      master_name (str): The master name.
      builder_name (str): The builder name.
      build_number (str): The build number.
      step_name (str): The failed test step name.
      task_id (str): The task id to query the swarming server on the progresss
        of a swarming task.
    """
    call_args = self._GetArgs(master_name, builder_name, build_number,
                              step_name, *args)
    task = self._GetSwarmingTask(*call_args)

    # Prefer an explicitly passed task id; fall back to the stored one.
    task_id = task_id or task.task_id

    if not task_id:
      # The swarming task encountered an error when being triggered.
      # NOTE(review): the pragma below is missing its colon ('# pragma: no
      # branch'), so coverage tools will not honor it as written.
      if not task.error:  # pragma no branch
        task.error = {
            'error': 'Undetected error in swarming task. No task id found!',
            'message': 'Undetected error in swarming task. No task id found!'
        }
        task.put()
      return

    # Check to make this method idempotent.
    # If this pipeline already registered its callback url on the task, a
    # rerun of this method should do nothing.
    if task.callback_url and self.pipeline_id in task.callback_url:
      return

    timeout_hours = waterfall_config.GetSwarmingSettings().get(
        'task_timeout_hours')
    deadline = time.time() + timeout_hours * 60 * 60
    server_query_interval_seconds = waterfall_config.GetSwarmingSettings().get(
        'server_query_interval_seconds')
    task_started = False
    task_completed = False
    step_name_no_platform = None

    if task_id.lower() in (NO_TASK, NO_TASK_EXCEPTION):  # pragma: no branch
      # This situation happens in flake analysis: if the step with flaky test
      # didn't exist in checked build or the build had exception so the step
      # with flaky test didn't run at all, we should skip the build.
      has_valid_artifact = task_id != NO_TASK_EXCEPTION
      task.task_id = None
      task.status = analysis_status.SKIPPED
      task.put()
      self._UpdateMasterFlakeAnalysis(
          *call_args, pass_rate=-1, flake_swarming_task=task,
          has_valid_artifact=has_valid_artifact)
      self.complete(self._GetPipelineResult(
          step_name, step_name_no_platform, task))
      return

    # Snapshot of the monitoring state passed to every callback invocation.
    self.last_params = {
        'task_id': task_id,
        'step_name': step_name,
        'call_args': call_args,
        'deadline': deadline,
        'server_query_interval_seconds': server_query_interval_seconds,
        'task_started': task_started,
        'task_completed': task_completed,
        'step_name_no_platform': step_name_no_platform,
    }

    # Persist the callback url/target on the task so the swarming server (or a
    # rerun) can reach this pipeline instance.
    task.callback_url = self.get_callback_url(callback_params=json.dumps(
        self.last_params))
    task.callback_target = appengine_util.GetTargetNameForModule(
        constants.WATERFALL_BACKEND)
    task.put()

    # Guarantee one callback 10 minutes after the deadline to clean up even if
    # Swarming fails to call us back.
    self.delay_callback((timeout_hours * 60 + 10) * 60, self.last_params,
                        name=task_id + '_cleanup_task')

    # Run immediately in case the task already went from scheduled to started.
    self.callback(callback_params=self.last_params)
Beispiel #28
0
    def testGetIsolatedDataForFailedBuild(self):
        """Verifies isolated data is attached to swarming (non-compile) steps."""
        master_name = 'm'
        builder_name = 'b'
        build_number = 223
        failed_steps = {
            'a_tests': {'current_failure': 2, 'first_failure': 0},
            'unit_tests': {'current_failure': 2, 'first_failure': 0},
            'compile': {'current_failure': 2, 'first_failure': 0},
        }

        self.http_client._SetResponseForGetRequestSwarmingList(
            master_name, builder_name, build_number)
        result = swarming_util.GetIsolatedDataForFailedBuild(
            master_name, builder_name, build_number, failed_steps,
            self.http_client)

        def expected_isolated(digest):
            # One isolated-data entry as GetIsolatedDataForFailedBuild builds it.
            return {
                'digest': digest,
                'namespace': 'default-gzip',
                'isolatedserver': waterfall_config.GetSwarmingSettings().get(
                    'isolated_server'),
            }

        expected_failed_steps = {
            'a_tests': {
                'current_failure': 2,
                'first_failure': 0,
                'list_isolated_data': [
                    expected_isolated('isolatedhashatests')],
            },
            'unit_tests': {
                'current_failure': 2,
                'first_failure': 0,
                'list_isolated_data': [
                    expected_isolated('isolatedhashunittests1')],
            },
            # 'compile' is not a swarming step, so no isolated data is added.
            'compile': {'current_failure': 2, 'first_failure': 0},
        }

        # A WfStep entity should have been stored for each swarming step only.
        for step_name in failed_steps:
            step = WfStep.Get(master_name, builder_name, build_number,
                              step_name)
            if step_name == 'compile':
                self.assertIsNone(step)
            else:
                self.assertIsNotNone(step)

        self.assertTrue(result)
        self.assertEqual(expected_failed_steps, failed_steps)
 def _GetIterationsToRerun(self):
   """Returns the 'iterations_to_rerun' value from the swarming settings."""
   swarming_settings = waterfall_config.GetSwarmingSettings()
   return swarming_settings.get('iterations_to_rerun')
Beispiel #30
0
 def TimeoutSeconds(self):
   """Returns the configured task timeout converted to seconds.

   Reads 'task_timeout_hours' from the swarming settings, defaulting to 24.
   """
   hours = waterfall_config.GetSwarmingSettings().get('task_timeout_hours', 24)
   return hours * 3600