Ejemplo n.º 1
0
class BuildRun(ndb.Model):  # pragma: no cover
  """One finished build, stored as a child of the entity that holds its
  master and builder names."""

  buildnumber = ndb.IntegerProperty(required=True)
  result = ndb.IntegerProperty(required=True)
  time_finished = ndb.DateTimeProperty(required=True)
  time_started = ndb.DateTimeProperty(default=datetime.datetime.max)

  # Both flags are derived from |result| through the build_result helpers.
  is_success = ndb.ComputedProperty(
      lambda self: build_result.isResultSuccess(self.result))
  is_failure = ndb.ComputedProperty(
      lambda self: build_result.isResultFailure(self.result))

  @staticmethod
  def removeMasterPrefix(master):
    """Returns |master| with a leading 'master.' stripped, if present."""
    prefix = 'master.'
    if not master.startswith(prefix):
      return master
    return master[len(prefix):]

  def getURL(self):
    """Returns the build.chromium.org URL of this build."""
    owner = self.key.parent().get()
    master = self.removeMasterPrefix(owner.master)
    return ''.join([
        'https://build.chromium.org/p/', master,
        '/builders/', owner.builder,
        '/builds/', str(self.buildnumber),
    ])

  def getMiloURL(self):
    """Returns the luci-milo URL of this build, or None for old builds."""
    # In July 2016, protobuf changed and URLs for earlier builds do not open.
    cutoff = datetime.datetime(2016, 8, 1)
    if self.time_finished < cutoff:
      return None
    owner = self.key.parent().get()
    master = self.removeMasterPrefix(owner.master)
    return ''.join([
        'https://luci-milo.appspot.com/buildbot/', master,
        '/', owner.builder,
        '/', str(self.buildnumber),
    ])
Ejemplo n.º 2
0
class BuildRun(ndb.Model):
    """A finished build, stored as a child of the entity that holds the
    master and builder names it ran on."""

    def getURL(self):
        """Returns the build.chromium.org URL of this build."""
        # Fetch the parent entity once and reuse it for both fields, so the
        # URL costs a single datastore read.
        parent = self.key.parent().get()
        return ('http://build.chromium.org/p/' +
                parent.master + '/builders/' +
                parent.builder + '/builds/' +
                str(self.buildnumber))

    buildnumber = ndb.IntegerProperty(required=True)
    result = ndb.IntegerProperty(required=True)
    time_finished = ndb.DateTimeProperty(required=True)

    # Both flags are derived from |result| through the build_result helpers.
    is_success = ndb.ComputedProperty(
        lambda self: build_result.isResultSuccess(self.result))
    is_failure = ndb.ComputedProperty(
        lambda self: build_result.isResultFailure(self.result))
Ejemplo n.º 3
0
  def post(self):
    """Records a FlakyRun for a failed build that has a matching success.

    Expects the urlsafe ndb keys of both runs in the 'failure_run_key' and
    'success_run_key' request parameters. Fetches the JSON of the failed
    build, selects the failed steps that should count as flakes and stores
    one FlakeOccurrence per flake reported by self.get_flakes(). Nothing is
    stored when every failed step ends up ignored.

    Responds with 400 if a parameter is missing and with 500 if the build
    fetch comes back with a 4xx/5xx status.
    """
    if (not self.request.get('failure_run_key') or
        not self.request.get('success_run_key')):
      self.response.set_status(400, 'Invalid request parameters')
      return

    failure_run = ndb.Key(urlsafe=self.request.get('failure_run_key')).get()
    success_run = ndb.Key(urlsafe=self.request.get('success_run_key')).get()

    flaky_run = FlakyRun(
        failure_run=failure_run.key,
        failure_run_time_started=failure_run.time_started,
        failure_run_time_finished=failure_run.time_finished,
        success_run=success_run.key)

    failure_time = failure_run.time_finished
    patchset_builder_runs = failure_run.key.parent().get()

    master = BuildRun.removeMasterPrefix(patchset_builder_runs.master)
    url = ('https://chrome-build-extract.appspot.com/p/' + master +
           '/builders/' + patchset_builder_runs.builder + '/builds/' +
           str(failure_run.buildnumber) + '?json=1')
    urlfetch.set_default_fetch_deadline(60)
    logging.info('get_flaky_run_reason %s', url)
    response = urlfetch.fetch(url)
    if 400 <= response.status_code <= 599:
      logging.error('The request to %s has returned %d: %s', url,
                    response.status_code, response.content)
      self.response.set_status(500, 'Failed to fetch build.')
      return
    json_result = json.loads(response.content)

    # Collect the failed steps that are candidates for being counted.
    failed_steps = []
    for step in json_result['steps']:
      result = step['results'][0]
      # Successful steps and steps that are neither success nor failure are
      # of no interest here.
      if build_result.isResultSuccess(result):
        continue
      if not build_result.isResultFailure(result):
        continue
      step_name = step['name']
      step_text = ' '.join(step['text'])
      if step_name in IGNORED_STEPS:
        continue

      # Custom (non-trivial) rules for ignoring flakes in certain steps:
      #  - [swarming] ...: summary step would also be red (do not double count)
      #  - Patch failure: ignore non-infra failures as they are typically due to
      #    changes in the code on HEAD
      #  - bot_update PATCH FAILED: Duplicates failure in 'Patch failure' step.
      # NOTE(review): '... (retry summary)' steps are not checked here;
      # presumably they are covered by IGNORED_STEPS -- confirm.
      if (step_name.startswith('[swarming]') or
          (step_name == 'Patch failure' and result != build_result.EXCEPTION) or
          (step_name == 'bot_update' and 'PATCH FAILED' in step_text)):
        continue

      failed_steps.append(step)

    # Names of failed steps that must not be counted. A set, because it is
    # only ever used for membership tests.
    steps_to_ignore = set()
    for step in failed_steps:
      step_name = step['name']
      if '(with patch)' not in step_name:
        continue
      # Ignore any steps from the same test suite, which is determined by the
      # normalized step name. Additionally, if the step fails without patch,
      # ignore the original step as well because tree is busted.
      normalized_step_name = normalize_test_type(step_name, True)
      for other_step in failed_steps:
        if other_step == step:
          continue
        other_name = other_step['name']
        if normalize_test_type(other_name, True) != normalized_step_name:
          continue
        steps_to_ignore.add(other_name)
        if '(without patch)' in other_name:
          steps_to_ignore.add(step_name)

    flakes_to_update = []
    for step in failed_steps:
      step_name = step['name']
      if step_name in steps_to_ignore:
        continue
      flakes, is_step = self.get_flakes(
          master, patchset_builder_runs.builder, failure_run.buildnumber, step)
      for flake in flakes:
        flake_occurrence = FlakeOccurrence(name=step_name, failure=flake)
        flaky_run.flakes.append(flake_occurrence)
        flakes_to_update.append((flake, is_step))

    # Do not create FlakyRuns if all failed steps have been ignored.
    if not flaky_run.flakes:
      return

    flaky_run_key = flaky_run.put()
    for flake, is_step in flakes_to_update:
      self.add_failure_to_flake(flake, flaky_run_key, failure_time, is_step)
    self.flaky_runs.increment_by(1)
Ejemplo n.º 4
0
  def post(self):
    """Creates a FlakyRun for a failed build that has a matching success.

    Expects the urlsafe ndb keys of both runs in the 'failure_run_key' and
    'success_run_key' request parameters and responds with 400 when either
    is missing. The failed build's JSON is fetched from build.chromium.org,
    the failed steps that should count are selected, and one FlakeOccurrence
    is appended per flake returned by self.get_flakes(). Returns early,
    without storing anything, when the fetched body is not valid JSON;
    otherwise the FlakyRun is stored even if no flake was recorded.
    """
    if (not self.request.get('failure_run_key') or
        not self.request.get('success_run_key')):
      self.response.set_status(400, 'Invalid request parameters')
      return

    failure_run = ndb.Key(urlsafe=self.request.get('failure_run_key')).get()
    success_run = ndb.Key(urlsafe=self.request.get('success_run_key')).get()

    flaky_run = FlakyRun(
        failure_run=failure_run.key,
        failure_run_time_started=failure_run.time_started,
        failure_run_time_finished=failure_run.time_finished,
        success_run=success_run.key)

    success_time = success_run.time_finished
    failure_time = failure_run.time_finished
    # The parent entity of a run carries the master and builder names.
    patchset_builder_runs = failure_run.key.parent().get()

    # TODO(sergiyb): The parsing logic below is very fragile and will break with
    # any changes to step names and step text. We should move away from parsing
    # buildbot to tools like flakiness dashboard (test-results.appspot.com),
    # which uses a standardized JSON format.
    url = ('http://build.chromium.org/p/' + patchset_builder_runs.master +
           '/json/builders/' + patchset_builder_runs.builder +'/builds/' +
           str(failure_run.buildnumber))
    urlfetch.set_default_fetch_deadline(60)
    logging.info('get_flaky_run_reason ' + url)
    # NOTE: the HTTP status code is not inspected; an error page is only
    # detected if it fails to decode as JSON below.
    result = urlfetch.fetch(url).content
    try:
      json_result = json.loads(result)
    except ValueError:
      logging.exception('couldnt decode json for %s', url)
      return
    steps = json_result['steps']

    failed_steps = []
    # NOTE: passed_steps is filled in but never read in this method.
    passed_steps = []
    for step in steps:
      # |result| is reused here: from now on it is the step's result code,
      # no longer the fetched response body.
      result = step['results'][0]
      if build_result.isResultSuccess(result):
        passed_steps.append(step)
        continue
      if not build_result.isResultFailure(result):
        continue
      step_name = step['name']
      step_text = ' '.join(step['text'])
      # The following step failures are ignored:
      #  - steps: always red when any other step is red (not a failure)
      #  - [swarming] ...: summary step would also be red (do not double count)
      #  - presubmit: typically red due to missing OWNERs LGTM, not a flake
      #  - recipe failure reason: always red when build fails (not a failure)
      #  - Patch failure: if success run was before failure run, it is
      #    likely a legitimate failure. For example it often happens that
      #    developers use CQ dry run and then wait for a review. Once getting
      #    LGTM they check CQ checkbox, but the patch does not cleanly apply
      #    anymore.
      #  - bot_update PATCH FAILED: Corresponds to 'Patch failure' step.
      #  - test results: always red when another step is red (not a failure)
      #  - Uncaught Exception: summary step referring to an exception in another
      #    step (e.g. bot_update)
      #  - ... (retry summary): this is an artificial step to fail the build due
      #    to another step that has failed earlier (do not double count).
      if (step_name == 'steps' or step_name.startswith('[swarming]') or
          step_name == 'presubmit' or step_name == 'recipe failure reason' or
          (step_name == 'Patch failure' and success_time < failure_time) or
          (step_name == 'bot_update' and 'PATCH FAILED' in step_text) or
          step_name == 'test results' or step_name == 'Uncaught Exception' or
          step_name.endswith(' (retry summary)')):
        continue
      failed_steps.append(step)

    # Names of failed steps that must not be counted as flakes.
    steps_to_ignore = []
    for step in failed_steps:
      step_name = step['name']
      if ' (with patch)' in step_name:
        # Android instrumentation tests add a prefix before the step name, which
        # doesn't appear on the summary step (without suffixes). To make sure we
        # correctly ignore duplicate failures, we remove the prefix.
        step_name = step_name.replace('Instrumentation test ', '')

        # If a step fails without the patch, then the tree is busted. Don't
        # count as flake.
        step_name_with_no_modifier = step_name.replace(' (with patch)', '')
        step_name_without_patch = (
            '%s (without patch)' % step_name_with_no_modifier)
        for other_step in failed_steps:
          if other_step['name'] == step_name_without_patch:
            # Both the "with patch" and the "without patch" step are dropped.
            steps_to_ignore.append(step['name'])
            steps_to_ignore.append(other_step['name'])

    # Flakes are collected first and attached to their Flake entities only
    # after the FlakyRun has been stored and therefore has a key.
    flakes_to_update = []
    for step in failed_steps:
      step_name = step['name']
      if step_name in steps_to_ignore:
        continue
      flakes = self.get_flakes(
          patchset_builder_runs.master, patchset_builder_runs.builder,
          failure_run.buildnumber, step)
      for flake in flakes:
        flake_occurrence = FlakeOccurrence(name=step_name, failure=flake)
        flaky_run.flakes.append(flake_occurrence)
        flakes_to_update.append(flake)

    flaky_run_key = flaky_run.put()
    for flake in flakes_to_update:
      self.add_failure_to_flake(flake, flaky_run_key, failure_time)
    self.flaky_runs.increment_by(1)
Ejemplo n.º 5
0
def parse_cq_data(json_data):
  """Stores new try-job results from CQ status data and queues flake tasks.

  Walks json_data['results'], keeping only 'verifier_jobs_update' records of
  the 'try job' verifier for the 'chromium' project. Every finished job that
  has not been recorded yet is stored as a BuildRun; for each earlier run of
  the same patchset/builder with the opposite outcome a task is queued at
  /issues/create_flaky_run.

  Args:
    json_data: decoded JSON dict. Records and jobs with missing or malformed
      fields are skipped rather than aborting the whole batch.

  Returns:
    A list of '<builder><buildnumber>' strings, one per queued task.
  """
  logging_output = []
  for record in json_data.get('results', {}):
    fields = record.get('fields', {})
    if 'action' not in fields:
      continue

    if fields.get('action') != 'verifier_jobs_update':
      continue

    if fields.get('verifier') != 'try job':
      continue

    # At the moment, much of the parsing logic assumes this is a Chromium
    # tryjob.
    if fields.get('project') != 'chromium':
      continue

    job_states = fields.get('jobs', {})
    # The state a job is filed under is not consulted; only the job's own
    # |result| is used.
    for state in job_states:
      for job in job_states[state]:
        build_properties = job.get('build_properties')
        if not build_properties:
          continue

        try:
          master = job['master']
          builder = job['builder']
          job_result = job['result']
          timestamp_tz = dateutil.parser.parse(job['timestamp'])
          # We assume timestamps from chromium-cq-status are already in UTC.
          timestamp = timestamp_tz.replace(tzinfo=None)
        except (KeyError, ValueError):
          # A missing field or an unparseable timestamp only invalidates this
          # one job.
          continue

        try:
          buildnumber = get_int_value(build_properties, 'buildnumber')
          issue = get_int_value(build_properties, 'issue')
          patchset = get_int_value(build_properties, 'patchset')
          attempt_start_ts = get_int_value(build_properties, 'attempt_start_ts')
          # attempt_start_ts is divided by 10^6 to obtain seconds.
          time_started = datetime.datetime.utcfromtimestamp(
              attempt_start_ts / 1000000)
        except ValueError:
          continue

        if build_result.isResultPending(job_result):
          continue

        # At this point, only success or failure.
        success = build_result.isResultSuccess(job_result)

        patchset_builder_runs = get_patchset_builder_runs(issue=issue,
                                                          patchset=patchset,
                                                          master=master,
                                                          builder=builder)

        build_run = BuildRun(parent=patchset_builder_runs.key,
                             buildnumber=buildnumber,
                             result=job_result,
                             time_started=time_started,
                             time_finished=timestamp)

        previous_runs = BuildRun.query(
            ancestor=patchset_builder_runs.key).fetch()

        # We saw this build run already or there are multiple green runs,
        # in which case we ignore subsequent ones to avoid showing failures
        # multiple times.
        duplicate = any(
            previous_run.buildnumber == buildnumber or
            (build_run.is_success and previous_run.is_success)
            for previous_run in previous_runs)
        if duplicate:
          continue

        build_run.put()

        for previous_run in previous_runs:
          if previous_run.is_success == build_run.is_success:
            continue
          if success:
            # We saw the flake and then the pass.
            failure_run, success_run = previous_run, build_run
          else:
            # We saw the pass and then the failure. Could happen when fetching
            # historical data, or for the bot_update step (patch can't be
            # applied cleanly anymore).
            failure_run, success_run = build_run, previous_run

          logging_output.append(failure_run.key.parent().get().builder +
                                str(failure_run.buildnumber))

          # Queue a task to fetch the error of this failure and create FlakyRun.
          flakes_metric.increment_by(1)
          taskqueue.add(
              queue_name='issue-updates',
              url='/issues/create_flaky_run',
              params={'failure_run_key': failure_run.key.urlsafe(),
                      'success_run_key': success_run.key.urlsafe()})

  return logging_output
Ejemplo n.º 6
0
  def post(self):
    """Records a FlakyRun for a failed build that has a matching success.

    Expects the urlsafe ndb keys of both runs in the 'failure_run_key' and
    'success_run_key' request parameters. Requests the JSON of the failed
    build from Milo, selects the failed steps that should count as flakes and
    stores one FlakeOccurrence per flake reported by self.get_flakes().
    Nothing is stored when every failed step ends up ignored.

    Responds with 400 if a parameter is missing and with 500 if Milo answers
    with anything other than 200.
    """
    if (not self.request.get('failure_run_key') or
        not self.request.get('success_run_key')):
      self.response.set_status(400, 'Invalid request parameters')
      return

    failure_run = ndb.Key(urlsafe=self.request.get('failure_run_key')).get()
    success_run = ndb.Key(urlsafe=self.request.get('success_run_key')).get()

    flaky_run = FlakyRun(
        failure_run=failure_run.key,
        failure_run_time_started=failure_run.time_started,
        failure_run_time_finished=failure_run.time_finished,
        success_run=success_run.key)

    failure_time = failure_run.time_finished
    patchset_builder_runs = failure_run.key.parent().get()

    master = BuildRun.removeMasterPrefix(patchset_builder_runs.master)
    url = ('https://luci-milo.appspot.com/'
           'prpc/milo.Buildbot/GetBuildbotBuildJSON')
    request = json.dumps({
        'master': master,
        'builder': patchset_builder_runs.builder,
        'buildNum': failure_run.buildnumber,
    })
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    urlfetch.set_default_fetch_deadline(60)
    logging.info('get_flaky_run_reason: %s, %s', url, request)
    response = urlfetch.fetch(
        url, payload=request, method=urlfetch.POST, headers=headers,
        validate_certificate=True)
    if response.status_code != 200:
      logging.error('The request to %s has returned %d: %s', url,
                    response.status_code, response.content)
      self.response.set_status(500, 'Failed to fetch build.')
      return

    # The body may start with _MILO_RESPONSE_PREFIX, which is dropped before
    # decoding. The build JSON itself sits base64-encoded in the 'data' field.
    content = response.content
    if content.startswith(_MILO_RESPONSE_PREFIX):
      content = content[len(_MILO_RESPONSE_PREFIX):]
    data = json.loads(content)['data']
    json_result = json.loads(base64.b64decode(data))

    # Collect the failed steps that are candidates for being counted.
    failed_steps = []
    for step in json_result['steps']:
      result = step['results'][0]
      # Successful steps and steps that are neither success nor failure are
      # of no interest here.
      if build_result.isResultSuccess(result):
        continue
      if not build_result.isResultFailure(result):
        continue
      # For Luci builds, some steps don't have step text anymore. Such steps
      # include 'Failure reason', 'analyze', etc.
      step_text = ' '.join(step['text'] or [])
      step_name = step['name']
      if step_name in IGNORED_STEPS:
        continue

      # Custom (non-trivial) rules for ignoring flakes in certain steps:
      #  - [swarming] ...: summary step would also be red (do not double count)
      #  - Patch failure: ignore non-infra failures as they are typically due to
      #    changes in the code on HEAD
      #  - bot_update PATCH FAILED: Duplicates failure in 'Patch failure' step.
      # NOTE(review): '... (retry summary)' steps are not checked here;
      # presumably they are covered by IGNORED_STEPS -- confirm.
      if (step_name.startswith('[swarming]') or
          (step_name == 'Patch failure' and result != build_result.EXCEPTION) or
          (step_name == 'bot_update' and 'PATCH FAILED' in step_text)):
        continue

      failed_steps.append(step)

    # Names of failed steps that must not be counted. A set, because it is
    # only ever used for membership tests.
    steps_to_ignore = set()
    for step in failed_steps:
      step_name = step['name']
      if '(with patch)' not in step_name:
        continue
      # Ignore any steps from the same test suite, which is determined by the
      # normalized step name. Additionally, if the step fails without patch,
      # ignore the original step as well because tree is busted.
      normalized_step_name = normalize_test_type(step_name, True)
      for other_step in failed_steps:
        if other_step == step:
          continue
        other_name = other_step['name']
        if normalize_test_type(other_name, True) != normalized_step_name:
          continue
        steps_to_ignore.add(other_name)
        if '(without patch)' in other_name:
          steps_to_ignore.add(step_name)

    flakes_to_update = []
    for step in failed_steps:
      step_name = step['name']
      if step_name in steps_to_ignore:
        continue
      flakes, is_step = self.get_flakes(
          master, patchset_builder_runs.builder, failure_run.buildnumber, step)
      if is_step and not is_infra_step_flake(step_name):
        continue  # Ignore flakes of non-infra steps.
      for flake in flakes:
        flake_occurrence = FlakeOccurrence(name=step_name, failure=flake)
        flaky_run.flakes.append(flake_occurrence)
        flakes_to_update.append((flake, is_step))

    # Do not create FlakyRuns if all failed steps have been ignored.
    if not flaky_run.flakes:
      return

    flaky_run_key = flaky_run.put()
    for flake, is_step in flakes_to_update:
      if self.is_duplicate_occurrence(flake, flaky_run):
        logging.info('Not adding duplicate occurrence for the same CL')
        continue
      self.add_failure_to_flake(flake, flaky_run_key, failure_time, is_step)
    self.flaky_runs.increment_by(1)
Ejemplo n.º 7
0
def parse_cq_data(json_data):
    """Stores new try-job results from CQ status data and queues flake tasks.

    Walks json_data['results'], keeping only 'verifier_jobs_update' records
    of the 'try job' verifier for the 'chromium/chromium/src' project. Every
    finished job that has not been recorded yet is stored as a BuildRun; for
    each earlier run of the same patchset/builder with the opposite outcome
    a task is queued at /issues/create_flaky_run.

    Malformed records and jobs are logged, counted in parsing_errors and
    skipped.

    Args:
        json_data: decoded JSON dict.

    Returns:
        A list of '<builder><buildnumber>' strings, one per queued task.
    """
    logging_output = []
    for record in json_data.get('results', {}):
        fields = record.get('fields', {})
        if 'action' not in fields:
            logging.warning('Missing field action in status record')
            parsing_errors.increment_by(1)
            continue

        if fields.get('action') != 'verifier_jobs_update':
            continue

        if fields.get('verifier') != 'try job':
            continue

        # At the moment, much of the parsing logic assumes this is a Chromium
        # tryjob.
        project = fields.get('project')
        if project != 'chromium/chromium/src':
            logging.info('project not chromium: %s', project)
            continue

        job_states = fields.get('jobs', {})

        # Jobs are processed regardless of the state they are filed under.
        for job in itertools.chain.from_iterable(job_states.values()):
            try:
                builder = job['builder']
                job_result = job['result']
                timestamp_tz = dateutil.parser.parse(
                    job.get('created_ts') or job['timestamp'])
                # We assume timestamps from chromium-cq-status are already in
                # UTC.
                timestamp = timestamp_tz.replace(tzinfo=None)
            except (KeyError, ValueError):
                # A missing field or an unparseable timestamp only
                # invalidates this one job.
                logging.warning('Failed to parse job details', exc_info=True)
                parsing_errors.increment_by(1)
                continue

            if build_result.isResultPending(job_result):
                continue

            build_properties = job.get('build_properties')
            if not build_properties:
                logging.warning(
                    'Missing field build_properties in job details')
                parsing_errors.increment_by(1)
                continue

            # Placeholders used when the build carries no issue/patchset.
            issue = -1
            patchset = -1

            try:
                buildnumber = get_int_value(build_properties, 'buildnumber')
                if 'patch_issue' in build_properties:
                    issue = get_int_value(build_properties, 'patch_issue')
                else:  # pragma: no cover
                    logging.warning('no issue')

                if 'patch_set' in build_properties:
                    patchset = get_int_value(build_properties, 'patch_set')
                else:  # pragma: no cover
                    logging.warning('no patchset')

                if 'attempt_start_ts' in build_properties:
                    attempt_start_ts = get_int_value(build_properties,
                                                     'attempt_start_ts')
                    # attempt_start_ts is divided by 10^6 to obtain seconds.
                    time_started = datetime.datetime.utcfromtimestamp(
                        attempt_start_ts / 1000000)
                else:  # pragma: no cover
                    # Without a start time the job is skipped altogether.
                    logging.warning('no attempt_start_ts')
                    continue

                # For builds through Buildbucket, job['master'] is actually the
                # bucket name. For buildbot-based builds, it just happens to be
                # the same as the master name. For Luci-based builds, it is
                # different from the master name, and the master name is set as
                # a build property instead.
                # https://chromium.googlesource.com/chromium/src/+/infra/config/cr-buildbucket.cfg#115
                # So in either case, the "real" master name is in the build
                # properties.
                master = build_properties['mastername']

            except (ValueError, KeyError):
                logging.warning('Failed to parse build properties',
                                exc_info=True)
                parsing_errors.increment_by(1)
                continue

            # At this point, only success or failure.
            success = build_result.isResultSuccess(job_result)

            patchset_builder_runs = get_patchset_builder_runs(
                issue=issue, patchset=patchset, master=master, builder=builder)

            build_run = BuildRun(parent=patchset_builder_runs.key,
                                 buildnumber=buildnumber,
                                 result=job_result,
                                 time_started=time_started,
                                 time_finished=timestamp)

            previous_runs = BuildRun.query(
                ancestor=patchset_builder_runs.key).fetch()

            # We saw this build run already or there are multiple green runs,
            # in which case we ignore subsequent ones to avoid showing failures
            # multiple times.
            duplicate = any(
                previous_run.buildnumber == buildnumber or
                (build_run.is_success and previous_run.is_success)
                for previous_run in previous_runs)
            if duplicate:
                continue

            build_run.put()

            for previous_run in previous_runs:
                if previous_run.is_success == build_run.is_success:
                    continue
                if success:
                    # We saw the flake and then the pass.
                    failure_run, success_run = previous_run, build_run
                else:
                    # We saw the pass and then the failure. Could happen when
                    # fetching historical data, or for the bot_update step
                    # (patch can't be applied cleanly anymore).
                    failure_run, success_run = build_run, previous_run

                logging_output.append(failure_run.key.parent().get().builder +
                                      str(failure_run.buildnumber))

                # Queue a task to fetch the error of this failure and create
                # FlakyRun.
                flakes_metric.increment_by(1)
                taskqueue.add(queue_name='issue-updates',
                              url='/issues/create_flaky_run',
                              params={
                                  'failure_run_key': failure_run.key.urlsafe(),
                                  'success_run_key': success_run.key.urlsafe()
                              })

    return logging_output
Ejemplo n.º 8
0
def parse_cq_data(json_data):
  """Records try-job results from CQ status data and the flaky runs they form.

  Only 'verifier_jobs_update' records of the 'try job' verifier are used.
  Each finished job not seen before is stored as a BuildRun. For every
  earlier run of the same patchset/builder with the opposite outcome a
  FlakyRun is stored and get_flaky_run_reason is deferred for it.

  Args:
    json_data: decoded JSON dict with a 'results' list.

  Returns:
    A list of '<builder><buildnumber>' strings, one per stored FlakyRun.
  """
  logging_output = []
  for record in json_data['results']:
    fields = record['fields']
    if 'action' not in fields:
      continue
    if fields['action'] != 'verifier_jobs_update':
      continue
    if fields['verifier'] != 'try job':
      continue

    job_states = fields['jobs']
    # The state name itself is not consulted; only each job's |result| is.
    for state_name in job_states:
      for job in job_states[state_name]:
        build_properties = job['build_properties']
        if not build_properties:
          continue

        master = job['master']
        builder = job['builder']
        job_result = job['result']
        timestamp = datetime.datetime.strptime(job['timestamp'],
                                               '%Y-%m-%d %H:%M:%S.%f')
        try:
          buildnumber = get_int_value(build_properties, 'buildnumber')
          issue = get_int_value(build_properties, 'issue')
          patchset = get_int_value(build_properties, 'patchset')
        except ValueError:
          continue

        if build_result.isResultPending(job_result):
          continue

        # At this point, only success or failure.
        success = build_result.isResultSuccess(job_result)

        patchset_builder_runs = get_patchset_builder_runs(issue=issue,
                                                          patchset=patchset,
                                                          master=master,
                                                          builder=builder)

        build_run = BuildRun(parent=patchset_builder_runs.key,
                             buildnumber=buildnumber,
                             result=job_result,
                             time_finished=timestamp)

        previous_runs = BuildRun.query(
            ancestor=patchset_builder_runs.key).fetch()

        # Skip a build that was already recorded, and any further green run
        # once a green run exists, so failures are not reported repeatedly.
        already_recorded = any(
            earlier.buildnumber == buildnumber or
            (build_run.is_success and earlier.is_success)
            for earlier in previous_runs)
        if already_recorded:
          continue

        build_run.put()

        for earlier in previous_runs:
          if earlier.is_success == build_run.is_success:
            continue
          if success:
            # The flake came first, the pass is the new run.
            failing, passing = earlier, build_run
          else:
            # The pass came first, the failure is the new run. Could happen
            # when fetching historical data.
            failing, passing = build_run, earlier

          flaky_run = FlakyRun(
              failure_run=failing.key,
              failure_run_time_finished=failing.time_finished,
              success_run=passing.key)
          flaky_run.put()
          logging_output.append(failing.key.parent().get().builder +
                                str(failing.buildnumber))

          # Queue a task to fetch the error of this failure.
          deferred.defer(get_flaky_run_reason, flaky_run.key)

  return logging_output
Ejemplo n.º 9
0
def get_flaky_run_reason(flaky_run_key):
  """Fills in the flakes of an already stored FlakyRun.

  Downloads the buildbot JSON of the failed build referenced by the FlakyRun,
  picks the failed steps that should count, and appends a FlakeOccurance for
  every flake get_flakes() reports for them. The FlakyRun is saved at the
  end. Returns early, without saving, when the build JSON cannot be decoded.

  Args:
    flaky_run_key: key of the FlakyRun entity to update.
  """
  flaky_run = flaky_run_key.get()
  failure_run = flaky_run.failure_run.get()
  patchset_builder_runs = failure_run.key.parent().get()
  url = ('http://build.chromium.org/p/' + patchset_builder_runs.master +
         '/json/builders/' + patchset_builder_runs.builder + '/builds/' +
         str(failure_run.buildnumber))
  urlfetch.set_default_fetch_deadline(60)
  logging.info('get_flaky_run_reason %s', url)
  content = urlfetch.fetch(url).content
  try:
    json_result = json.loads(content)
  except ValueError:
    logging.error('couldnt decode json for %s', url)
    return

  # Collect the failed steps that are candidates for being counted.
  failed_steps = []
  for step in json_result['steps']:
    result = step['results'][0]
    # Successful steps and steps that are neither success nor failure are of
    # no interest here.
    if build_result.isResultSuccess(result):
      continue
    if not build_result.isResultFailure(result):
      continue
    step_name = step['name']
    if (step_name in ('steps', 'presubmit') or
        step_name.startswith('[swarming]')):
      # recipe code shows errors twice with first being 'steps'. also when a
      # swarming test fails, it shows up twice. also ignore 'presubmit' since
      # it changes from fail to pass for same patchset depending on new lgtm.
      continue
    failed_steps.append(step)

  # Sets keep the name matching below to simple lookups.
  failed_step_names = set(step['name'] for step in failed_steps)
  steps_to_ignore = set()
  for step in failed_steps:
    step_name = step['name']
    if ' (with patch)' not in step_name:
      continue
    step_name_with_no_modifier = step_name.replace(' (with patch)', '')

    # A step which fails, and then is retried and also fails, will have its
    # name without the ' (with patch)' again. Don't double count.
    if step_name_with_no_modifier in failed_step_names:
      steps_to_ignore.add(step_name_with_no_modifier)

    # If a step fails without the patch, then the tree is busted. Don't count
    # as flake.
    step_name_without_patch = step_name_with_no_modifier + ' (without patch)'
    if step_name_without_patch in failed_step_names:
      steps_to_ignore.add(step_name)
      steps_to_ignore.add(step_name_without_patch)

  for step in failed_steps:
    step_name = step['name']
    if step_name in steps_to_ignore:
      continue
    flakes = get_flakes(step)
    # Kept as an explicit guard in case get_flakes() returns None.
    if not flakes:
      continue
    for flake in flakes:
      flake_occurance = FlakeOccurance(name=step_name, failure=flake)
      flaky_run.flakes.append(flake_occurance)

      add_failure_to_flake(flake, flaky_run)
  flaky_run.put()