Example #1
def UpdateSuspectedCL(repo_name, revision, commit_position, approach,
                      master_name, builder_name, build_number, cl_failure_type,
                      failures, top_score):

  suspected_cl = (
      WfSuspectedCL.Get(repo_name, revision) or
      WfSuspectedCL.Create(repo_name, revision, commit_position))

  if not suspected_cl.identified_time:  # pragma: no cover.
    suspected_cl.identified_time = time_util.GetUTCNow()

  suspected_cl.updated_time = time_util.GetUTCNow()

  if approach not in suspected_cl.approaches:
    suspected_cl.approaches.append(approach)
  if cl_failure_type not in suspected_cl.failure_type:
    suspected_cl.failure_type.append(cl_failure_type)

  build_key = BaseBuildModel.CreateBuildKey(master_name, builder_name,
                                            build_number)
  if build_key not in suspected_cl.builds:
    suspected_cl.builds[build_key] = {
        'approaches': [approach],
        'failure_type': cl_failure_type,
        'failures': failures,
        'status': _GetsStatusFromSameFailure(suspected_cl.builds, failures),
        'top_score': top_score
    }
  else:
    build = suspected_cl.builds[build_key]
    if approach not in build['approaches']:
      build['approaches'].append(approach)

  suspected_cl.put()
Example #2
  def Initialize(self, crash_data):
    """(Re)Initialize a CrashAnalysis ndb.Model from ``CrashData``.

    This method is only ever called from _NeedsNewAnalysis, which is only
    ever called from ScheduleNewAnalysis. It is used for filling in the
    fields of a CrashAnalysis ndb.Model for the first time (though it
    can also be used to re-initialize a given CrashAnalysis). Subclasses
    should extend (not override) this to (re)initialize any
    client-specific fields they may have.
    """
    # Get rid of any previous values there may have been.
    self.Reset()

    # Set the version.
    self.crashed_version = crash_data.crashed_version

    self.stack_trace = crash_data.raw_stacktrace
    self.stacktrace = crash_data.stacktrace
    self.signature = crash_data.signature
    self.platform = crash_data.platform
    self.regression_range = crash_data.regression_range
    self.dependencies = crash_data.dependencies
    self.dependency_rolls = crash_data.dependency_rolls
    self.identifiers = crash_data.identifiers

    # Set progress properties.
    self.status = analysis_status.PENDING
    self.requested_time = time_util.GetUTCNow()
    self.started_time = time_util.GetUTCNow()
Example #3
    def HandleGet(self):
        client_id = self.request.get('client_id', CrashClient.CRACAS)

        now = time_util.GetUTCNow()
        last_week = time_util.GetUTCNow() - timedelta(days=7)

        start_date, end_date = time_util.GetStartEndDates(
            self.request.get('start_date'),
            self.request.get('end_date'),
            default_start=last_week,
            default_end=now)

        publish_to_client = bool(self.request.get('publish'))
        count = 0
        for crash_keys in IterateCrashBatches(client_id, start_date, end_date):
            pipeline = RerunPipeline(client_id, crash_keys, publish_to_client)
            # Attribute defined outside __init__ - pylint: disable=W0201
            pipeline.target = appengine_util.GetTargetNameForModule(
                RERUN_SERVICE)
            pipeline.start(queue_name=RERUN_QUEUE)
            count += 1

        if count == 0:
            message = 'No rerun pipeline started.'
        else:
            message = '%d rerun pipeline(s) kicked off.' % count

        return {'data': {'message': message}}
Example #4
def _GetCQHiddenFlakeQueryStartTime():
    """Gets the time window for querying CQ hidden flakes.

    The returned times are used both to decide whether the query for CQ hidden
    flakes should run at all and to determine the start time of the query.

    Returns:
      (str, str): Start and end times of the query window, each a string
        representation of a datetime in the format %Y-%m-%d %H:%M:%S UTC, or
        (None, None) if the last query ran too recently.
    """
    last_query_time_right_boundary = time_util.GetUTCNow() - timedelta(
        hours=_CQ_HIDDEN_FLAKE_QUERY_HOUR_INTERVAL)
    hidden_flake_query_start_time = time_util.FormatDatetime(
        time_util.GetUTCNow() -
        timedelta(hours=_CQ_HIDDEN_FLAKE_QUERY_HOUR_INTERVAL +
                  _ROUGH_MAX_BUILD_CYCLE_HOURS,
                  minutes=_CQ_HIDDEN_FLAKE_QUERY_OVERLAP_MINUTES))
    hidden_flake_query_end_time = time_util.FormatDatetime(
        time_util.GetUTCNow() -
        timedelta(hours=_CQ_HIDDEN_FLAKE_QUERY_HOUR_INTERVAL))

    last_query_time = _GetLastCQHiddenFlakeQueryTime()

    if not last_query_time:
        # Only before the first time of running the query.
        return hidden_flake_query_start_time, hidden_flake_query_end_time
    return ((hidden_flake_query_start_time, hidden_flake_query_end_time)
            if last_query_time <= last_query_time_right_boundary else
            (None, None))
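
The gating above compares the last query time against a moving right boundary. Below is a minimal, self-contained sketch of the same windowing arithmetic, using plain datetime objects instead of the project's time_util helpers and hypothetical values for the interval constants (the real values are defined in the module this example comes from); it returns datetimes rather than formatted strings.

from datetime import datetime, timedelta

# Hypothetical stand-ins for the module-level constants used above.
_CQ_HIDDEN_FLAKE_QUERY_HOUR_INTERVAL = 2
_ROUGH_MAX_BUILD_CYCLE_HOURS = 8
_CQ_HIDDEN_FLAKE_QUERY_OVERLAP_MINUTES = 20


def _QueryWindow(now, last_query_time=None):
    """Returns (start, end) datetimes for the next query, or (None, None)."""
    right_boundary = now - timedelta(
        hours=_CQ_HIDDEN_FLAKE_QUERY_HOUR_INTERVAL)
    start = now - timedelta(
        hours=(_CQ_HIDDEN_FLAKE_QUERY_HOUR_INTERVAL +
               _ROUGH_MAX_BUILD_CYCLE_HOURS),
        minutes=_CQ_HIDDEN_FLAKE_QUERY_OVERLAP_MINUTES)
    if last_query_time is None or last_query_time <= right_boundary:
        return start, right_boundary
    return None, None


# The last query ran 3 hours ago; with a 2-hour interval a new query is due.
now = datetime(2023, 1, 1, 12, 0, 0)
print(_QueryWindow(now, last_query_time=now - timedelta(hours=3)))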
Example #5
def _GetETAToStartAnalysis(manually_triggered):
    """Returns the ETA, as a UTC datetime.datetime, to start the analysis.

    If not urgent, Swarming tasks should run outside PST peak hours, which are
    11am to 6pm on workdays.

    Args:
      manually_triggered (bool): True if the analysis was requested manually,
          e.g. by a Chromium sheriff.

    Returns:
      The ETA, as a UTC datetime.datetime, to start the analysis.
    """
    if manually_triggered:
        # If the analysis is manually triggered, run it right away.
        return time_util.GetUTCNow()

    now_at_pst = time_util.GetPSTNow()
    if now_at_pst.weekday() >= 5:  # PST Saturday or Sunday.
        return time_util.GetUTCNow()

    if now_at_pst.hour < 11 or now_at_pst.hour >= 18:  # Before 11am or after 6pm.
        return time_util.GetUTCNow()

    # Set the ETA to 6pm, plus a random delay of up to 30 minutes to avoid a
    # sudden burst of traffic to Swarming.
    diff = timedelta(hours=18 - now_at_pst.hour,
                     minutes=-now_at_pst.minute,
                     seconds=-now_at_pst.second + random.randint(0, 30 * 60),
                     microseconds=-now_at_pst.microsecond)
    eta = now_at_pst + diff

    # Convert back to UTC.
    return time_util.ConvertPSTToUTC(eta)
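
A rough standalone restatement of the scheduling rule above, assuming the PST-local time is already available as a naive datetime; the real code obtains it via time_util and converts the result back to UTC, and the helper name here is purely illustrative.

import random
from datetime import datetime, timedelta


def _EtaFor(now_at_pst, manually_triggered=False):
    """Returns the ETA in PST; the real code converts it back to UTC."""
    # Manual requests, weekends, and off-peak hours run immediately.
    if manually_triggered or now_at_pst.weekday() >= 5:
        return now_at_pst
    if now_at_pst.hour < 11 or now_at_pst.hour >= 18:
        return now_at_pst
    # Otherwise defer to 6pm PST plus up to 30 minutes of random jitter.
    six_pm = now_at_pst.replace(hour=18, minute=0, second=0, microsecond=0)
    return six_pm + timedelta(seconds=random.randint(0, 30 * 60))


# A Wednesday at 2pm PST is inside peak hours, so the ETA is pushed past 6pm.
print(_EtaFor(datetime(2023, 6, 14, 14, 0)))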
Example #6
def _UpdateCulprit(culprit_urlsafe_key,
                   revert_status=None,
                   revert_cl=None,
                   skip_revert_reason=None,
                   revert_submission_status=None):
    """Updates culprit entity."""
    culprit = entity_util.GetEntityFromUrlsafeKey(culprit_urlsafe_key)
    assert culprit
    culprit.should_be_reverted = True

    culprit.revert_status = revert_status or culprit.revert_status
    culprit.revert_cl = revert_cl or culprit.revert_cl
    culprit.skip_revert_reason = skip_revert_reason or culprit.skip_revert_reason
    culprit.revert_submission_status = (revert_submission_status
                                        or culprit.revert_submission_status)

    if culprit.revert_status != analysis_status.RUNNING:  # pragma: no branch
        # Only stores revert_pipeline_id when the revert is ongoing.
        culprit.revert_pipeline_id = None

    if revert_cl:
        culprit.cr_notification_status = analysis_status.COMPLETED
        culprit.revert_created_time = time_util.GetUTCNow()
        culprit.cr_notification_time = time_util.GetUTCNow()

    if (culprit.revert_submission_status !=
            analysis_status.RUNNING):  # pragma: no branch
        culprit.submit_revert_pipeline_id = None

    if culprit.revert_submission_status == analysis_status.COMPLETED:
        culprit.revert_committed_time = time_util.GetUTCNow()

    culprit.put()
Example #7
def NeedANewAnalysis(master_name, builder_name, build_number, failed_steps,
                     build_completed, force):
    """Checks the analysis status of the build and decides if a new one is needed.

    A WfAnalysis entity for the given build will be created if none exists.
    When a new analysis is needed, this function will create and save a
    WfAnalysis entity to the datastore, or it will reset the existing one while
    still keeping the result of the last analysis.

    Returns:
      True if a new analysis is needed, otherwise False.
    """
    analysis = WfAnalysis.Get(master_name, builder_name, build_number)

    if not analysis:
        # The build failure is not analyzed yet.
        analysis = WfAnalysis.Create(master_name, builder_name, build_number)
        analysis.status = analysis_status.PENDING
        analysis.request_time = time_util.GetUTCNow()
        analysis.put()
        return True
    elif force:
        # A new analysis could be forced if last analysis was completed.
        if not analysis.completed:
            # TODO: start a new analysis if the last one has started running but it
            # has no update for a considerable amount of time, e.g. 10 minutes.
            logging.info(
                'Existing analysis is not completed yet. No new analysis.')
            return False

        analysis.Reset()
        analysis.request_time = time_util.GetUTCNow()
        analysis.put()
        return True
    elif failed_steps and analysis.completed:
        # If there is any new failed step, a new analysis is needed.
        for step in failed_steps:
            analyzed = any(step == s for s in analysis.not_passed_steps)
            if analyzed:
                continue

            logging.info('At least one new failed step is detected: %s', step)
            analysis.Reset()
            analysis.request_time = time_util.GetUTCNow()
            analysis.put()
            return True

    # Start a new analysis if the build cycle wasn't completed in last analysis,
    # but now it is completed. This will potentially trigger a try-job run.
    if analysis.completed and not analysis.build_completed and build_completed:
        return True

    # TODO: support following cases
    # * Automatically retry if last analysis failed with errors.
    # * Analysis is not complete and no update in the last 5 minutes.
    logging.info('No case matched. No new analysis.')
    return False
Example #8
def _CreateIssuesForFlakes(flake_groups_to_create_issue,
                           num_of_issues_to_create):
    """Creates monorail bugs.

    Args:
      flake_groups_to_create_issue ([FlakeGroupByOccurrences]): A list of flake
        groups that are not yet linked with a FlakeIssue.
      num_of_issues_to_create (int): Total budget for
        - new bugs created, and
        - existing bugs found, linked to flakes and then updated.
        Note that more bugs may be found and linked to flakes, but they will
        not be updated because of the limit.
    """
    for flake_group in flake_groups_to_create_issue:
        try:
            if len(flake_group.flakes) == 1:
                # A single flake in group, uses this flake's info to look for or create
                # a bug. If num_of_issues_to_create has reached 0, only looks for
                # existing monorail bug for it.
                issue_generator = FlakeDetectionIssueGenerator(
                    flake_group.flakes[0], flake_group.num_occurrences)
                issue_id = _CreateIssueForFlake(
                    issue_generator,
                    flake_group.flakes[0],
                    create_or_update_bug=num_of_issues_to_create > 0)
            elif num_of_issues_to_create > 0:
                # Multiple flakes in group, only creates a bug when the bug count has
                # not reached the limit.
                issue_id = _CreateIssueForFlakeGroup(flake_group)
            else:
                # Multiple flakes in group, and no more bug is allowed.
                issue_id = None

            if issue_id and num_of_issues_to_create > 0:
                # A monorail bug has been created or updated.
                num_of_issues_to_create -= 1

                # Updates FlakeIssue's last updated_time_by_flake_detection property.
                # This property is only applicable to Flake Detection because Flake
                # Detection can update an issue at most once every 24 hours.
                # Also change last_updated_time_in_monorail to keep in sync.
                flake_issue = GetFlakeIssue(flake_group.flakes[0])
                flake_issue.last_updated_time_by_flake_detection = (
                    time_util.GetUTCNow())
                flake_issue.last_updated_time_in_monorail = (
                    time_util.GetUTCNow())
                flake_issue.put()

        except HttpError as error:
            # Benign exceptions (HttpError 403) may happen when FindIt tries to
            # update an issue that it doesn't have permission to. Do not raise
            # exception so that the for loop can move on to create or update next
            # issues.
            logging.warning(
                'Failed to create or update issue due to error: %s', error)
Example #9
  def run(self, *_args, **_kwargs):
    """Call predator to do the analysis of the given crash.

    N.B., due to the structure of AppEngine pipelines, this method must
    accept the same arguments as are passed to ``__init__``; however,
    because they were already passed to ``__init__`` there's no use in
    receiving them here. Thus, we discard all the arguments to this method
    (except for ``self``, naturally).
    """
    # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis
    # to guard against race conditions?
    analysis = self._findit.GetAnalysis(self._crash_identifiers)

    # Update the model's status to say we're in the process of doing analysis.
    analysis.pipeline_status_path = self.pipeline_status_path()
    analysis.status = analysis_status.RUNNING
    analysis.started_time = time_util.GetUTCNow()
    analysis.findit_version = appengine_util.GetCurrentVersion()
    analysis.put()

    # Actually do the analysis.
    culprit = self._findit.FindCulprit(analysis.ToCrashReport())
    if culprit is not None:
      result, tags = culprit.ToDicts()
    else:
      result = {'found': False}
      tags = {
          'found_suspects': False,
          'found_project': False,
          'found_components': False,
          'has_regression_range': False,
          'solution': None,
      }

    # Update model's status to say we're done, and save the results.
    analysis.completed_time = time_util.GetUTCNow()
    analysis.result = result
    for tag_name, tag_value in tags.iteritems():
      # TODO(http://crbug.com/602702): make it possible to add arbitrary tags.
      # TODO(http://crbug.com/659346): we misplaced the coverage test; find it!
      if hasattr(analysis, tag_name): # pragma: no cover
        setattr(analysis, tag_name, tag_value)

      if hasattr(monitoring, tag_name):
        metric = getattr(monitoring, tag_name)
        metric.increment({tag_name: tag_value,
                          'client_id': self.client_id})

    analysis.status = analysis_status.COMPLETED
    analysis.put()
Example #10
def DownloadBuildData(master_name, builder_name, build_number):
    """Downloads build data and returns a WfBuild instance."""
    build = WfBuild.Get(master_name, builder_name, build_number)
    if not build:
        build = WfBuild.Create(master_name, builder_name, build_number)

    # Cache the data to avoid pulling from master again.
    if _BuildDataNeedUpdating(build):
        use_cbe = waterfall_config.GetDownloadBuildDataSettings().get(
            'use_chrome_build_extract')

        if use_cbe:
            # Retrieve build data from build archive first.
            build.data = buildbot.GetBuildDataFromArchive(
                master_name, builder_name, build_number,
                HTTP_CLIENT_NO_404_ERROR)

            if build.data:
                build.data_source = CHROME_BUILD_EXTRACT
            elif not lock_util.WaitUntilDownloadAllowed(
                    master_name):  # pragma: no cover
                return None

        if not build.data or not use_cbe:
            # Retrieve build data from build master.
            build.data = buildbot.GetBuildDataFromBuildMaster(
                master_name, builder_name, build_number,
                HTTP_CLIENT_LOGGING_ERRORS)
            build.data_source = BUILDBOT_MASTER

        build.last_crawled_time = time_util.GetUTCNow()
        build.put()

    return build
Example #11
def _SavesNewCLConfidence():
    """Queries all CLs and calculates confidence of each type of results."""
    date_end = time_util.GetUTCNow().replace(hour=0,
                                             minute=0,
                                             second=0,
                                             microsecond=0)
    date_start = date_end - datetime.timedelta(days=TIME_RANGE_DAYS)
    result_heuristic = _GetCLDataForHeuristic(date_start, date_end)
    result_try_job, result_both = _GetCLDataForTryJob(date_start, date_end)

    new_compile_heuristic = _CalculateConfidenceLevelsForHeuristic(
        result_heuristic[failure_type.COMPILE])
    new_test_heuristic = _CalculateConfidenceLevelsForHeuristic(
        result_heuristic[failure_type.TEST])
    new_compile_try_job = _CreateConfidenceInformation(
        result_try_job[failure_type.COMPILE])
    new_test_try_job = _CreateConfidenceInformation(
        result_try_job[failure_type.TEST])
    new_compile_heuristic_try_job = _CreateConfidenceInformation(
        result_both[failure_type.COMPILE])
    new_test_heuristic_try_job = _CreateConfidenceInformation(
        result_both[failure_type.TEST])

    confidence = SuspectedCLConfidence.Get()
    confidence.Update(date_start, date_end, new_compile_heuristic,
                      new_compile_try_job, new_compile_heuristic_try_job,
                      new_test_heuristic, new_test_try_job,
                      new_test_heuristic_try_job)
    return confidence
Example #12
def CountRecentCommits(repo_url,
                       ref='refs/heads/master',
                       time_period=datetime.timedelta(hours=1)):
    """Gets the number of commits that landed recently.

  By default, this function will count the commits landed in the master ref
  during last hour, but can be used to count the commits landed in any ref in
  the most recent period of any arbitrary size.

  Args:
    repo_url (str): Url to the repo.
    ref (str): ref to count commits on.
    time_period (datetime.delta): window of time in which to count commits.

  Returns:
    An integer representing the number of commits that landed in the last
    hour.
  """
    count = 0
    cutoff = time_util.GetUTCNow() - time_period
    git_repo = NonCachedGitilesRepository(FinditHttpClient(), repo_url, ref)
    next_rev = ref
    while next_rev:
        # 100 is a reasonable size for a page.
        # This assumes that GetNChangeLogs returns changelogs in newer to older
        # order.
        logs, next_rev = git_repo.GetNChangeLogs(next_rev, 100)
        for log in logs:
            if log.committer.time >= cutoff:
                count += 1
            else:
                return count
    return count
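
The early return in the inner loop only works because GetNChangeLogs is assumed to yield changelogs from newest to oldest. A toy stand-in with a fake log source (no Gitiles access) illustrates why that ordering lets the scan stop at the first commit older than the cutoff:

from collections import namedtuple
from datetime import datetime, timedelta

FakeLog = namedtuple('FakeLog', ['time'])


def _CountRecent(pages, cutoff):
    count = 0
    for page in pages:  # Each page is ordered newest to oldest.
        for log in page:
            if log.time >= cutoff:
                count += 1
            else:
                return count  # Everything that follows is older; stop early.
    return count


now = datetime(2023, 1, 1, 12, 0)
# Commits 5 and 20 minutes old fall inside the one-hour window; 90 does not.
pages = [[FakeLog(now - timedelta(minutes=m)) for m in (5, 20, 90)]]
print(_CountRecent(pages, cutoff=now - timedelta(hours=1)))  # Prints 2.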
Example #13
    def run(self, failure_info, change_logs, deps_info, signals,
            build_completed):
        """Identifies culprit CL.

    Args:
      failure_info (dict): Output of pipeline DetectFirstFailurePipeline.
      change_logs (dict): Output of pipeline PullChangelogPipeline.
      signals (dict): Output of pipeline ExtractSignalPipeline.

    Returns:
      analysis_result returned by build_failure_analysis.AnalyzeBuildFailure.
    """
        master_name = failure_info['master_name']
        builder_name = failure_info['builder_name']
        build_number = failure_info['build_number']

        analysis_result, suspected_cls = build_failure_analysis.AnalyzeBuildFailure(
            failure_info, change_logs, deps_info, signals)
        analysis = WfAnalysis.Get(master_name, builder_name, build_number)
        analysis.build_completed = build_completed
        analysis.result = analysis_result
        analysis.status = analysis_status.COMPLETED
        analysis.result_status = _GetResultAnalysisStatus(analysis_result)
        analysis.suspected_cls = _GetSuspectedCLsWithOnlyCLInfo(suspected_cls)
        analysis.end_time = time_util.GetUTCNow()
        analysis.put()

        # Save suspected_cls to data_store.
        _SaveSuspectedCLs(suspected_cls, failure_info['master_name'],
                          failure_info['builder_name'],
                          failure_info['build_number'],
                          failure_info['failure_type'])
        return analysis_result
Example #14
def _GetMatchingFailureGroups(build_failure_type):
    earliest_time = time_util.GetUTCNow() - timedelta(
        seconds=waterfall_config.GetTryJobSettings().get(
            'max_seconds_look_back_for_group'))
    return WfFailureGroup.query(
        ndb.AND(WfFailureGroup.build_failure_type == build_failure_type,
                WfFailureGroup.created_time >= earliest_time)).fetch()
Example #15
def AnalyzeDetectedFlakeOccurrence(flake, flake_occurrence, bug_id):
    """Analyze detected flake occurrence by Flake Detection.

  Args:
    flake (Flake): The Flake triggering this analysis.
    flake_occurrence (FlakeOccurrence): A FlakeOccurrence model entity.
    bug_id (int): Id of the bug to update after the analysis finishes.
  """
    test_name = flake_occurrence.test_name
    analysis_request = FlakeAnalysisRequest.Create(test_name, False, bug_id)
    analysis_request.flake_key = flake.key

    master_name = flake_occurrence.build_configuration.legacy_master_name
    builder_name = flake_occurrence.build_configuration.luci_builder
    build_number = flake_occurrence.build_configuration.legacy_build_number
    step_ui_name = flake_occurrence.step_ui_name
    analysis_request.AddBuildStep(master_name, builder_name, build_number,
                                  step_ui_name, time_util.GetUTCNow())
    analysis_request.Save()

    logging.info('flake report for detected flake occurrence: %r',
                 analysis_request)
    AsyncProcessFlakeReport(analysis_request,
                            user_email=constants.DEFAULT_SERVICE_ACCOUNT,
                            is_admin=False)
Example #16
def GenerateAuthToken(key_name, user_id, action_id='', when=None):
  """Generates a URL-safe token based on XSRFToken but for generla purpose.

  Args:
    key_name (str): name of secret key to generate token.
    user_id (str): the user ID of the authenticated user.
    action_id (str): a string identifier of the action they requested
      authorization for.
    when (datetime): the time when the user was authorized for this action.
      If not set, the current UTC time is used.
  Returns:
    A string token.
  """
  key = SecretKey.GetSecretKey(key_name)
  when = when or time_util.GetUTCNow()
  when_timestamp = time_util.ConvertToTimestamp(when)
  digester = hmac.new(key)
  digester.update(str(user_id))
  digester.update(_DELIMITER)
  digester.update(action_id)
  digester.update(_DELIMITER)
  digester.update(str(when_timestamp))
  digest = digester.digest()

  return base64.urlsafe_b64encode('%s%s%d' % (digest, _DELIMITER,
                                              when_timestamp))
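
For context, a self-contained sketch of the same HMAC construction plus the matching verification step; the byte handling, sha256 digest, secret key, and ValidateToken helper are illustrative stand-ins (the original relies on SecretKey.GetSecretKey, time_util, and Python 2's default md5 digest), not the project's actual API:

import base64
import hashlib
import hmac
import time

_DELIMITER = b':'


def GenerateToken(key, user_id, action_id='', when_timestamp=None):
    when_timestamp = when_timestamp or int(time.time())
    digester = hmac.new(key, digestmod=hashlib.sha256)
    for part in (str(user_id), action_id, str(when_timestamp)):
        digester.update(part.encode('utf-8'))
        digester.update(_DELIMITER)
    payload = (digester.digest() + _DELIMITER +
               str(when_timestamp).encode('utf-8'))
    return base64.urlsafe_b64encode(payload)


def ValidateToken(token, key, user_id, action_id=''):
    # Recompute the digest for the embedded timestamp and compare it in
    # constant time.
    decoded = base64.urlsafe_b64decode(token)
    _, _, when_timestamp = decoded.rpartition(_DELIMITER)
    expected = GenerateToken(key, user_id, action_id,
                             when_timestamp=int(when_timestamp.decode('utf-8')))
    return hmac.compare_digest(token, expected)


key = b'hypothetical-secret'
token = GenerateToken(key, 'user@example.com', action_id='rerun-analysis')
print(ValidateToken(token, key, 'user@example.com', action_id='rerun-analysis'))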
Example #17
def _GetDailyNumberOfRevertedCulprits(limit):
  earliest_time = time_util.GetUTCNow() - timedelta(days=1)
  # TODO(chanli): improve the check for a rare case when two pipelines revert
  # at the same time.
  return WfSuspectedCL.query(
      ndb.AND(WfSuspectedCL.failure_type == failure_type.COMPILE,
              WfSuspectedCL.revert_created_time >= earliest_time)).count(limit)
Example #18
    def HandleGet(self):
        """Lists WfAnalysis entities detected to have been aborted."""
        midnight_today = datetime.combine(time_util.GetUTCNow(), time.min)
        start = self.request.get('start_date')
        end = self.request.get('end_date')

        start_date, end_date = _GetStartEndDates(start, end, midnight_today)

        analyses = WfAnalysis.query(
            ndb.AND(WfAnalysis.build_start_time >= start_date,
                    WfAnalysis.build_start_time < end_date, WfAnalysis.aborted
                    == True)).order(-WfAnalysis.build_start_time).fetch(_COUNT)

        analyses_data = []

        for analysis in analyses:
            analyses_data.append(_Serialize(analysis))

        data = {
            'start_date': time_util.FormatDatetime(start_date),
            'end_date': time_util.FormatDatetime(end_date),
            'analyses': analyses_data,
        }

        return {'template': 'pipeline_errors_dashboard.html', 'data': data}
Example #19
def _FormatDisplayData(try_job_data):
    """Returns information of a WfTryJobData/FlakeTryJobData as a dict."""
    display_data = try_job_data.to_dict()

    for attribute in ('created_time', 'start_time', 'end_time',
                      'request_time'):
        display_data[attribute] = time_util.FormatDatetime(
            display_data[attribute])

    display_data['pending_time'] = (
        _FormatDuration(try_job_data.request_time, try_job_data.start_time)
        if try_job_data.start_time else _FormatDuration(
            try_job_data.created_time, time_util.GetUTCNow()))
    display_data['last_buildbucket_response'] = json.dumps(
        _PrepareBuildbucketResponseForDisplay(
            display_data['last_buildbucket_response']),
        sort_keys=True)

    if isinstance(try_job_data, FlakeTryJobData):
        # Flake try job data does not include try_job_type.
        display_data['try_job_type'] = 'flake'
        display_data['analysis_key'] = (try_job_data.analysis_key.urlsafe()
                                        if try_job_data.analysis_key else None)

    # Do not include the try job key in the response.
    display_data.pop('try_job_key', None)

    return display_data
Example #20
 def _GetCQFlagsOrExplanation(self, commit_timestamp):
   delta = time_util.GetUTCNow() - commit_timestamp
   if delta.days > 1:
     return (
         '# Not skipping CQ checks because original CL landed > 1 day ago.\n\n'
     )
   return 'No-Presubmit: true\nNo-Tree-Checks: true\nNo-Try: true\n'
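
A free-standing version of the same check, for illustration only; it takes the current time as a parameter instead of calling time_util.GetUTCNow(). Note that delta.days > 1 only becomes true once the commit is at least two full days old:

from datetime import datetime, timedelta


def _CqFlagsOrExplanation(now, commit_timestamp):
    delta = now - commit_timestamp
    if delta.days > 1:
        return ('# Not skipping CQ checks because original CL landed > 1 day '
                'ago.\n\n')
    return 'No-Presubmit: true\nNo-Tree-Checks: true\nNo-Try: true\n'


now = datetime(2023, 1, 10, 12, 0)
print(_CqFlagsOrExplanation(now, now - timedelta(hours=6)))  # CQ-skip flags.
print(_CqFlagsOrExplanation(now, now - timedelta(days=3)))   # Explanation.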
Example #21
def _UpdateAnalysisStatusUponCompletion(analysis,
                                        suspected_build,
                                        status,
                                        error,
                                        build_confidence_score=None):
    analysis.end_time = time_util.GetUTCNow()
    analysis.status = status
    analysis.confidence_in_suspected_build = build_confidence_score
    analysis.try_job_status = analysis_status.SKIPPED
    analysis.suspected_flake_build_number = suspected_build
    analysis.result_status = (result_status.NOT_FOUND_UNTRIAGED
                              if suspected_build is None else
                              result_status.FOUND_UNTRIAGED)

    if error:
        analysis.error = error
    else:
        # Clear info about the last attempted swarming task since it will be stored
        # in the data point.
        analysis.last_attempted_swarming_task_id = None
        analysis.last_attempted_build_number = None

        if _HasSufficientConfidenceToRunTryJobs(analysis):
            # Analysis is not finished yet: try jobs are about to be run.
            analysis.try_job_status = None
            analysis.end_time = None

    analysis.put()
Example #22
def CanAutoCreateRevert(culprit, parameters):
    """Checks if Findit can auto create a revert.

  Args:
    culprit (Basestring): Urlsafe key for the suspected cl.
    parameters (CulpritActionParameters): Parameters to run culprit action
      pipelines.

  Findit can auto create a revert if:
    1. Auto create revert for test is turned on;
    2. The number of reverts in past 24 hours is less than the daily limit;
    3. The culprit is also being suspected by the heuristic analysis.
  """
    heuristic_cls = parameters.heuristic_cls
    if culprit not in heuristic_cls:
        return False

    action_settings = waterfall_config.GetActionSettings()
    # Auto revert has been turned off.
    if not bool(action_settings.get('auto_create_revert')):
        return False

    auto_create_revert_daily_threshold_test = action_settings.get(
        'auto_create_revert_daily_threshold_test',
        _DEFAULT_AUTO_CREATE_REVERT_DAILY_THRESHOLD_TEST)
    # Auto revert has exceeded daily limit.
    if _GetDailyNumberOfRevertedCulprits(
            auto_create_revert_daily_threshold_test
    ) >= auto_create_revert_daily_threshold_test:
        logging.info(
            'Auto reverts for test culprits on %s has met daily limit.',
            time_util.FormatDatetime(time_util.GetUTCNow()))
        return False

    return True
Example #23
 def Create(cls, repo_name, revision, commit_position):  # pragma: no cover
     instance = cls(key=cls._CreateKey(repo_name, revision))
     instance.repo_name = repo_name
     instance.revision = revision
     instance.commit_position = commit_position
     instance.identified_time = time_util.GetUTCNow()
     return instance
Example #24
    def OnFinalized(self, parameters):
        if not self.IsRootPipeline():
            # AnalyzeFlakePipeline is recursive. Only the root pipeline should update.
            return

        analysis_urlsafe_key = parameters.analysis_urlsafe_key
        analysis = ndb.Key(urlsafe=analysis_urlsafe_key).get()
        assert analysis, 'Cannot retrieve analysis entry from datastore'

        # Get the analysis' already-detected error, if any.
        error = analysis.error

        if self.was_aborted:
            error = analysis.GetError()  # Capture any undetected error.
            monitoring.aborted_pipelines.increment({'type': 'flake'})

        status = analysis_status.ERROR if error else analysis_status.COMPLETED
        analysis.Update(error=error,
                        end_time=time_util.GetUTCNow(),
                        status=status)

        # TODO(crbug.com/847644): If error is set, report to ts_mon.

        # Monitor completion of pipeline.
        monitoring.completed_pipelines.increment({'type': 'flake'})
Example #25
def _CheckForNewAnalysis(request, rerun=False):
    """Checks if a new analysis is needed for the requested flake.

  Args:
    request (FlakeAnalysisRequest): The request to analyze a flake.
    rerun (bool): Indicates a forced rerun by admin.

  Returns:
    (version_number, build_step)
    version_number (int): The version of the FlakeAnalysisRequest if a new
        analysis is needed; otherwise 0.
    build_step (BuildStep): a BuildStep instance if a new analysis is needed;
        otherwise None.
  """
    existing_request = FlakeAnalysisRequest.GetVersion(key=request.name)
    if not existing_request or (existing_request.bug_id and request.bug_id
                                and existing_request.bug_id != request.bug_id):
        # If no existing analysis or last analysis was for a different bug, randomly
        # pick one configuration for a new analysis.
        if existing_request:
            # Make a copy to preserve the version number of existing analysis and
            # prevent concurrent analyses of the same flake.
            user_emails = (email_util.ObscureEmails(
                existing_request.user_emails, ['google.com']) +
                           list(set(request.user_emails)))
            existing_request.CopyFrom(request)
            request = existing_request
            request.user_emails = user_emails
        request.user_emails_obscured = False
        request.user_emails_last_edit = time_util.GetUTCNow()

        swarmed, supported, supported_build_step = _CheckFlakeSwarmedAndSupported(
            request)
        request.swarmed = swarmed
        request.supported = supported

        if supported_build_step and not request.is_step:
            supported_build_step.scheduled = True  # This step will be analyzed.

        # For unsupported or step-level flakes, still save them for monitoring.
        _, saved = request.Save(
            retry_on_conflict=False)  # Create a new version.

        if not saved or not supported_build_step or request.is_step:
            # No new analysis if:
            # 1. Another analysis was just triggered.
            # 2. No representative step is Swarmed Gtest.
            # 3. The flake is a step-level one.
            return 0, None

        return request.version_number, supported_build_step
    else:
        # If no bug is attached to the existing analysis or the new request, or both
        # are attached to the same bug, start a new analysis with a different
        # configuration. For a configuration that was analyzed 7 days ago, reset it
        # to use the new reported step of the same configuration.
        # TODO: move this setting to config.
        return _MergeNewRequestIntoExistingOne(request, existing_request,
                                               rerun)
Example #26
 def CreateFlakeAnalysisRequest(flake):
   analysis_request = FlakeAnalysisRequest.Create(
       flake.name, flake.is_step, flake.bug_id)
   for step in flake.build_steps:
     analysis_request.AddBuildStep(step.master_name, step.builder_name,
                                   step.build_number, step.step_name,
                                   time_util.GetUTCNow())
   return analysis_request
Example #27
    def RunImpl(self, build_key):
        """Triggers flake analyses for flaky tests found by CI failure analysis."""
        master_name, builder_name, build_number = build_key.GetParts()
        flake_settings = waterfall_config.GetCheckFlakeSettings()
        throttled = flake_settings.get('throttle_flake_analyses', True)

        analysis = WfAnalysis.Get(master_name, builder_name, build_number)

        if not analysis or not analysis.flaky_tests:
            return

        analysis_counts = defaultdict(lambda: defaultdict(int))
        for step, flaky_tests in analysis.flaky_tests.iteritems():
            logging.info('%s/%s/%s/%s has %s flaky tests.', master_name,
                         builder_name, build_number, step, len(flaky_tests))

            for test_name in flaky_tests:
                # TODO(crbug.com/904050): Deprecate FlakeAnalysisRequest in favor of
                # Flake.
                flake = flake_util.GetFlake(_LUCI_PROJECT, step, test_name,
                                            master_name, builder_name,
                                            build_number)
                request = FlakeAnalysisRequest.Create(test_name, False, None)
                request.AddBuildStep(master_name, builder_name, build_number,
                                     step, time_util.GetUTCNow())
                request.flake_key = flake.key
                scheduled = flake_analysis_service.ScheduleAnalysisForFlake(
                    request, '*****@*****.**',
                    False, triggering_sources.FINDIT_PIPELINE)
                if scheduled:  # pragma: no branch
                    analysis_counts[step]['analyzed'] += 1
                    logging.info(
                        'A flake analysis has been triggered for %s/%s', step,
                        test_name)
                    if throttled and len(flaky_tests) > 1:
                        logging.info(
                            'Throttling is enabled, skipping %d tests.',
                            len(flaky_tests) - 1)
                        analysis_counts[step]['throttled'] = len(
                            flaky_tests) - 1
                        break  # If we're throttled, stop after the first.
                else:
                    analysis_counts[step]['error'] += 1

        for step, step_counts in analysis_counts.iteritems():
            # Collects metrics.
            step_metadata = step_util.GetStepMetadata(master_name,
                                                      builder_name,
                                                      build_number, step)
            canonical_step_name = step_metadata.get(
                'canonical_step_name') or 'Unknown'
            isolate_target_name = step_metadata.get(
                'isolate_target_name') or 'Unknown'

            for operation, count in step_counts.iteritems():
                monitoring.OnFlakeIdentified(canonical_step_name,
                                             isolate_target_name, operation,
                                             count)
Example #28
 def _CreateTryJobData(self, build_id, try_job_key, has_heuristic_results):
     try_job_data = WfTryJobData.Create(build_id)
     try_job_data.created_time = time_util.GetUTCNow()
     try_job_data.has_compile_targets = False
     try_job_data.has_heuristic_results = has_heuristic_results
     try_job_data.try_job_key = try_job_key
     try_job_data.try_job_type = failure_type.GetDescriptionForFailureType(
         failure_type.TEST)
     try_job_data.put()
Example #29
 def _ResetAnalysis(self, master_name, builder_name, build_number):
     analysis = WfAnalysis.Get(master_name, builder_name, build_number)
     analysis.pipeline_status_path = self.pipeline_status_path()
     analysis.status = analysis_status.RUNNING
     analysis.result_status = None
     analysis.start_time = time_util.GetUTCNow()
     analysis.version = appengine_util.GetCurrentVersion()
     analysis.end_time = None
     analysis.put()
Example #30
    def run(self, *_args, **_kwargs):
        """Call predator to do the analysis of the given crash.

    N.B., due to the structure of AppEngine pipelines, this method must
    accept the same arguments as are passed to ``__init__``; however,
    because they were already passed to ``__init__`` there's no use in
    receiving them here. Thus, we discard all the arguments to this method
    (except for ``self``, naturally).
    """
        logging.info('Start analysis of crash_pipeline. %s',
                     json.dumps(self._crash_identifiers))
        # TODO(wrengr): shouldn't this method somehow call _NeedsNewAnalysis
        # to guard against race conditions?
        analysis = self._predator.GetAnalysis(self._crash_identifiers)

        # Update the model's status to say we're in the process of doing analysis.
        analysis.pipeline_status_path = self.pipeline_status_path()
        analysis.status = analysis_status.RUNNING
        analysis.started_time = time_util.GetUTCNow()
        analysis.predator_version = appengine_util.GetCurrentVersion()
        analysis.put()

        # Actually do the analysis.
        culprit = self._predator.FindCulprit(analysis)
        result, tags = culprit.ToDicts()

        analysis.status = (analysis_status.COMPLETED
                           if tags['success'] else analysis_status.ERROR)
        analysis.completed_time = time_util.GetUTCNow()
        # Update model's status to say we're done, and save the results.
        analysis.result = result
        for tag_name, tag_value in tags.iteritems():
            # TODO(http://crbug.com/602702): make it possible to add arbitrary tags.
            # TODO(http://crbug.com/659346): we misplaced the coverage test;
            # find it!
            if hasattr(analysis, tag_name):  # pragma: no cover
                setattr(analysis, tag_name, tag_value)

        analysis.put()
        self._predator.UpdateMetrics(analysis)

        logging.info('Found %s analysis result for %s: \n%s', self.client_id,
                     repr(self._crash_identifiers),
                     json.dumps(analysis.result, indent=2, sort_keys=True))