Example 1
def RunPredator():
    """Runs delta testing between 2 different Predator versions."""
    argparser = argparse.ArgumentParser(
        description='Run Predator on a batch of crashes.')

    argparser.add_argument(
        '--input-path',
        dest='input_path',
        default=None,
        help='Path to read a list of ``CrashAnalysis`` entities')

    argparser.add_argument('--result-path',
                           dest='result_path',
                           default=None,
                           help='Path to store results')

    argparser.add_argument('--key',
                           '-k',
                           default=None,
                           help='Key to one single crash.')

    argparser.add_argument(
        '--client',
        '-c',
        default=setup.DEFAULT_CLIENT,
        help=('Type of client data to run Predator on; '
              'possible values are: fracas, cracas, clusterfuzz. '
              'Right now, only fracas data is available.'))

    argparser.add_argument(
        '--app',
        '-a',
        default=setup.DEFAULT_APP_ID,
        help=('App id of the App engine app that query needs to access. '
              'Defaults to \'%s\'. NOTE, only appspot app ids are supported, '
              'the app_id of googleplex app will have access issues '
              'due to internal proxy. ') % setup.DEFAULT_APP_ID)

    argparser.add_argument('--verbose',
                           '-v',
                           action='store_true',
                           default=False,
                           help='Print Predator results.')
    args = argparser.parse_args()

    crashes = None
    if args.input_path:
        # The input file is a zlib-compressed pickle, so read it as binary.
        with open(args.input_path, 'rb') as f:
            crashes = pickle.loads(zlib.decompress(f.read()))
    elif args.key:
        remote_api.EnableRemoteApi(app_id=args.app)
        crashes = {args.key: ndb.Key(urlsafe=args.key).get()}

    if not crashes:
        logging.error('Failed to get crashes info.')
        return

    culprits = GetCulprits(crashes, args.client, args.app, args.verbose)

    if args.result_path:
        script_util.FlushResult(culprits, args.result_path)
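
A note on the --input-path format: RunPredator reads the file back with pickle.loads(zlib.decompress(f.read())), so a compatible input can be produced with the inverse operations. Below is a minimal sketch, assuming `crashes` is a dict mapping crash ids to ``CrashAnalysis`` entities; the helper name is illustrative and not part of the script above.

import pickle
import zlib


def _WriteCrashes(crashes, output_path):
    """Writes crashes in the zlib-compressed pickle format RunPredator expects."""
    with open(output_path, 'wb') as f:
        # Inverse of the pickle.loads(zlib.decompress(...)) call in RunPredator.
        f.write(zlib.compress(pickle.dumps(crashes)))
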
Example 2
def main():
    # TODO: add options to limit the date range to fetch
    # TODO: add options to display summary of fetched info.

    remote_api.EnableRemoteApi(app_id='findit-for-me')

    try:
        all_analyses, stored_start, stored_end, time_records = _LoadAnalyses()
    except IOError:
        all_analyses = _FetchAnalyses(START_DATE, END_DATE)
        time_records = {}
        _SaveAnalyses(all_analyses, time_records)
    else:
        if stored_start > START_DATE:
            all_analyses = _FetchAnalyses(START_DATE,
                                          stored_start) + all_analyses
        if END_DATE > stored_end:
            all_analyses += _FetchAnalyses(stored_end, END_DATE)
        _SaveAnalyses(all_analyses, time_records)
    saved_count = len(time_records)

    q = Queue.Queue()
    r = Queue.Queue()

    for _ in range(THREAD_COUNT):
        t = Thread(target=_GetTimes, args=(q, r))
        t.daemon = True
        t.start()

    for a in all_analyses:
        if a.key not in time_records:
            q.put(a)

    while not (q.empty() and r.empty()):
        key, record = r.get()
        time_records[key] = _Denest(record)
        print len(time_records)
        if saved_count + THREAD_COUNT < len(time_records):
            _SaveAnalyses(all_analyses, time_records)
            saved_count = len(time_records)
            print 'Saved %d results' % saved_count
    q.join()
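
The two queues above form a simple producer/consumer pipeline: main() enqueues analyses on q, and each worker thread pushes (key, record) pairs onto r. _GetTimes itself is not shown in this example; the following is only a sketch of the loop shape implied by the q.join() call, with _ComputeTimes standing in for whatever per-analysis work the real worker does.

def _GetTimes(q, r):
    """Worker loop: pulls analyses from q and pushes (key, record) results onto r."""
    while True:
        analysis = q.get()
        try:
            # _ComputeTimes is a placeholder for the real per-analysis work.
            r.put((analysis.key, _ComputeTimes(analysis)))
        finally:
            # Mark the item done so the q.join() in main() can eventually return.
            q.task_done()
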
Example 3
def ScriptIterate(query,
                  app_id,
                  projection=None,
                  batch_size=iterator.DEFAULT_BATCH_SIZE,
                  batch_run=False):  # pragma: no cover.
  """Iterates entities queried by query.

  Args:
    query (ndb.Query): The query to fetch entities.
    batch_size (int): The number of entities to query at one time.
    batch_run (bool): If True, iterate batches of entities, if
      False, iterate each entity.

    An exmaple is available in crash_printer/print_crash.py.
  """
  remote_api.EnableRemoteApi(app_id)

  for entity in iterator.Iterate(query, projection=projection,
                                 batch_size=batch_size,
                                 batch_run=batch_run):
    yield entity
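
A minimal usage sketch (the model, the filter property and the app id below are placeholders; crash_printer/print_crash.py, mentioned in the docstring, has a real example):

import datetime

start_date = datetime.datetime(2017, 1, 1)
# CrashAnalysis is a placeholder for whichever ndb model is being queried.
query = CrashAnalysis.query(CrashAnalysis.requested_time >= start_date)
for crash in ScriptIterate(query, 'findit-for-me', batch_size=1000):
  print crash.key.urlsafe()
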
Example 4
def GetCulprits(crashes,
                client_id,
                app_id,
                verbose=False):  # pragma: no cover.
    """Run ``CrashAnalysis`` entities in parallel and returns culprits.

  Args:
    crashes (list): A list of ``CrashAnalysis`` entities to run Predator on and
      get culprit results.
    client_id (CrashClient): One of CrashClient.FRACAS, CrashClient.CRACAS and
      CrashClient.CLUSTERFUZZ.
    app_id (str): Project id of app engine app.
    verbose (boolean): Whether to print every culprit results or not.

  Returns:
    A dict mapping crash id (urlsafe of entity key for Cracas/Fracas, testcase
    id for Cluterfuzz) to culprit results (dict version of ``Culprit``.)
  """
    # Enable remote access to app engine services.
    remote_api.EnableRemoteApi(app_id)

    tasks = []
    lock = threading.Lock()
    config = CrashConfig.Get()
    id_to_culprits = {}
    for crash in crashes.itervalues():
        tasks.append({
            'function': StoreResults,
            'args': [crash, client_id, app_id, id_to_culprits, lock, config],
            'kwargs': {'verbose': verbose}
        })
    script_util.RunTasks(tasks)

    return id_to_culprits
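
StoreResults is not defined in this example. From the task arguments built above, its shape is roughly the following; RunPredatorOnCrash is a placeholder for however a single ``CrashAnalysis`` entity actually gets analyzed.

def StoreResults(crash, client_id, app_id, id_to_culprits, lock, config,
                 verbose=False):
    """Analyzes one crash and records its culprit under the shared lock (sketch)."""
    culprit = RunPredatorOnCrash(crash, client_id, app_id, config)  # Placeholder.
    with lock:
        id_to_culprits[crash.key.urlsafe()] = culprit
    if verbose:
        print culprit
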
def GetAndShowResults():
    # Set up the Remote API to use services on the live App Engine.
    remote_api.EnableRemoteApi(app_id='findit-for-me')

    # Try to load analyses from cache file on disk. If analyses can't be loaded
    # from disk, retrieve analyses from remote API, and save to disk.

    try:
        all_analyses = _LoadAnalyses()
    except IOError:
        all_analyses = _FetchAnalyses(START_DATE, END_DATE)
        _SaveAnalyses(all_analyses)

    compile_analyses = [
        analysis for analysis in all_analyses
        if analysis.failure_type == failure_type.COMPILE and analysis.correct
    ]

    builds = _FetchBuildsFromAnalyses(compile_analyses)

    compile_analyses_groups = _GroupCompileFailureAnalyses(compile_analyses)
    print 'Number of compile_analyses_groups: %d' % len(
        compile_analyses_groups)

    time_groups = [
        sorted(_GetTimesGroupFromAnalysesGroup(group, builds))
        for group in compile_analyses_groups
    ]
    group_sizes = [len(group) for group in time_groups]
    average_analyses = numpy.average(group_sizes)
    median_analyses = numpy.median(group_sizes)
    standard_deviation_analyses = numpy.std(group_sizes)
    average_seconds_between_first_two_analyses_list = (numpy.average(
        _GetSecondsBetweenTwoTimesInGroupList(time_groups, 0, 1)))
    average_seconds_between_first_and_last_analyses_list = (numpy.average(
        _GetSecondsBetweenTwoTimesInGroupList(time_groups, 0, -1)))
    seconds_between_all_times = _GetSecondsBetweenAllTimesInGroupList(
        time_groups)
    average_of_average_seconds_between_all_times = numpy.average(
        [numpy.average(durations) for durations in seconds_between_all_times])
    builders = Counter([
        analyses_group[0].builder_name
        for analyses_group in compile_analyses_groups
    ])

    print('Average (mean) number of analyses in each group: %d' %
          average_analyses)
    print('Median number of analyses in each group: %d' % median_analyses)
    print('Standard deviation of number of analyses in each group: %d' %
          standard_deviation_analyses)
    print
    print(
        'Average seconds between first two analyses\' request times: %d '
        'seconds' % average_seconds_between_first_two_analyses_list)
    print(
        'Average seconds between first and last analyses\' request times: %d '
        'seconds' % average_seconds_between_first_and_last_analyses_list)
    print(
        'Average of average seconds between all analyses\' request times: %d '
        'seconds' % average_of_average_seconds_between_all_times)
    print
    print 'Builders of first analyses in each group:'
    print builders
    args_dict = vars(parser.parse_args())

    # Preserve order from original command.
    ordered_args = []

    for original_arg in command_line_args:
        parsed_arg = original_arg.lstrip('-')
        if args_dict[parsed_arg]:
            ordered_args.append(parsed_arg)

    return ordered_args


if __name__ == '__main__':
    # Set up the Remote API to use services on the live App Engine.
    remote_api.EnableRemoteApi(app_id='findit-for-me')

    START_DATE = datetime.datetime(2016, 4, 17)
    END_DATE = datetime.datetime(2016, 7, 15)

    try_job_data_query = WfTryJobData.query(
        WfTryJobData.request_time >= START_DATE,
        WfTryJobData.request_time < END_DATE)
    categorized_data = try_job_data_query.fetch()

    args = GetArgsInOrder()
    for arg in args:
        categorized_data = SplitStructByOption(categorized_data, arg)

    # TODO(lijeffrey): Display data in an html page instead of printing.
    PrettyPrint(categorized_data, START_DATE, END_DATE)
def RunDeltaTest():
    """Runs delta testing between two different Predator revisions."""
    argparser = argparse.ArgumentParser(description=(
        'Delta test is a script to report the differences between '
        'analysis results of two local repo revisions. Local git '
        'checkouts are used instead of Gitile to avoid quota '
        'issue.\nNOTE, since the delta test needs to switch on '
        'different revisions of local repo, please commit all local '
        'changes before running the script, and do not make any '
        'new changes while running it.'))

    argparser.add_argument('testset',
                           help='The path to testset to run delta test on.')

    argparser.add_argument(
        '--revisions',
        '-r',
        nargs='+',
        default=['HEAD^', 'HEAD'],
        help=(
            'The Predator revisions to be compared. It can take one or two '
            'revisions separated by spaces. N.B. The revision can be any '
            'format that git can recognize, for example, "97312dbc1" or '
            '"HEAD~5".\n'
            '(1) -r rev1 rev2: compare rev1 and rev2\n'
            '(2) -r rev: compare rev and the current HEAD\n'
            '(3) no revisions provided: default to comparing HEAD^ and HEAD'))

    argparser.add_argument(
        '--client',
        '-c',
        default='cracas',
        help=('Type of client data the delta test is running on; '
              'possible values are: fracas, cracas, clusterfuzz. '
              'Right now, only fracas data is available.'))

    argparser.add_argument(
        '--app',
        '-a',
        default=setup.DEFAULT_APP_ID,
        help=('App id of the App engine app that query needs to access. '
              'Defaults to \'%s\'. NOTE, only appspot app ids are supported, '
              'the app_id of googleplex app will have access issues '
              'due to internal proxy.') % setup.DEFAULT_APP_ID)

    argparser.add_argument('--verbose',
                           '-v',
                           action='store_true',
                           default=False,
                           help='Print Predator results. Defaults to False.')

    args = argparser.parse_args()

    # If in verbose mode, prints debug information.
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    if len(args.revisions) > 2:
        logging.error('Delta test can compare at most two revisions.')
        sys.exit(1)

    # If only one revision provided, default the rev2 to HEAD.
    if len(args.revisions) == 1:
        args.revisions.append('HEAD')

    remote_api.EnableRemoteApi(args.app)

    git_hash1 = delta_util.ParseGitHash(args.revisions[0])
    git_hash2 = delta_util.ParseGitHash(args.revisions[1])

    testset_path = os.path.realpath(args.testset)
    deltas, triage_results, crash_num = EvaluateDeltaOnTestSet(
        git_hash1,
        git_hash2,
        args.client,
        args.app,
        testset_path,
        verbose=args.verbose)

    delta_csv_path = GenerateDeltaCSVPath(DELTA_RESULTS_DIRECTORY, git_hash1,
                                          git_hash2, args.client, testset_path)
    delta_util.WriteDeltaToCSV(deltas,
                               crash_num,
                               args.client,
                               args.app,
                               git_hash1,
                               git_hash2,
                               delta_csv_path,
                               triage_results=triage_results)

    # Print delta results to users.
    print '\n========================= Summary ========================='
    if args.verbose:
        delta_util.PrintDelta(deltas, crash_num, args.client, args.app)

    print 'Writing delta diff to %s\n' % delta_csv_path
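
delta_util.ParseGitHash is used above to turn expressions like 'HEAD^' into concrete hashes. Its implementation is not part of this example; a plausible sketch, assuming the script runs inside the local Predator git checkout, is simply a git rev-parse call:

import subprocess


def ParseGitHash(revision):
    """Resolves a git revision expression (e.g. 'HEAD~5') to a full hash (sketch only)."""
    return subprocess.check_output(['git', 'rev-parse', revision]).strip()
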
Example 8
              'due to internal proxy. ') % setup.DEFAULT_APP_ID)

    argparser.add_argument(
        '--strict',
        '-s',
        default=False,
        action='store_true',
        help=('Whether to use the strict grade model. If strict is true, an '
              'example is considered a true positive iff the correct CL is '
              'among the suspects identified by Predator and Predator '
              'assigned it a confidence value greater than or equal to that '
              'of any other suspect. If strict is false, an example is '
              'considered a true positive as long as the correct CL is among '
              'the suspects, even if it does not have the highest confidence '
              'score.'))

    argparser.add_argument(
        '--suspect-type',
        '-t',  # Note: '-s' is already taken by --strict above.
        dest='suspect_type',
        default='cls',
        help=('The type of suspect to compute metrics for. '
              'The types can only be:\n1. cls: suspected cls\n'
              '2. components: suspected_components.'))

    args = argparser.parse_args()
    remote_api.EnableRemoteApi(args.app)
    examples = grade_model.RunModelOnTestSet(args.client, args.app,
                                             args.testset, args.suspect_type)
    grade_model.PrintMetrics(examples, args.suspect_type, strict=args.strict)
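
The --strict help text above describes the two grading rules in prose. The check itself lives in grade_model and is not shown here; as a reading aid, here is a minimal sketch under the assumption that an example exposes a triaged correct_cl and a suspects list of (suspect_cl, confidence) pairs (both names are illustrative):

def IsTruePositive(example, strict=False):
    """Applies the grading rule described by --strict (sketch only)."""
    confidences = dict(example.suspects)  # {suspect_cl: confidence}, assumed shape.
    if example.correct_cl not in confidences:
        return False
    if not strict:
        # Non-strict: being among the suspects is enough.
        return True
    # Strict: the correct CL must also have the (joint) highest confidence.
    return confidences[example.correct_cl] >= max(confidences.values())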