def RunPredator():
  """Runs Predator on a batch of crashes and optionally stores the results.

  Crashes come either from a zlib-compressed pickle file (--input-path) or
  from a single datastore entity looked up by urlsafe key (--key).
  """
  argparser = argparse.ArgumentParser(
      description='Run Predator on a batch of crashes.')
  argparser.add_argument(
      '--input-path',
      dest='input_path',
      default=None,
      help='Path to read a list of ``CrashAnalysis`` entities')
  argparser.add_argument('--result-path',
                         dest='result_path',
                         default=None,
                         help='Path to store results')
  argparser.add_argument('--key', '-k', default=None,
                         help='Key to one single crash.')
  argparser.add_argument(
      '--client',
      '-c',
      default=setup.DEFAULT_CLIENT,
      help=('Type of client data the delta test is running on, '
            'possible values are: fracas, cracas, clusterfuzz. '
            'Right now, only fracas data is available'))
  argparser.add_argument(
      '--app',
      '-a',
      default=setup.DEFAULT_APP_ID,
      help=('App id of the App engine app that query needs to access. '
            'Defaults to \'%s\'. NOTE, only appspot app ids are supported, '
            'the app_id of googleplex app will have access issues '
            'due to internal proxy. ') % setup.DEFAULT_APP_ID)
  argparser.add_argument('--verbose',
                         '-v',
                         action='store_true',
                         default=False,
                         help='Print Predator results.')
  args = argparser.parse_args()

  # BUG FIX: ``crashes`` used to be unbound when neither --input-path nor
  # --key was given, so ``if not crashes`` raised NameError instead of
  # reaching the intended error log below.
  crashes = None
  if args.input_path:
    # BUG FIX: open in binary mode — the payload is zlib-compressed pickle
    # bytes, not text.
    with open(args.input_path, 'rb') as f:
      crashes = pickle.loads(zlib.decompress(f.read()))
  elif args.key:
    remote_api.EnableRemoteApi(app_id=args.app)
    crashes = {args.key: ndb.Key(urlsafe=args.key).get()}

  if not crashes:
    logging.error('Failed to get crashes info.')
    return

  culprits = GetCulprits(crashes, args.client, args.app, args.verbose)

  if args.result_path:
    script_util.FlushResult(culprits, args.result_path)
def main(): # TODO: add options to limit the date range to fetch # TODO: add options to display summary of fetched info. remote_api.EnableRemoteApi(app_id='findit-for-me') try: all_analyses, stored_start, stored_end, time_records = _LoadAnalyses() except IOError: all_analyses = _FetchAnalyses(START_DATE, END_DATE) time_records = {} _SaveAnalyses(all_analyses, time_records) else: if stored_start > START_DATE: all_analyses = _FetchAnalyses(START_DATE, stored_start) + all_analyses if END_DATE > stored_end: all_analyses += _FetchAnalyses(stored_end, END_DATE) _SaveAnalyses(all_analyses, time_records) saved_count = len(time_records) q = Queue.Queue() r = Queue.Queue() for _ in range(THREAD_COUNT): t = Thread(target=_GetTimes, args=(q, r)) t.daemon = True t.start() for a in all_analyses: if a.key not in time_records.keys(): q.put(a) while not (q.empty() and r.empty()): key, record = r.get() time_records[key] = _Denest(record) print len(time_records) if saved_count + THREAD_COUNT < len(time_records): _SaveAnalyses(all_analyses, time_records) saved_count = len(time_records) print 'Saved %d results' % saved_count q.join()
def ScriptIterate(query,
                  app_id,
                  projection=None,
                  batch_size=iterator.DEFAULT_BATCH_SIZE,
                  batch_run=False):  # pragma: no cover.
  """Yields entities matched by ``query`` through the remote API.

  Args:
    query (ndb.Query): The query to fetch entities.
    app_id (str): App engine app id used to enable remote API access.
    projection: Optional projection forwarded to ``iterator.Iterate``.
    batch_size (int): The number of entities to query at one time.
    batch_run (bool): If True, iterate batches of entities, if False,
      iterate each entity.

  An example is available in crash_printer/print_crash.py.
  """
  remote_api.EnableRemoteApi(app_id)
  entities = iterator.Iterate(query,
                              projection=projection,
                              batch_size=batch_size,
                              batch_run=batch_run)
  for entity in entities:
    yield entity
def GetCulprits(crashes, client_id, app_id, verbose=False):  # pragma: no cover.
  """Runs Predator on ``CrashAnalysis`` entities in parallel, returns culprits.

  Args:
    crashes (dict): Maps crash id to the ``CrashAnalysis`` entity to run
      Predator on and get culprit results for.
    client_id (CrashClient): One of CrashClient.FRACAS, CrashClient.CRACAS
      and CrashClient.CLUSTERFUZZ.
    app_id (str): Project id of app engine app.
    verbose (boolean): Whether to print every culprit result or not.

  Returns:
    A dict mapping crash id (urlsafe of entity key for Cracas/Fracas,
    testcase id for Clusterfuzz) to culprit results (dict version of
    ``Culprit``).
  """
  # Enable remote access to app engine services.
  remote_api.EnableRemoteApi(app_id)

  result_lock = threading.Lock()
  config = CrashConfig.Get()
  id_to_culprits = {}
  # One task per crash; ``StoreResults`` fills ``id_to_culprits`` under
  # ``result_lock``.
  tasks = [{
      'function': StoreResults,
      'args': [crash, client_id, app_id, id_to_culprits, result_lock, config],
      'kwargs': {'verbose': verbose},
  } for crash in crashes.itervalues()]
  script_util.RunTasks(tasks)
  return id_to_culprits
def GetAndShowResults():
  """Fetches compile-failure analyses, groups them, and prints statistics."""
  # Set up the Remote API to use services on the live App Engine.
  remote_api.EnableRemoteApi(app_id='findit-for-me')

  # Try to load analyses from cache file on disk. If analyses can't be loaded
  # from disk, retrieve analyses from remote API, and save to disk.
  try:
    all_analyses = _LoadAnalyses()
  except IOError:
    all_analyses = _FetchAnalyses(START_DATE, END_DATE)
    _SaveAnalyses(all_analyses)

  # Keep only compile failures whose analysis was marked correct.
  compile_analyses = [
      analysis for analysis in all_analyses
      if analysis.failure_type == failure_type.COMPILE and analysis.correct
  ]
  builds = _FetchBuildsFromAnalyses(compile_analyses)
  compile_analyses_groups = _GroupCompileFailureAnalyses(compile_analyses)
  print 'Number of compile_analyses_groups: %d' % len(compile_analyses_groups)

  # Sorted request times for each group of analyses.
  time_groups = [
      sorted(_GetTimesGroupFromAnalysesGroup(group, builds))
      for group in compile_analyses_groups
  ]
  group_sizes = [len(group) for group in time_groups]
  average_analyses = numpy.average(group_sizes)
  median_analyses = numpy.median(group_sizes)
  standard_deviation_analyses = numpy.std(group_sizes)
  # Gap between the first two analyses' request times, averaged over groups.
  average_seconds_between_first_two_analyses_list = (numpy.average(
      _GetSecondsBetweenTwoTimesInGroupList(time_groups, 0, 1)))
  # Gap between the first and last analyses' request times, averaged.
  average_seconds_between_first_and_last_analyses_list = (numpy.average(
      _GetSecondsBetweenTwoTimesInGroupList(time_groups, 0, -1)))
  seconds_between_all_times = _GetSecondsBetweenAllTimesInGroupList(
      time_groups)
  average_of_average_seconds_between_all_times = numpy.average(
      [numpy.average(durations) for durations in seconds_between_all_times])
  # Count which builder produced the first analysis of each group.
  builders = Counter([
      analyses_group[0].builder_name
      for analyses_group in compile_analyses_groups
  ])

  print('Average (mean) number of analyses in each group: %d' %
        average_analyses)
  print('Median number of analyses in each group: %d' % median_analyses)
  print('Standard deviation of number of analyses in each group: %d' %
        standard_deviation_analyses)
  print
  print('Average seconds between first two analyses\' request times: %d '
        'seconds' % average_seconds_between_first_two_analyses_list)
  print('Average seconds between first and last analyses\' request times: %d '
        'seconds' % average_seconds_between_first_and_last_analyses_list)
  print('Average of average seconds between all analyses\' request times: %d '
        'seconds' % average_of_average_seconds_between_all_times)
  print
  print 'Builders of first analyses in each group:'
  print builders
args_dict = vars(parser.parse_args()) # Preserve order from original command. ordered_args = [] for original_arg in command_line_args: parsed_arg = original_arg.lstrip('-') if args_dict[parsed_arg]: ordered_args.append(parsed_arg) return ordered_args if __name__ == '__main__': # Set up the Remote API to use services on the live App Engine. remote_api.EnableRemoteApi(app_id='findit-for-me') START_DATE = datetime.datetime(2016, 4, 17) END_DATE = datetime.datetime(2016, 7, 15) try_job_data_query = WfTryJobData.query( WfTryJobData.request_time >= START_DATE, WfTryJobData.request_time < END_DATE) categorized_data = try_job_data_query.fetch() args = GetArgsInOrder() for arg in args: categorized_data = SplitStructByOption(categorized_data, arg) # TODO(lijeffrey): Display data in an html page instead of printing. PrettyPrint(categorized_data, START_DATE, END_DATE)
def RunDeltaTest():
  """Runs delta testing between two different Predator revisions.

  Compares analysis results of two local git revisions, writes the diff to
  a CSV file, and optionally prints the delta summary.
  """
  argparser = argparse.ArgumentParser(description=(
      'Delta test is a script to report the differences between '
      'analysis results of two local repo revisions. Local git '
      'checkouts are used instead of Gitile to avoid quota '
      'issue.\nNOTE, since the delta test needs to switch on '
      'different revisions of local repo, please commit all local '
      'changes before running the script, and do not make any '
      'new changes while running it.'))
  argparser.add_argument('testset',
                         help='The path to testset to run delta test on.')
  argparser.add_argument(
      '--revisions',
      '-r',
      nargs='+',
      default=['HEAD^', 'HEAD'],
      help=('The Predator revisions to be compared. It can take '
            'one or two revisions seperated by empty spaces. N.B. The revision '
            'can be any format that git can recognize, for example, it can be '
            'either "97312dbc1" or "HEAD~5"\n'
            '(1) -r rev1 rev2: compare rev1 and rev2\n'
            '(2) -r rev: compare rev and current HEAD\n'
            '(3) no revisions provided, default to compare HEAD^ and HEAD'))
  argparser.add_argument(
      '--client',
      '-c',
      default='cracas',
      help=('Type of client data the delta test is running on, '
            'possible values are: fracas, cracas, clusterfuzz. '
            'Right now, only fracas data is available'))
  argparser.add_argument(
      '--app',
      '-a',
      default=setup.DEFAULT_APP_ID,
      help=('App id of the App engine app that query needs to access. '
            'Defaults to \'%s\'. NOTE, only appspot app ids are supported, '
            'the app_id of googleplex app will have access issues '
            'due to internal proxy.') % setup.DEFAULT_APP_ID)
  argparser.add_argument('--verbose',
                         '-v',
                         action='store_true',
                         default=False,
                         help='Print Predator results. Defaults to False.')
  args = argparser.parse_args()

  # If in verbose mode, prints debug information.
  if args.verbose:
    logging.basicConfig(level=logging.DEBUG)
  else:
    logging.basicConfig(level=logging.INFO)

  if len(args.revisions) > 2:
    logging.error('Delta test can compare at most two revisions.')
    sys.exit(1)

  # If only one revision provided, default the rev2 to HEAD.
  if len(args.revisions) == 1:
    args.revisions.append('HEAD')

  remote_api.EnableRemoteApi(args.app)

  # Resolve the symbolic revisions (e.g. HEAD^) to concrete git hashes.
  git_hash1 = delta_util.ParseGitHash(args.revisions[0])
  git_hash2 = delta_util.ParseGitHash(args.revisions[1])

  testset_path = os.path.realpath(args.testset)
  deltas, triage_results, crash_num = EvaluateDeltaOnTestSet(
      git_hash1, git_hash2, args.client, args.app, testset_path,
      verbose=args.verbose)

  delta_csv_path = GenerateDeltaCSVPath(DELTA_RESULTS_DIRECTORY, git_hash1,
                                        git_hash2, args.client, testset_path)
  delta_util.WriteDeltaToCSV(deltas, crash_num, args.client, args.app,
                             git_hash1, git_hash2, delta_csv_path,
                             triage_results=triage_results)

  # Print delta results to users.
  print '\n========================= Summary ========================='
  if args.verbose:
    delta_util.PrintDelta(deltas, crash_num, args.client, args.app)
  print 'Writing delta diff to %s\n' % delta_csv_path
'due to internal proxy. ') % setup.DEFAULT_APP_ID) argparser.add_argument( '--strict', '-s', default=False, action='store_true', help= ('Whether to use strict grade model or not, if strict is true, an ' 'example is considered to be a true positive iff: the correct CL is' ' among the suspects identified by Predator, and Predator assigned ' 'it a confidence value greater than or equal to that of any other ' 'suspect. Else if strict is false, an example is considered to be ' 'true when the suspects is identified by Predator, even it\'s not ' 'with the highest confidence score.')) argparser.add_argument( '--suspect-type', '-s', dest='suspect_type', default='cls', help=('The type of suspect to compute metrics of. ' 'The types can only be:\n1. cls: suspected cls\n' '2. components: suspected_components.')) args = argparser.parse_args() remote_api.EnableRemoteApi(args.app) examples = grade_model.RunModelOnTestSet(args.client, args.app, args.testset, args.suspect_type) grade_model.PrintMetrics(examples, args.suspect_type, strict=args.strict)