def do_test(log, check_errors=True):
    """
    Test a single log.

    ``log`` - the url prefix of the log to test.  Also searches for the
              result file with the same prefix.
    """
    url = "file://{0}".format(
        SampleData().get_log_path("{0}.txt.gz".format(log)))

    builder = BuildbotLogViewArtifactBuilder(url, check_errors=check_errors)
    lpc = ArtifactBuilderCollection(url, builders=builder)
    lpc.parse()
    act = lpc.artifacts[builder.name]

    # we can't compare the "logurl" field, because it's a fully qualified url,
    # so it will be different depending on the config it's run in.
    assert "logurl" in act
    del act["logurl"]

    exp = test_utils.load_exp("{0}.logview.json".format(log))

    # :: use to create the ``exp`` files, if you're making a lot of them
    # with open(SampleData().get_log_path("{0}.logview.json".format(log)), "w") as f:
    #     f.write(json.dumps(act, indent=4))

    # log urls won't match in tests, since they're machine specific
    # but leave it in the exp file as an example of what the real structure
    # should look like.
    del exp["logurl"]

    assert act == exp  # , diff(exp, act)
def do_test(log, check_errors=True):
    """
    Test a single log.

    ``log`` - the url prefix of the log to test.  Also searches for the
              result file with the same prefix.
    """
    url = "file://{0}".format(SampleData().get_log_path(
        "{0}.txt.gz".format(log)))

    builder = BuildbotLogViewArtifactBuilder(url, check_errors=check_errors)
    lpc = ArtifactBuilderCollection(url, builders=builder)
    lpc.parse()
    act = lpc.artifacts[builder.name]

    # we can't compare the "logurl" field, because it's a fully qualified url,
    # so it will be different depending on the config it's run in.
    assert "logurl" in act
    del act["logurl"]

    exp = test_utils.load_exp("{0}.logview.json".format(log))

    # :: use to create the ``exp`` files, if you're making a lot of them
    # with open(SampleData().get_log_path("{0}.logview.json".format(log)), "w") as f:
    #     f.write(json.dumps(act, indent=4))

    # log urls won't match in tests, since they're machine specific
    # but leave it in the exp file as an example of what the real structure
    # should look like.
    del exp["logurl"]

    assert act == exp  # , diff(exp, act)
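# A minimal usage sketch for ``do_test`` above, assuming pytest and a sample
# log shipped with the test data. The "mochitest-pass" prefix is hypothetical
# and stands in for any real sample known to SampleData.
import pytest


@pytest.mark.parametrize("log_prefix", [
    "mochitest-pass",  # hypothetical sample log prefix
])
def test_logview_sample(log_prefix):
    # do_test loads <prefix>.txt.gz, parses it, and compares the resulting
    # artifact against the stored <prefix>.logview.json expectation file.
    do_test(log_prefix)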
def test_all_builders_complete():
    """Test that the correct structure is created when every parser is marked complete."""
    log = "mozilla-central_fedora-b2g_test-crashtest-1-bm54-tests1-linux-build50"
    url = "file://{0}".format(
        SampleData().get_log_path("{0}.txt.gz".format(log)))

    lpc = ArtifactBuilderCollection(url)
    for builder in lpc.builders:
        builder.parser.complete = True

    lpc.parse()
    exp = {
        "text_log_summary": {
            "step_data": {
                "steps": [],
                "errors_truncated": False
            },
        },
        "Job Info": {
            "job_details": []
        }
    }

    act = lpc.artifacts

    # we can't compare the "logurl" field, because it's a fully qualified url,
    # so it will be different depending on the config it's run in.
    assert "logurl" in act["text_log_summary"]
    assert "logurl" in act["Job Info"]
    del act["Job Info"]["logurl"]
    del act["text_log_summary"]["logurl"]

    assert exp == lpc.artifacts, diff(exp, lpc.artifacts)
def handle(self, *args, **options):
    if len(args) != 1:
        raise CommandError("Need to specify (only) log URL")

    if options['profile']:
        num_runs = options['profile']
    else:
        num_runs = 1

    times = []
    for i in range(num_runs):
        start = time.time()
        artifact_bc = ArtifactBuilderCollection(args[0], check_errors=True)
        artifact_bc.parse()
        times.append(time.time() - start)

        if not options['profile']:
            for name, artifact in artifact_bc.artifacts.items():
                print("%s, %s" % (name, json.dumps(artifact)))

    if options['profile']:
        print("Timings: %s" % times)
        print("Average: %s" % (sum(times) / len(times)))
        print("Total: %s" % sum(times))
def test_all_builders_complete():
    """Test that the correct structure is created when every parser is marked complete."""
    url = add_log_response(
        "mozilla-central_fedora-b2g_test-crashtest-1-bm54-tests1-linux-build50.txt.gz"
    )
    lpc = ArtifactBuilderCollection(url)
    for builder in lpc.builders:
        builder.parser.complete = True

    lpc.parse()
    exp = {
        "text_log_summary": {
            "step_data": {
                "steps": [],
                "errors_truncated": False
            },
            "logurl": url,
        },
        "Job Info": {
            "job_details": [],
            "logurl": url,
        }
    }

    assert exp == lpc.artifacts
def extract_text_log_artifacts(project, log_url, job_guid):
    """Generate a summary artifact for the raw text log."""
    # parse a log given its url
    artifact_bc = ArtifactBuilderCollection(log_url)
    artifact_bc.parse()

    artifact_list = []
    for name, artifact in artifact_bc.artifacts.items():
        if name == 'Job Info':
            for detail in artifact['job_details']:
                if ('title' in detail and
                        detail['title'] == 'artifact uploaded' and
                        detail['value'].endswith('_errorsummary.log')):
                    # using .send_task to avoid an import loop.
                    celery_app.send_task('store-error-summary',
                                         [project, detail['url'], job_guid],
                                         routing_key='store_error_summary')
        artifact_list.append({
            "job_guid": job_guid,
            "name": name,
            "type": 'json',
            "blob": json.dumps(artifact)
        })

    artifact_list.extend(get_error_summary_artifacts(artifact_list))

    return artifact_list
def do_test(log):
    """
    Test a single log with the ``JobArtifactBuilder``.

    ``log`` - the url prefix of the log to test.  Also searches for the
              result file with the same prefix.
    """
    url = "file://{0}".format(SampleData().get_log_path(
        "{0}.txt.gz".format(log)))
    exp = test_utils.load_exp("{0}.jobartifact.json".format(log))

    builder = BuildbotJobArtifactBuilder(url)
    lpc = ArtifactBuilderCollection(url, builders=builder)
    lpc.parse()
    act = lpc.artifacts[builder.name]

    # we can't compare the "logurl" field, because it's a fully qualified url,
    # so it will be different depending on the config it's run in.
    assert "logurl" in act
    del act["logurl"]

    # leaving the logurl in the exp files so they are a good example of the
    # expected structure.
    del exp["logurl"]

    # assert act == exp, diff(exp, act)
    # if you want to gather results for a new test, use this
    assert len(act) == len(exp)
    for index, artifact in act.items():
        assert artifact == exp[index]
def test_all_builders_complete():
    """Test that the correct structure is created when every parser is marked complete."""
    log = "mozilla-central_fedora-b2g_test-crashtest-1-bm54-tests1-linux-build50"
    url = "file://{0}".format(SampleData().get_log_path(
        "{0}.txt.gz".format(log)))

    lpc = ArtifactBuilderCollection(url)
    for builder in lpc.builders:
        for parser in builder.parsers:
            parser.complete = True

    lpc.parse()
    exp = {
        "text_log_summary": {
            "header": {},
            "step_data": {
                "all_errors": [],
                "steps": [],
                "errors_truncated": False
            },
        },
        "Job Info": {
            "job_details": []
        }
    }

    act = lpc.artifacts

    # we can't compare the "logurl" field, because it's a fully qualified url,
    # so it will be different depending on the config it's run in.
    assert "logurl" in act["text_log_summary"]
    assert "logurl" in act["Job Info"]
    del act["Job Info"]["logurl"]
    del act["text_log_summary"]["logurl"]

    assert exp == lpc.artifacts, diff(exp, lpc.artifacts)
def do_test(log):
    """
    Test a single log with the ``JobArtifactBuilder``.

    ``log`` - the url prefix of the log to test.  Also searches for the
              result file with the same prefix.
    """
    url = add_log_response("{}.txt.gz".format(log))

    builder = BuildbotJobArtifactBuilder(url)
    lpc = ArtifactBuilderCollection(url, builders=builder)
    lpc.parse()
    act = lpc.artifacts[builder.name]
    exp = test_utils.load_exp("{0}.jobartifact.json".format(log))

    # :: Uncomment to create the ``exp`` files, if you're making a lot of them
    # import json
    # from tests.sampledata import SampleData
    # with open(SampleData().get_log_path("{0}.jobartifact.json".format(log)), "w") as f:
    #     f.write(json.dumps(act, indent=4))

    # assert act == exp, diff(exp, act)
    # if you want to gather results for a new test, use this
    assert len(act) == len(exp)
    for index, artifact in act.items():
        assert artifact == exp[index]
def do_test(log):
    """
    Test a single log with the ``JobArtifactBuilder``.

    ``log`` - the url prefix of the log to test.  Also searches for the
              result file with the same prefix.
    """
    url = "file://{0}".format(
        SampleData().get_log_path("{0}.txt.gz".format(log)))
    exp = test_utils.load_exp("{0}.jobartifact.json".format(log))

    builder = BuildbotJobArtifactBuilder(url)
    lpc = ArtifactBuilderCollection(url, builders=builder)
    lpc.parse()
    act = lpc.artifacts[builder.name]

    # we can't compare the "logurl" field, because it's a fully qualified url,
    # so it will be different depending on the config it's run in.
    assert "logurl" in act
    del act["logurl"]

    # leaving the logurl in the exp files so they are a good example of the
    # expected structure.
    del exp["logurl"]

    # assert act == exp, diff(exp, act)
    # if you want to gather results for a new test, use this
    assert len(act) == len(exp)
    for index, artifact in act.items():
        assert artifact == exp[index]
def extract_log_artifacts(log_url, job_guid, check_errors):
    bug_suggestions = []
    bugscache_uri = '{0}{1}'.format(settings.API_HOSTNAME,
                                    reverse("bugscache-list"))
    terms_requested = {}

    # parse a log given its url
    artifact_bc = ArtifactBuilderCollection(log_url,
                                            check_errors=check_errors)
    artifact_bc.parse()

    artifact_list = []
    for name, artifact in artifact_bc.artifacts.items():
        artifact_list.append((job_guid, name, 'json', json.dumps(artifact)))

    if check_errors:
        all_errors = artifact_bc.artifacts\
            .get('Structured Log', {})\
            .get('step_data', {})\
            .get('all_errors', [])

        for err in all_errors:
            # remove the mozharness prefix
            clean_line = get_mozharness_substring(err['line'])
            # get a meaningful search term out of the error line
            search_term = get_error_search_term(clean_line)
            bugs = dict(open_recent=[], all_others=[])

            # collect open recent and all other bugs suggestions
            if search_term:
                if search_term not in terms_requested:
                    # retrieve the list of suggestions from the api
                    bugs = get_bugs_for_search_term(search_term,
                                                    bugscache_uri)
                    terms_requested[search_term] = bugs
                else:
                    bugs = terms_requested[search_term]

            if not bugs or not (bugs['open_recent'] or bugs['all_others']):
                # no suggestions, try to use
                # the crash signature as search term
                crash_signature = get_crash_signature(clean_line)
                if crash_signature:
                    if crash_signature not in terms_requested:
                        bugs = get_bugs_for_search_term(crash_signature,
                                                        bugscache_uri)
                        terms_requested[crash_signature] = bugs
                    else:
                        bugs = terms_requested[crash_signature]

            bug_suggestions.append({"search": clean_line, "bugs": bugs})

    artifact_list.append(
        (job_guid, 'Bug suggestions', 'json', json.dumps(bug_suggestions)))

    return artifact_list
def test_log_download_size_limit():
    """Test that logs whose Content-Length exceeds the size limit are not parsed."""
    url = 'http://foo.tld/fake_large_log.tar.gz'
    responses.add(responses.GET, url,
                  body='',
                  adding_headers={
                      'Content-Encoding': 'gzip',
                      'Content-Length': str(MAX_DOWNLOAD_SIZE_IN_BYTES + 1),
                  })
    lpc = ArtifactBuilderCollection(url)

    with pytest.raises(LogSizeException):
        lpc.parse()
def extract_text_log_artifacts(project, log_url, job_guid):
    """Generate a summary artifact for the raw text log."""
    # parse a log given its url
    artifact_bc = ArtifactBuilderCollection(log_url)
    artifact_bc.parse()

    artifact_list = []
    for name, artifact in artifact_bc.artifacts.items():
        artifact_list.append({"job_guid": job_guid,
                              "name": name,
                              "type": "json",
                              "blob": json.dumps(artifact)})

    artifact_list.extend(get_error_summary_artifacts(artifact_list))

    return artifact_list
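# A hedged sketch of consuming the list returned by the function above; the
# project name, log URL, and job guid are placeholders, not values from the
# source. It only checks the dict shape that the appends above produce.
def store_artifacts_example():
    artifacts = extract_text_log_artifacts(
        "myproject",                            # hypothetical project name
        "http://example.com/sample.txt.gz",     # hypothetical log URL
        "abc123",                               # hypothetical job guid
    )
    for artifact in artifacts:
        # each entry is a dict with job_guid/name/type/blob keys, where
        # blob is the JSON-serialized artifact body
        assert set(artifact) == {"job_guid", "name", "type", "blob"}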
def test_performance_log_parsing():
    """
    Validate that we can parse a generic performance artifact
    """
    sd = SampleData()

    for logfile in ['mozilla-inbound-android-api-11-debug-bm91-build1-build1317.txt.gz',
                    'try_ubuntu64_hw_test-chromez-bm103-tests1-linux-build1429.txt.gz']:
        file_path = sd.get_log_path(logfile)
        file_url = 'file://{}'.format(file_path)

        builder = BuildbotPerformanceDataArtifactBuilder(url=file_url)
        lpc = ArtifactBuilderCollection(file_url, builders=[builder])
        lpc.parse()
        act = lpc.artifacts[builder.name]
        validate(act['performance_data'], PERFHERDER_SCHEMA)
def test_log_download_size_limit():
    """Test that logs whose Content-Length exceeds the size limit are not parsed."""
    url = 'http://foo.tld/fake_large_log.tar.gz'
    responses.add(
        responses.GET, url,
        body='',
        adding_headers={
            'Content-Encoding': 'gzip',
            'Content-Length': str(MAX_DOWNLOAD_SIZE_IN_BYTES + 1),
        }
    )
    lpc = ArtifactBuilderCollection(url)

    with pytest.raises(LogSizeException):
        lpc.parse()
def test_performance_log_parsing():
    """
    Validate that we can parse a generic performance artifact
    """
    sd = SampleData()

    for logfile in [
        'mozilla-inbound-android-api-11-debug-bm91-build1-build1317.txt.gz',
        'try_ubuntu64_hw_test-chromez-bm103-tests1-linux-build1429.txt.gz'
    ]:
        file_path = sd.get_log_path(logfile)
        file_url = 'file://{}'.format(file_path)

        builder = BuildbotPerformanceDataArtifactBuilder(url=file_url)
        lpc = ArtifactBuilderCollection(file_url, builders=[builder])
        lpc.parse()
        act = lpc.artifacts[builder.name]
        validate(act['performance_data'], PERFHERDER_SCHEMA)
def extract_text_log_artifacts(job_log):
    """Generate a set of artifacts by parsing the raw text log."""
    # parse a log given its url
    artifact_bc = ArtifactBuilderCollection(job_log.url)
    artifact_bc.parse()

    artifact_list = []
    for name, artifact in artifact_bc.artifacts.items():
        artifact_list.append({
            "job_guid": job_log.job.guid,
            "name": name,
            "type": 'json',
            "blob": json.dumps(artifact),
        })

    return artifact_list
def extract_text_log_artifacts(job_log):
    """Generate a set of artifacts by parsing the raw text log."""
    # parse a log given its url
    artifact_bc = ArtifactBuilderCollection(job_log.url)
    artifact_bc.parse()

    artifact_list = []
    for name, artifact in artifact_bc.artifacts.items():
        artifact_list.append({
            "job_guid": job_log.job.guid,
            "name": name,
            "type": 'json',
            "blob": json.dumps(artifact)
        })

    return artifact_list
def extract_text_log_artifacts(project, log_url, job_guid):
    """Generate a summary artifact for the raw text log."""
    # parse a log given its url
    artifact_bc = ArtifactBuilderCollection(log_url)
    artifact_bc.parse()

    artifact_list = []
    for name, artifact in artifact_bc.artifacts.items():
        artifact_list.append({
            "job_guid": job_guid,
            "name": name,
            "type": 'json',
            "blob": json.dumps(artifact)
        })

    artifact_list.extend(get_error_summary_artifacts(artifact_list))

    return artifact_list
def test_performance_log_parsing():
    """
    Validate that we can parse a generic performance artifact
    """
    # the first two logs have one performance artifact each; the third has two
    for (logfile, num_perf_artifacts) in [
        ('mozilla-inbound-android-api-11-debug-bm91-build1-build1317.txt.gz', 1),
        ('try_ubuntu64_hw_test-chromez-bm103-tests1-linux-build1429.txt.gz', 1),
        ('mozilla-inbound-linux64-bm72-build1-build225.txt.gz', 2)
    ]:
        url = add_log_response(logfile)

        builder = BuildbotPerformanceDataArtifactBuilder(url=url)
        lpc = ArtifactBuilderCollection(url, builders=[builder])
        lpc.parse()
        act = lpc.artifacts[builder.name]
        assert len(act['performance_data']) == num_perf_artifacts
        for perfherder_artifact in act['performance_data']:
            validate(perfherder_artifact, PERFHERDER_SCHEMA)
def test_talos_log_parsing():
    """
    Make sure all performance data log examples validate with the
    talos json schema.
    """
    sd = SampleData()
    files = sd.get_talos_logs()

    for file_url in files:
        builder = BuildbotTalosDataArtifactBuilder(url=file_url)
        lpc = ArtifactBuilderCollection(file_url, builders=[builder])
        lpc.parse()
        act = lpc.artifacts[builder.name]

        # Validate that the data returned conforms to the talos json schema
        for talos_datum in act['talos_data']:
            validate(talos_datum, TALOS_SCHEMA)
def test_performance_log_parsing():
    """
    Make sure all performance data log examples validate with the
    datazilla json schema.
    """
    sd = SampleData()
    files = sd.get_performance_logs()
    tda = TalosDataAdapter()

    for file_url in files:
        builder = BuildbotPerformanceDataArtifactBuilder(url=file_url)
        lpc = ArtifactBuilderCollection(file_url, builders=[builder])
        lpc.parse()
        act = lpc.artifacts[builder.name]

        # Validate that the data returned conforms to the required datazilla
        # json schema
        validate(act['talos_data'][0], tda.datazilla_schema)
def handle(self, *args, **options):
    if options['profile']:
        num_runs = options['profile']
    else:
        num_runs = 1

    times = []
    for _ in range(num_runs):
        start = time.time()
        artifact_bc = ArtifactBuilderCollection(options['log_url'])
        artifact_bc.parse()
        times.append(time.time() - start)

        if not options['profile']:
            for name, artifact in artifact_bc.artifacts.items():
                print("%s, %s" % (name, json.dumps(artifact, indent=2)))

    if options['profile']:
        print("Timings: %s" % times)
        print("Average: %s" % (sum(times) / len(times)))
        print("Total: %s" % sum(times))
def handle(self, *args, **options):
    if options['profile']:
        num_runs = options['profile']
    else:
        num_runs = 1

    times = []
    for _ in range(num_runs):
        start = time.time()
        artifact_bc = ArtifactBuilderCollection(options['log_url'])
        artifact_bc.parse()
        times.append(time.time() - start)

        if not options['profile']:
            for name, artifact in artifact_bc.artifacts.items():
                print("%s, %s" % (name, json.dumps(artifact, indent=2)))

    if options['profile']:
        print("Timings: %s" % times)
        print("Average: %s" % (sum(times) / len(times)))
        print("Total: %s" % sum(times))
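# A minimal sketch of exercising the management command above through
# Django's call_command. The command name "test_parse_log" is hypothetical;
# substitute whatever name this repo registers for the command.
from django.core.management import call_command

call_command(
    "test_parse_log",                             # hypothetical command name
    log_url="http://example.com/sample.txt.gz",   # hypothetical log URL
    profile=3,                                    # parse three times and report timings
)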
def do_test(log):
    """
    Test a single log.

    ``log`` - the url prefix of the log to test.  Also searches for the
              result file with the same prefix.
    """
    url = add_log_response("{}.txt.gz".format(log))

    builder = BuildbotLogViewArtifactBuilder(url)
    lpc = ArtifactBuilderCollection(url, builders=builder)
    lpc.parse()
    act = lpc.artifacts[builder.name]
    exp = test_utils.load_exp("{0}.logview.json".format(log))

    # :: Uncomment to create the ``exp`` files, if you're making a lot of them
    # import json
    # from tests.sampledata import SampleData
    # with open(SampleData().get_log_path("{0}.logview.json".format(log)), "w") as f:
    #     f.write(json.dumps(act, indent=4))

    assert act == exp  # , diff(exp, act)
def do_test(log):
    """
    Test a single log.

    ``log`` - the url prefix of the log to test.  Also searches for the
              result file with the same prefix.
    """
    url = add_log_response("{}.txt.gz".format(log))

    builder = BuildbotLogViewArtifactBuilder(url)
    lpc = ArtifactBuilderCollection(url, builders=builder)
    lpc.parse()
    act = lpc.artifacts[builder.name]
    exp = test_utils.load_exp("{0}.logview.json".format(log))

    # :: Uncomment to create the ``exp`` files, if you're making a lot of them
    # import json
    # from tests.sampledata import SampleData
    # with open(SampleData().get_log_path("{0}.logview.json".format(log)), "w") as f:
    #     f.write(json.dumps(act, indent=2))

    assert act == exp  # , diff(exp, act)
def handle(self, *args, **options):
    if len(args) != 1:
        raise CommandError("Need to specify (only) log URL")

    if options['profile']:
        num_runs = options['profile']
    else:
        num_runs = 1

    times = []
    for i in range(num_runs):
        start = time.time()
        artifact_bc = ArtifactBuilderCollection(args[0])
        artifact_bc.parse()
        times.append(time.time() - start)

        if not options['profile']:
            for name, artifact in artifact_bc.artifacts.items():
                print("%s, %s" % (name, json.dumps(artifact)))

    if options['profile']:
        print("Timings: %s" % times)
        print("Average: %s" % (sum(times) / len(times)))
        print("Total: %s" % sum(times))
def parse_log(project, job_log_url, job_guid, check_errors=False):
    """
    Call ArtifactBuilderCollection on the given job.
    """
    credentials = OAuthCredentials.get_credentials(project)
    req = TreeherderRequest(
        protocol=settings.TREEHERDER_REQUEST_PROTOCOL,
        host=settings.TREEHERDER_REQUEST_HOST,
        project=project,
        oauth_key=credentials.get('consumer_key', None),
        oauth_secret=credentials.get('consumer_secret', None),
    )
    update_endpoint = 'job-log-url/{0}/update_parse_status'.format(job_log_url['id'])

    try:
        log_url = job_log_url['url']
        bug_suggestions = []
        bugscache_uri = '{0}{1}'.format(
            settings.API_HOSTNAME,
            reverse("bugscache-list")
        )
        terms_requested = {}

        if log_url:
            # parse a log given its url
            artifact_bc = ArtifactBuilderCollection(log_url,
                                                    check_errors=check_errors)
            artifact_bc.parse()

            artifact_list = []
            for name, artifact in artifact_bc.artifacts.items():
                artifact_list.append((job_guid, name, 'json',
                                      json.dumps(artifact)))

            if check_errors:
                all_errors = artifact_bc.artifacts.get(
                    'Structured Log', {}
                ).get(
                    'step_data', {}
                ).get(
                    'all_errors', []
                )

                for err in all_errors:
                    # remove the mozharness prefix
                    clean_line = get_mozharness_substring(err['line'])
                    # get a meaningful search term out of the error line
                    search_term = get_error_search_term(clean_line)
                    bugs = dict(open_recent=[], all_others=[])

                    # collect open recent and all other bugs suggestions
                    if search_term:
                        if search_term not in terms_requested:
                            # retrieve the list of suggestions from the api
                            bugs = get_bugs_for_search_term(
                                search_term,
                                bugscache_uri
                            )
                            terms_requested[search_term] = bugs
                        else:
                            bugs = terms_requested[search_term]

                    if not bugs or not (bugs['open_recent'] or
                                        bugs['all_others']):
                        # no suggestions, try to use
                        # the crash signature as search term
                        crash_signature = get_crash_signature(clean_line)
                        if crash_signature:
                            if crash_signature not in terms_requested:
                                bugs = get_bugs_for_search_term(
                                    crash_signature,
                                    bugscache_uri
                                )
                                terms_requested[crash_signature] = bugs
                            else:
                                bugs = terms_requested[crash_signature]

                    bug_suggestions.append({
                        "search": clean_line,
                        "bugs": bugs
                    })

            artifact_list.append((job_guid, 'Bug suggestions', 'json',
                                  json.dumps(bug_suggestions)))

            # store the artifacts generated
            tac = TreeherderArtifactCollection()
            for artifact in artifact_list:
                ta = tac.get_artifact({
                    "job_guid": artifact[0],
                    "name": artifact[1],
                    "type": artifact[2],
                    "blob": artifact[3]
                })
                tac.add(ta)
            req.post(tac)

        # send an update to job_log_url so its status changes to parsed
        current_timestamp = time.time()
        status = 'parsed'
        req.send(
            update_endpoint,
            method='POST',
            data={
                'parse_status': status,
                'parse_timestamp': current_timestamp
            }
        )
    except Exception as e:
        parse_log.retry(exc=e)
        # send an update to job_log_url so its status changes to failed
        current_timestamp = time.time()
        status = 'failed'
        req.send(
            update_endpoint,
            method='POST',
            data={
                'parse_status': status,
                'parse_timestamp': current_timestamp
            }
        )
        # re-raise the exception to leave a trace in the log
        raise
def parse_log(project, log_url, job_guid, resultset, check_errors=False):
    """
    Call ArtifactBuilderCollection on the given job.
    """
    mozharness_pattern = re.compile(
        r'^\d+:\d+:\d+[ ]+(?:DEBUG|INFO|WARNING|ERROR|CRITICAL|FATAL) - [ ]?'
    )

    bugs_cache = {'open': {}, 'closed': {}}
    bug_suggestions = {'open': {}, 'closed': {}}

    status_publisher = JobStatusPublisher(settings.BROKER_URL)
    failure_publisher = JobFailurePublisher(settings.BROKER_URL)

    try:
        # return the resultset with the job id to identify if the UI wants
        # to fetch the whole thing.
        bugscache_uri = '{0}{1}'.format(
            settings.API_HOSTNAME,
            reverse("bugscache-list")
        )
        credentials = OAuthCredentials.get_credentials(project)

        if log_url:
            # parse a log given its url
            artifact_bc = ArtifactBuilderCollection(
                log_url,
                check_errors=check_errors,
            )
            artifact_bc.parse()

            artifact_list = []
            for name, artifact in artifact_bc.artifacts.items():
                artifact_list.append((job_guid, name, 'json',
                                      json.dumps(artifact)))

            if check_errors:
                all_errors = artifact_bc.artifacts['Structured Log']['step_data']['all_errors']
                for err in all_errors:
                    # remove the mozharness prefix
                    clean_line = mozharness_pattern.sub('', err['line']).strip()
                    # get a meaningful search term out of the error line
                    search_term = get_error_search_term(clean_line)
                    # collect open and closed bugs suggestions
                    for status in ('open', 'closed'):
                        if not search_term:
                            bug_suggestions[status][clean_line] = []
                            continue
                        if search_term not in bugs_cache[status]:
                            # retrieve the list of suggestions from the api
                            bugs_cache[status][search_term] = get_bugs_for_search_term(
                                search_term,
                                status,
                                bugscache_uri
                            )
                            # no suggestions, try to use the crash signature
                            # as search term
                            if not bugs_cache[status][search_term]:
                                crash_signature = get_crash_signature(search_term)
                                if crash_signature:
                                    bugs_cache[status][search_term] = get_bugs_for_search_term(
                                        search_term,
                                        status,
                                        bugscache_uri
                                    )
                        bug_suggestions[status][clean_line] = \
                            bugs_cache[status][search_term]

            artifact_list.append((job_guid, 'Open bugs', 'json',
                                  json.dumps(bug_suggestions['open'])))
            artifact_list.append((job_guid, 'Closed bugs', 'json',
                                  json.dumps(bug_suggestions['closed'])))

            # store the artifacts generated
            tac = TreeherderArtifactCollection()
            for artifact in artifact_list:
                ta = tac.get_artifact({
                    "job_guid": artifact[0],
                    "name": artifact[1],
                    "type": artifact[2],
                    "blob": artifact[3]
                })
                tac.add(ta)
            req = TreeherderRequest(
                protocol=settings.TREEHERDER_REQUEST_PROTOCOL,
                host=settings.TREEHERDER_REQUEST_HOST,
                project=project,
                oauth_key=credentials.get('consumer_key', None),
                oauth_secret=credentials.get('consumer_secret', None),
            )
            req.send(tac)

        status_publisher.publish(job_guid, resultset, project, 'processed')
        if check_errors:
            failure_publisher.publish(job_guid, project)
    finally:
        status_publisher.disconnect()
        failure_publisher.disconnect()
def extract_log_artifacts(log_url, job_guid, check_errors):
    bug_suggestions = []
    bugscache_uri = '{0}{1}'.format(
        settings.API_HOSTNAME,
        reverse("bugscache-list")
    )
    terms_requested = {}

    # parse a log given its url
    artifact_bc = ArtifactBuilderCollection(log_url,
                                            check_errors=check_errors)
    artifact_bc.parse()

    artifact_list = []
    for name, artifact in artifact_bc.artifacts.items():
        artifact_list.append((job_guid, name, 'json', json.dumps(artifact)))

    if check_errors:
        all_errors = artifact_bc.artifacts\
            .get('Structured Log', {})\
            .get('step_data', {})\
            .get('all_errors', [])

        for err in all_errors:
            # remove the mozharness prefix
            clean_line = get_mozharness_substring(err['line'])
            # get a meaningful search term out of the error line
            search_term = get_error_search_term(clean_line)
            bugs = dict(open_recent=[], all_others=[])

            # collect open recent and all other bugs suggestions
            if search_term:
                if search_term not in terms_requested:
                    # retrieve the list of suggestions from the api
                    bugs = get_bugs_for_search_term(
                        search_term,
                        bugscache_uri
                    )
                    terms_requested[search_term] = bugs
                else:
                    bugs = terms_requested[search_term]

            if not bugs or not (bugs['open_recent'] or bugs['all_others']):
                # no suggestions, try to use
                # the crash signature as search term
                crash_signature = get_crash_signature(clean_line)
                if crash_signature:
                    if crash_signature not in terms_requested:
                        bugs = get_bugs_for_search_term(
                            crash_signature,
                            bugscache_uri
                        )
                        terms_requested[crash_signature] = bugs
                    else:
                        bugs = terms_requested[crash_signature]

            bug_suggestions.append({
                "search": clean_line,
                "bugs": bugs
            })

    artifact_list.append(
        (
            job_guid,
            'Bug suggestions',
            'json',
            json.dumps(bug_suggestions)
        )
    )

    return artifact_list
def parse_log(log, check_errors):
    artifact_builder_collection = ArtifactBuilderCollection(
        log,
        check_errors=check_errors,
    )
    artifact_builder_collection.parse()
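# A minimal sketch of parsing a local log directly with
# ArtifactBuilderCollection, mirroring the file:// pattern used in the tests
# above; the path is a placeholder, not a real sample.
import json

url = "file:///tmp/sample.txt.gz"  # hypothetical local gzipped log
collection = ArtifactBuilderCollection(url, check_errors=True)
collection.parse()
for name, artifact in collection.artifacts.items():
    # print a short preview of each artifact that the builders produced
    print("%s: %s" % (name, json.dumps(artifact)[:80]))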
def parse_log(project, job_id, result_set_id, check_errors=False):
    """
    Call ArtifactBuilderCollection on the given job.
    """
    pattern_obj = re.compile(r'\d+:\d+:\d+\s+')

    jm = JobsModel(project=project)
    rdm = RefDataManager()

    open_bugs_cache = {}
    closed_bugs_cache = {}

    status_publisher = JobStatusPublisher(settings.BROKER_URL)
    failure_publisher = JobFailurePublisher(settings.BROKER_URL)

    try:
        # return the resultset with the job id to identify if the UI wants
        # to fetch the whole thing.
        resultset = jm.get_result_set_by_id(result_set_id=result_set_id)[0]
        del resultset["active_status"]
        del resultset["revision_hash"]

        log_references = jm.get_log_references(job_id)

        # we may have many log references per job
        for log in log_references:
            # parse a log given its url
            artifact_bc = ArtifactBuilderCollection(
                log['url'],
                check_errors=check_errors,
            )
            artifact_bc.parse()

            artifact_list = []
            for name, artifact in artifact_bc.artifacts.items():
                artifact_list.append((job_id, name, 'json',
                                      json.dumps(artifact)))

            if check_errors:
                # I'll try to begin with a full_text search on the entire row
                all_errors = artifact_bc.artifacts['Structured Log']['step_data']['all_errors']

                open_bugs_suggestions = {}
                closed_bugs_suggestions = {}

                for err in all_errors:
                    # remove timestamp
                    clean_line = pattern_obj.sub('', err['line'])

                    if clean_line not in open_bugs_cache:
                        open_bugs_cache[clean_line] = rdm.get_suggested_bugs(
                            clean_line)
                    if clean_line not in closed_bugs_cache:
                        closed_bugs_cache[clean_line] = rdm.get_suggested_bugs(
                            clean_line, open_bugs=False)

                    open_bugs_suggestions[err['line']] = open_bugs_cache[clean_line]
                    closed_bugs_suggestions[err['line']] = closed_bugs_cache[clean_line]

                artifact_list.append((job_id, 'Open bugs', 'json',
                                      json.dumps(open_bugs_suggestions)))
                artifact_list.append((job_id, 'Closed bugs', 'json',
                                      json.dumps(closed_bugs_suggestions)))

            # store the artifacts generated
            jm.store_job_artifact(artifact_list)

        status_publisher.publish(job_id, resultset, project, 'processed')
        if check_errors:
            failure_publisher.publish(job_id, project)
    finally:
        rdm.disconnect()
        jm.disconnect()
        status_publisher.disconnect()
        failure_publisher.disconnect()
def extract_text_log_artifacts(log_url, job_guid, check_errors):
    """Generate a summary artifact for the raw text log."""
    bug_suggestions = []
    bugscache_uri = '{0}{1}'.format(
        settings.API_HOSTNAME,
        reverse("bugscache-list")
    )
    terms_requested = {}

    # parse a log given its url
    artifact_bc = ArtifactBuilderCollection(log_url,
                                            check_errors=check_errors)
    artifact_bc.parse()

    artifact_list = []
    for name, artifact in artifact_bc.artifacts.items():
        artifact_list.append((job_guid, name, 'json', json.dumps(artifact)))

    if check_errors:
        all_errors = artifact_bc.artifacts\
            .get('text_log_summary', {})\
            .get('step_data', {})\
            .get('all_errors', [])

        for err in all_errors:
            # remove the mozharness prefix
            clean_line = get_mozharness_substring(err['line'])
            search_terms = []
            # get a meaningful search term out of the error line
            search_term = get_error_search_term(clean_line)
            bugs = dict(open_recent=[], all_others=[])

            # collect open recent and all other bugs suggestions
            if search_term:
                search_terms.append(search_term)
                if search_term not in terms_requested:
                    # retrieve the list of suggestions from the api
                    bugs = get_bugs_for_search_term(
                        search_term,
                        bugscache_uri
                    )
                    terms_requested[search_term] = bugs
                else:
                    bugs = terms_requested[search_term]

            if not bugs or not (bugs['open_recent'] or bugs['all_others']):
                # no suggestions, try to use
                # the crash signature as search term
                crash_signature = get_crash_signature(clean_line)
                if crash_signature:
                    search_terms.append(crash_signature)
                    if crash_signature not in terms_requested:
                        bugs = get_bugs_for_search_term(
                            crash_signature,
                            bugscache_uri
                        )
                        terms_requested[crash_signature] = bugs
                    else:
                        bugs = terms_requested[crash_signature]

            # TODO: Rename 'search' to 'error_text' or similar, since that's
            # closer to what it actually represents (bug 1091060).
            bug_suggestions.append({
                "search": clean_line,
                "search_terms": search_terms,
                "bugs": bugs
            })

    artifact_list.append(
        (
            job_guid,
            'Bug suggestions',
            'json',
            json.dumps(bug_suggestions)
        )
    )

    return artifact_list
def extract_text_log_artifacts(log_url, job_guid, check_errors):
    """Generate a summary artifact for the raw text log."""
    bug_suggestions = []
    bugscache_uri = '{0}{1}'.format(settings.API_HOSTNAME,
                                    reverse("bugscache-list"))
    terms_requested = {}

    # parse a log given its url
    artifact_bc = ArtifactBuilderCollection(log_url,
                                            check_errors=check_errors)
    artifact_bc.parse()

    artifact_list = []
    for name, artifact in artifact_bc.artifacts.items():
        artifact_list.append((job_guid, name, 'json', json.dumps(artifact)))

    if check_errors:
        all_errors = artifact_bc.artifacts\
            .get('text_log_summary', {})\
            .get('step_data', {})\
            .get('all_errors', [])

        for err in all_errors:
            # remove the mozharness prefix
            clean_line = get_mozharness_substring(err['line'])
            search_terms = []
            # get a meaningful search term out of the error line
            search_term = get_error_search_term(clean_line)
            bugs = dict(open_recent=[], all_others=[])

            # collect open recent and all other bugs suggestions
            if search_term:
                search_terms.append(search_term)
                if search_term not in terms_requested:
                    # retrieve the list of suggestions from the api
                    bugs = get_bugs_for_search_term(search_term,
                                                    bugscache_uri)
                    terms_requested[search_term] = bugs
                else:
                    bugs = terms_requested[search_term]

            if not bugs or not (bugs['open_recent'] or bugs['all_others']):
                # no suggestions, try to use
                # the crash signature as search term
                crash_signature = get_crash_signature(clean_line)
                if crash_signature:
                    search_terms.append(crash_signature)
                    if crash_signature not in terms_requested:
                        bugs = get_bugs_for_search_term(
                            crash_signature, bugscache_uri)
                        terms_requested[crash_signature] = bugs
                    else:
                        bugs = terms_requested[crash_signature]

            # TODO: Rename 'search' to 'error_text' or similar, since that's
            # closer to what it actually represents (bug 1091060).
            bug_suggestions.append({
                "search": clean_line,
                "search_terms": search_terms,
                "bugs": bugs
            })

    artifact_list.append(
        (job_guid, 'Bug suggestions', 'json', json.dumps(bug_suggestions)))

    return artifact_list
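# A hedged illustration of one ``bug_suggestions`` entry built by the
# function above; the error line and search term are invented examples,
# but the key layout mirrors the appends in the code.
example_suggestion = {
    "search": "TEST-UNEXPECTED-FAIL | test_foo.html | assertion failed",  # cleaned error line (hypothetical)
    "search_terms": ["test_foo.html | assertion failed"],                 # terms sent to the bugscache api
    "bugs": {
        "open_recent": [],  # recently-seen open bugs from get_bugs_for_search_term
        "all_others": [],
    },
}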
def parse_log(project, job_log_url, job_guid, check_errors=False):
    """
    Call ArtifactBuilderCollection on the given job.
    """
    credentials = OAuthCredentials.get_credentials(project)
    req = TreeherderRequest(
        protocol=settings.TREEHERDER_REQUEST_PROTOCOL,
        host=settings.TREEHERDER_REQUEST_HOST,
        project=project,
        oauth_key=credentials.get('consumer_key', None),
        oauth_secret=credentials.get('consumer_secret', None),
    )
    update_endpoint = 'job-log-url/{0}/update_parse_status'.format(
        job_log_url['id'])

    try:
        log_url = job_log_url['url']
        bug_suggestions = []
        bugscache_uri = '{0}{1}'.format(settings.API_HOSTNAME,
                                        reverse("bugscache-list"))
        terms_requested = {}

        if log_url:
            # parse a log given its url
            artifact_bc = ArtifactBuilderCollection(log_url,
                                                    check_errors=check_errors)
            artifact_bc.parse()

            artifact_list = []
            for name, artifact in artifact_bc.artifacts.items():
                artifact_list.append(
                    (job_guid, name, 'json', json.dumps(artifact)))

            if check_errors:
                all_errors = artifact_bc.artifacts.get(
                    'Structured Log', {}).get('step_data', {}).get('all_errors', [])

                for err in all_errors:
                    # remove the mozharness prefix
                    clean_line = get_mozharness_substring(err['line'])
                    # get a meaningful search term out of the error line
                    search_term = get_error_search_term(clean_line)
                    bugs = dict(open_recent=[], all_others=[])

                    # collect open recent and all other bugs suggestions
                    if search_term:
                        if search_term not in terms_requested:
                            # retrieve the list of suggestions from the api
                            bugs = get_bugs_for_search_term(
                                search_term, bugscache_uri)
                            terms_requested[search_term] = bugs
                        else:
                            bugs = terms_requested[search_term]

                    if not bugs or not (bugs['open_recent'] or
                                        bugs['all_others']):
                        # no suggestions, try to use
                        # the crash signature as search term
                        crash_signature = get_crash_signature(clean_line)
                        if crash_signature:
                            if crash_signature not in terms_requested:
                                bugs = get_bugs_for_search_term(
                                    crash_signature, bugscache_uri)
                                terms_requested[crash_signature] = bugs
                            else:
                                bugs = terms_requested[crash_signature]

                    bug_suggestions.append({
                        "search": clean_line,
                        "bugs": bugs
                    })

            artifact_list.append((job_guid, 'Bug suggestions', 'json',
                                  json.dumps(bug_suggestions)))

            # store the artifacts generated
            tac = TreeherderArtifactCollection()
            for artifact in artifact_list:
                ta = tac.get_artifact({
                    "job_guid": artifact[0],
                    "name": artifact[1],
                    "type": artifact[2],
                    "blob": artifact[3]
                })
                tac.add(ta)
            req.post(tac)

        # send an update to job_log_url so its status changes to parsed
        current_timestamp = time.time()
        status = 'parsed'
        req.send(update_endpoint,
                 method='POST',
                 data={
                     'parse_status': status,
                     'parse_timestamp': current_timestamp
                 })
    except Exception as e:
        parse_log.retry(exc=e)
        # send an update to job_log_url so its status changes to failed
        current_timestamp = time.time()
        status = 'failed'
        req.send(update_endpoint,
                 method='POST',
                 data={
                     'parse_status': status,
                     'parse_timestamp': current_timestamp
                 })
        # re-raise the exception to leave a trace in the log
        raise