def project_info(request, project):
    """Return the maximum job id of ``project`` as a JSON response.

    Returns an ``HttpResponseNotFound`` when ``DatasetNotFoundError`` is
    raised while creating or querying the jobs model.
    """
    try:
        jm = JobsModel(project)
        try:
            payload = json.dumps({'max_job_id': jm.get_max_job_id()})
        finally:
            # Always release the model, even when the query fails.
            jm.disconnect()
    except DatasetNotFoundError:
        return HttpResponseNotFound('Project does not exist')
    # ``content_type`` is the supported spelling of the old ``mimetype`` kwarg.
    return HttpResponse(payload, content_type='application/json')
def cycle_data(max_iterations=50, debug=False):
    """Run ``JobsModel.cycle_data`` repeatedly for every repository.

    For each project, cycling stops after ``max_iterations`` passes or as
    soon as a pass reports a ``total_count`` of 0.  When ``debug`` is true,
    progress is printed to stdout.
    """
    projects = Repository.objects.all().values_list('name', flat=True)

    for project in projects:
        jm = JobsModel(project)
        try:
            sql_targets = {}

            if debug:
                print("Cycling Database: {0}".format(project))

            cycle_iterations = max_iterations

            while cycle_iterations > 0:
                sql_targets = jm.cycle_data(sql_targets)

                if debug:
                    print("Iterations: {0}".format(str(cycle_iterations)))
                    print("sql_targets")
                    print(sql_targets)

                cycle_iterations -= 1

                # No more items to delete
                if sql_targets['total_count'] == 0:
                    cycle_iterations = 0
        finally:
            # Release the model even if a cycling pass raises.
            jm.disconnect()
def use_jobs_model(*args, **kwargs):
    """Call ``model_func`` with a ``JobsModel`` for ``kwargs["project"]``.

    The model is handed over as the ``jm`` keyword argument and is
    disconnected once the wrapped call finishes, whether or not it raised.
    """
    jobs_model = JobsModel(kwargs["project"])
    try:
        result = model_func(*args, jm=jobs_model, **kwargs)
    finally:
        jobs_model.disconnect()
    return result
def populate_performance_series(project, series_type, series_data):
    """Store every signature of ``series_data`` for each configured range.

    Iterates ``settings.TREEHERDER_PERF_SERIES_TIME_RANGES`` and calls
    ``JobsModel.store_performance_series`` once per (range, signature) pair.
    """
    jm = JobsModel(project)
    try:
        for t_range in settings.TREEHERDER_PERF_SERIES_TIME_RANGES:
            for signature in series_data:
                jm.store_performance_series(
                    t_range['seconds'],
                    series_type,
                    signature,
                    series_data[signature]
                )
    finally:
        # Release the model even if storing a series fails.
        jm.disconnect()
def publish_to_pulse(project, ids, data_type):
    """Fetch the data identified by ``ids`` for the given ``data_type``.

    Only ``data_type == 'result_set'`` triggers a lookup.
    NOTE(review): the fetched data is not used further in this block.
    """
    jm = JobsModel(project)
    try:
        # Get appropriate data for data_type
        # using the ids provided
        data = []
        if data_type == 'result_set':
            data = jm.get_result_set_list_by_ids(ids)
    finally:
        # Release the model even if the lookup raises.
        jm.disconnect()
def project_info(request, project):
    """Return the project's maximum job and performance artifact ids as JSON.

    Returns an ``HttpResponseNotFound`` when ``DatasetNotFoundError`` is
    raised while creating or querying the jobs model.
    """
    try:
        jm = JobsModel(project)
        try:
            payload = json.dumps({
                'max_job_id': jm.get_max_job_id(),
                'max_performance_artifact_id':
                    jm.get_max_performance_artifact_id(),
            })
        finally:
            # Always release the model, even when a query fails.
            jm.disconnect()
    except DatasetNotFoundError:
        return HttpResponseNotFound('Project does not exist')
    return HttpResponse(content=payload, content_type='application/json')
def calculate_eta(sample_window_seconds=21600, debug=False):
    """Call ``JobsModel.calculate_eta`` for every repository.

    ``sample_window_seconds`` and ``debug`` are forwarded unchanged.
    """
    projects = Repository.objects.all().values_list('name', flat=True)

    for project in projects:
        jm = JobsModel(project)
        try:
            jm.calculate_eta(sample_window_seconds, debug)
        finally:
            # Release the model even if the calculation raises.
            jm.disconnect()
def process_objects(limit=None):
    """
    Process up to ``limit`` objectstore objects for every datasource
    and load them to the jobs store.

    A falsy ``limit`` falls back to 100.
    """
    batch_size = limit if limit else 100

    for datasource in Datasource.objects.all():
        jobs_model = JobsModel(datasource.project)
        try:
            jobs_model.process_objects(batch_size)
        finally:
            jobs_model.disconnect()
def test_load_single_artifact(test_project, eleven_jobs_processed,
                              mock_post_collection, mock_error_summary,
                              sample_data):
    """
    Loading one 'Bug suggestions' artifact stores exactly that artifact.
    """
    expected_blob = ["flim", "flam"]

    with JobsModel(test_project) as jm:
        first_job = jm.get_job_list(0, 1)[0]

    artifact = {
        'type': 'json',
        'name': 'Bug suggestions',
        'blob': json.dumps(expected_blob),
        'job_guid': first_job['job_guid']
    }

    with ArtifactsModel(test_project) as am:
        am.load_job_artifacts([artifact], {artifact['job_guid']: first_job})
        stored = am.get_job_artifact_list(
            0, 10, conditions={'job_id': {('=', first_job["id"])}})

    assert len(stored) == 1

    stored_names = {entry['name'] for entry in stored}
    suggestion_blobs = [entry['blob'] for entry in stored
                        if entry['name'] == 'Bug suggestions']
    actual_blob = suggestion_blobs[0]

    assert set(stored_names) == {'Bug suggestions'}
    assert expected_blob == actual_blob
def test_load_long_job_details(test_project, eleven_jobs_stored):
    """Over-long job detail fields are stored truncated, not rejected."""
    with JobsModel(test_project) as jm:
        first_job = jm.get_job_list(0, 1)[0]

    limit = JobDetail.MAX_FIELD_LENGTH
    oversized = 2 * limit
    long_title = 't' * oversized
    long_value = 'v' * oversized
    long_url = 'https://' + ('u' * oversized)

    detail = {
        'title': long_title,
        'value': long_value,
        'url': long_url
    }
    job_info_artifact = {
        'type': 'json',
        'name': 'Job Info',
        'blob': json.dumps({'job_details': [detail]}),
        'job_guid': first_job['job_guid']
    }

    with ArtifactsModel(test_project) as artifacts_model:
        artifacts_model.load_job_artifacts([job_info_artifact])

    assert JobDetail.objects.count() == 1

    stored = JobDetail.objects.all()[0]
    assert stored.title == long_title[:limit]
    assert stored.value == long_value[:limit]
    assert stored.url == long_url[:limit]
def test_artifact_create_text_log_summary(webapp, test_project,
                                          eleven_jobs_stored, mock_post_json,
                                          mock_error_summary, sample_data):
    """
    Posting a text_log_summary artifact should leave both that artifact
    and a 'Bug suggestions' artifact stored for the job.
    """
    with JobsModel(test_project) as jm:
        first_job = jm.get_job_list(0, 1)[0]

    summary = sample_data.text_log_summary

    collection = client.TreeherderArtifactCollection()
    summary_artifact = client.TreeherderArtifact({
        'type': 'json',
        'name': 'text_log_summary',
        'blob': json.dumps(summary['blob']),
        'job_guid': first_job['job_guid']
    })
    collection.add(summary_artifact)

    th_client = client.TreeherderClient(protocol='http', host='localhost')
    th_client.post_collection(test_project, collection)

    with ArtifactsModel(test_project) as am:
        stored = am.get_job_artifact_list(
            0, 10, conditions={'job_id': {('=', first_job["id"])}})

    stored_names = {entry['name'] for entry in stored}
    suggestion_blobs = [entry['blob'] for entry in stored
                        if entry['name'] == 'Bug suggestions']
    actual_suggestions = suggestion_blobs[0]

    assert set(stored_names) == {'Bug suggestions', 'text_log_summary'}
    assert mock_error_summary == actual_suggestions
def handle(self, *args, **options):
    """Cycle objectstore and jobs data for every 'jobs' datasource.

    Intervals given in ``options`` are interpreted as days; when an option
    is falsy the corresponding value from ``settings`` is used instead.
    """
    self.is_debug = options['debug']

    os_cycle_interval = (
        datetime.timedelta(days=options['os_cycle_interval'])
        if options['os_cycle_interval']
        else settings.OBJECTSTORE_CYCLE_INTERVAL
    )
    cycle_interval = (
        datetime.timedelta(days=options['cycle_interval'])
        if options['cycle_interval']
        else settings.DATA_CYCLE_INTERVAL
    )

    self.debug("cycle interval... objectstore: {}, jobs: {}".format(
        os_cycle_interval, cycle_interval))

    jobs_datasources = Datasource.objects.filter(contenttype='jobs')
    for project in jobs_datasources.values_list('project', flat=True):
        self.debug("Cycling Database: {0}".format(project))
        with JobsModel(project) as jobs_model:
            os_deleted, rs_deleted = jobs_model.cycle_data(
                os_cycle_interval,
                cycle_interval,
                options['os_chunk_size'],
                options['chunk_size'],
                options['sleep_time'])
            self.debug(
                "Deleted {} objectstore rows and {} resultsets from {}".format(
                    os_deleted, rs_deleted, project))
def test_post_job_with_default_tier(test_project, result_set_stored,
                                    mock_post_json):
    """A job submitted without a tier is stored with tier 1."""
    job_guid = 'd22c74d4aa6d2a1dcba96d95dccbd5fdca70cf33'

    collection = client.TreeherderJobCollection()
    submitted_job = client.TreeherderJob({
        'project': test_project,
        'revision_hash': result_set_stored[0]['revision_hash'],
        'job': {
            'job_guid': job_guid,
            'state': 'completed',
        }
    })
    collection.add(submitted_job)

    do_post_collection(test_project, collection)

    with JobsModel(test_project) as jm:
        matching = [candidate for candidate in jm.get_job_list(0, 20)
                    if candidate['job_guid'] == job_guid]
        stored_job = matching[0]

    assert stored_job['tier'] == 1
def handle(self, *args, **options):
    """Download a structured log and store its lines as ``FailureLine`` rows.

    Positional arguments are: log URL, job guid, repository name.  At most
    ``settings.FAILURE_LINES_CUTOFF`` lines are kept; one extra line marked
    with ``action='truncated'`` is stored when the log is longer.  Nothing
    is stored when the downloaded log is empty.
    """
    if len(args) != 3:
        raise CommandError('3 arguments required, %s given' % len(args))

    log_response = requests.get(args[0], timeout=30)
    log_response.raise_for_status()

    if not log_response.text:
        return

    log_content = StringIO(log_response.text)

    try:
        repository = Repository.objects.get(name=args[2],
                                            active_status='active')
    except Repository.DoesNotExist:
        raise CommandError('Unknown repository %s' % args[2])

    cutoff = settings.FAILURE_LINES_CUTOFF
    # Read one line past the cutoff so an over-long log can be detected.
    failure_lines = list(islice(reader.read(log_content), cutoff + 1))

    if len(failure_lines) > cutoff:
        # Alter the N+1th log line to indicate the list was truncated.
        failure_lines[-1].update(action='truncated')

    with JobsModel(args[2]) as jobs_model:
        job_id = jobs_model.get_job_ids_by_guid([args[1]])
        if not job_id:
            raise CommandError(
                'No job found with guid %s in the %s repository'
                % (args[1], args[2]))

    new_rows = []
    for failure_line in failure_lines:
        new_rows.append(FailureLine(repository=repository,
                                    job_guid=args[1],
                                    **failure_line))
    FailureLine.objects.bulk_create(new_rows)
def check_artifacts(test_project, job_guid, parse_status, num_artifacts,
                    exp_artifact_names=None, exp_error_summary=None):
    """Assert the log parse status and stored artifacts of one job.

    The job is looked up by ``job_guid`` among the first 20 jobs.  The
    artifact names and the 'Bug suggestions' blob are only compared when
    the corresponding expected value is truthy.
    """
    with JobsModel(test_project) as jm:
        matching_ids = [candidate['id']
                        for candidate in jm.get_job_list(0, 20)
                        if candidate['job_guid'] == job_guid]
        job_id = matching_ids[0]
        log_urls = jm.get_job_log_url_list([job_id])

        assert len(log_urls) == 1
        assert log_urls[0]['parse_status'] == parse_status

    with ArtifactsModel(test_project) as am:
        stored = am.get_job_artifact_list(
            0, 10, conditions={'job_id': {('=', job_id)}})

        assert len(stored) == num_artifacts

        if exp_artifact_names:
            stored_names = {entry['name'] for entry in stored}
            assert set(stored_names) == exp_artifact_names

        if exp_error_summary:
            suggestion_blobs = [entry['blob'] for entry in stored
                                if entry['name'] == 'Bug suggestions']
            assert exp_error_summary == suggestion_blobs[0]
def create(self, request, project):
    """Store the posted artifacts and schedule bug suggestions if needed.

    Every ``text_log_summary`` artifact posted without an accompanying
    ``Bug suggestions`` artifact is handed to ``populate_error_summary``
    asynchronously, so this endpoint does not block on generating them.
    """
    artifacts = ArtifactsModel.serialize_artifact_json_blobs(request.DATA)
    job_guids = [artifact['job_guid'] for artifact in artifacts]

    with JobsModel(project) as jm, ArtifactsModel(project) as am:
        job_id_lookup = jm.get_job_ids_by_guid(job_guids)
        am.load_job_artifacts(artifacts, job_id_lookup)

        # Summaries that arrive together with their ``Bug suggestions``
        # artifact (as our own log parser posts them) need nothing more.
        # The remaining ones get their suggestions generated in a task.
        pending_summaries = get_artifacts_that_need_bug_suggestions(artifacts)

        # An empty list means there is nothing to schedule.
        if pending_summaries:
            populate_error_summary.apply_async(
                args=[project, pending_summaries, job_id_lookup],
                routing_key='error_summary')

    return Response({'message': 'Artifacts stored successfully'})
def unclassified_failure_count(projects=None):
    """Publish the unclassified failure counts of each project.

    When ``projects`` is falsy, all repository names are used.  For every
    project both the plain and the "excluded" count are read from the jobs
    model and passed to ``UnclassifiedFailureCountPublisher.publish``.
    """
    if not projects:
        projects = Repository.objects.all().values_list('name', flat=True)

    unclassified_failure_publisher = UnclassifiedFailureCountPublisher(
        settings.BROKER_URL)
    try:
        for project in projects:
            jm = JobsModel(project)
            try:
                count = jm.get_unclassified_failure_count()
                count_excluded = jm.get_unclassified_failure_count_excluded()
                unclassified_failure_publisher.publish(
                    project, count, count_excluded)
            finally:
                # Release the model even if counting or publishing fails.
                jm.disconnect()
    finally:
        # Release the publisher regardless of per-project failures.
        unclassified_failure_publisher.disconnect()
def test_load_long_job_details(test_project, eleven_jobs_stored):
    """Over-long job detail fields are stored truncated, not rejected."""
    with JobsModel(test_project) as jm:
        first_job = jm.get_job_list(0, 1)[0]

    def max_length(field):
        """Get the field's max_length for the JobDetail model"""
        model_field = JobDetail._meta.get_field(field)
        return model_field.max_length

    long_title = 't' * (2 * max_length("title"))
    long_value = 'v' * (2 * max_length("value"))
    long_url = 'https://' + ('u' * (2 * max_length("url")))

    detail = {
        'title': long_title,
        'value': long_value,
        'url': long_url
    }
    job_info_artifact = {
        'type': 'json',
        'name': 'Job Info',
        'blob': json.dumps({'job_details': [detail]}),
        'job_guid': first_job['job_guid']
    }

    with ArtifactsModel(test_project) as artifacts_model:
        artifacts_model.load_job_artifacts([job_info_artifact])

    assert JobDetail.objects.count() == 1

    stored = JobDetail.objects.all()[0]
    assert stored.title == long_title[:max_length("title")]
    assert stored.value == long_value[:max_length("value")]
    assert stored.url == long_url[:max_length("url")]
def unstructured_bugs(self):
    """
    Get bugs that match this line in the Bug Suggestions artifact for
    this job.

    Returns a list of suggestions without duplicate ids, in the order
    they are first encountered; empty when the line has no components.
    """
    components = self._serialized_components()
    if not components:
        return []

    # Importing this at the top level causes circular import misery
    from treeherder.model.derived import JobsModel, ArtifactsModel

    with JobsModel(self.repository.name) as jobs_model, \
            ArtifactsModel(self.repository.name) as artifacts_model:
        guid_lookup = jobs_model.get_job_ids_by_guid([self.job_guid])
        job_id = guid_lookup[self.job_guid]["id"]
        raw_suggestions = artifacts_model.bug_suggestions(job_id)
        bug_suggestions = artifacts_model.filter_bug_suggestions(
            raw_suggestions)

    matches = []
    seen_ids = set()
    for item in bug_suggestions:
        # Only consider entries whose search text holds every component.
        if not all(component in item["search"] for component in components):
            continue
        candidates = itertools.chain(item["bugs"]["open_recent"],
                                     item["bugs"]["all_others"])
        for suggestion in candidates:
            if suggestion["id"] in seen_ids:
                continue
            seen_ids.add(suggestion["id"])
            matches.append(suggestion)

    return matches
def publish_to_pulse(project, ids, data_type):
    """Publish the result sets identified by ``ids`` through ``publisher``.

    Does nothing when no publisher is configured, and only handles
    ``data_type == 'result_set'``.
    """
    # Without a publisher (missing configs) no pulse message can be sent.
    # That is acceptable: local installs do not need to publish, and
    # requiring a pulse user would add overhead to development setup.
    if not publisher:
        return

    jobs_model = JobsModel(project)

    try:
        if data_type != 'result_set':
            return

        # Publish one message per new result set.
        for entry in jobs_model.get_result_set_list_by_ids(ids):
            repository = jobs_model.refdata_model.get_repository_info(
                entry['repository_id'])
            entry['repository_url'] = repository['url']

            # Strip properties that are not exposed: ``revisions`` is too
            # big, ``repository_id`` is treated as internal.
            for hidden_key in ('revisions', 'repository_id'):
                del entry[hidden_key]

            # Required properties; ``revision_hash`` is expected to be
            # present on the entry already.
            entry['version'] = 1
            entry['project'] = project

            publisher.new_result_set(
                message=entry,
                revision_hash=entry['revision_hash'],
                project=project
            )

        # HACK: publishing is not confirmed, so the connection is released
        # here to get the messages sent even if the process context is
        # destroyed afterwards.
        publisher.connection.release()
    finally:
        jobs_model.disconnect()
def handle(self, *args, **options):
    """Run ``match_errors`` for one job.

    Positional arguments are: repository name, job guid.
    """
    if len(args) != 2:
        raise CommandError('2 arguments required, %s given' % len(args))

    repository = args[0]
    job_guid = args[1]

    with JobsModel(repository) as jobs_model:
        match_errors(repository, jobs_model, job_guid)
def retrieve(self, request, *args, **kwargs):
    """Return the serialized repository ``kwargs['pk']`` plus ``max_job_id``."""
    repository = self.queryset.get(pk=kwargs['pk'])
    serializer = th_serializers.RepositorySerializer(repository)

    # Work on a copy so the serializer's own data is left untouched.
    payload = serializer.data.copy()
    with JobsModel(serializer.data['name']) as jobs_model:
        payload.update({'max_job_id': jobs_model.get_max_job_id()})

    return Response(payload)
def project_info(request, project):
    """Return the maximum job id of ``project`` as a JSON response.

    Returns an ``HttpResponseNotFound`` when ``DatasetNotFoundError`` is
    raised.
    """
    try:
        with JobsModel(project) as jm:
            body = json.dumps({'max_job_id': jm.get_max_job_id()})
            response = HttpResponse(content=body,
                                    content_type='application/json')
        return response
    except DatasetNotFoundError:
        return HttpResponseNotFound('Project does not exist')
def generate_request_body(self):
    """
    Create the data structure that will be sent to Elasticsearch.

    The result is stored on ``self.body``.
    """
    with JobsModel(self.project) as jm, \
            ArtifactsModel(self.project) as am:
        job_data = jm.get_job(self.job_id)[0]
        option_collection = jm.refdata_model.get_all_option_collections()
        revision_list = jm.get_resultset_revisions_list(
            job_data["result_set_id"])
        artifact_filter = {
            'job_id': set([("=", self.job_id)]),
            'name': set([("=", "buildapi")])
        }
        buildapi_artifact = am.get_job_artifact_list(0, 1, artifact_filter)

        if not buildapi_artifact:
            # OrangeFactor needs a buildname to be set or it skips the
            # failure classification, so one is made up for non-buildbot
            # jobs.
            buildname = 'non-buildbot %s test %s' % (
                job_data["platform"], job_data["job_type_name"])
        else:
            buildname = buildapi_artifact[0]["blob"]["buildername"]

    # The submit time is used as the date because a start time is not
    # available for pending jobs.
    submit_day = datetime.fromtimestamp(
        int(job_data["submit_timestamp"])).strftime("%Y-%m-%d")
    buildtype = option_collection[job_data["option_collection_hash"]]["opt"]

    self.body = {
        "buildname": buildname,
        "machinename": job_data["machine_name"],
        "os": job_data["platform"],
        "date": submit_day,
        "type": job_data["job_type_name"],
        "buildtype": buildtype,
        # Intentionally using strings for starttime, bug, timestamp for
        # compatibility with TBPL's legacy output format.
        "starttime": str(job_data["start_timestamp"]),
        "tree": self.project,
        "rev": revision_list[0]["revision"],
        "bug": str(self.bug_id),
        "who": self.who,
        "timestamp": str(self.classification_timestamp),
        "treeherder_job_id": self.job_id,
    }
def publish_to_pulse(project, ids, data_type):
    """Publish the result sets identified by ``ids`` through ``publisher``.

    Does nothing when no publisher is configured, and only handles
    ``data_type == 'result_set'``.
    """
    # Without a publisher (missing configs) no pulse message can be sent.
    # That is acceptable: local installs do not need to publish, and
    # requiring a pulse user would add overhead to development setup.
    if not publisher:
        return

    jobs_model = JobsModel(project)

    try:
        if data_type != 'result_set':
            return

        # Publish one message per new result set.
        for entry in jobs_model.get_result_set_list_by_ids(ids):
            # Strip properties that are not exposed: ``revisions`` is too
            # big, ``repository_id`` is treated as internal.
            for hidden_key in ('revisions', 'repository_id'):
                del entry[hidden_key]

            # Required properties; ``revision_hash`` is expected to be
            # present on the entry already.
            entry['version'] = 1
            entry['project'] = project

            publisher.new_result_set(
                message=entry,
                revision_hash=entry['revision_hash'],
                project=project
            )

        # HACK: publishing is not confirmed, so the connection is released
        # here to get the messages sent even if the process context is
        # destroyed afterwards.
        publisher.connection.release()
    finally:
        jobs_model.disconnect()
def test_load_non_ascii_textlog_errors(test_project, eleven_jobs_stored):
    """Text log errors with non-ascii and astral characters get stored."""
    with JobsModel(test_project) as jm:
        first_job = jm.get_job_list(0, 1)[0]

    error_entries = [
        {
            # non-ascii character
            "line": '07:51:28 WARNING - \U000000c3'.encode('utf-8'),
            "linenumber": 1587
        },
        {
            # astral character (i.e. higher than ucs2)
            "line": '07:51:29 WARNING - \U0001d400'.encode('utf-8'),
            "linenumber": 1588
        }
    ]
    step = {
        'name': 'foo',
        'started': '2016-05-10 12:44:23.103904',
        'started_linenumber': 8,
        'finished_linenumber': 10,
        'finished': '2016-05-10 12:44:23.104394',
        'result': 'success',
        'errors': error_entries
    }
    summary_artifact = {
        'type': 'json',
        'name': 'text_log_summary',
        'blob': json.dumps({'step_data': {"steps": [step]}}),
        'job_guid': first_job['job_guid']
    }

    with ArtifactsModel(test_project) as artifacts_model:
        artifacts_model.load_job_artifacts([summary_artifact])

    assert TextLogError.objects.count() == 2

    first_error = TextLogError.objects.get(line_number=1587)
    assert first_error.line == '07:51:28 WARNING - \U000000c3'

    second_error = TextLogError.objects.get(line_number=1588)
    assert second_error.line == '07:51:29 WARNING - <U+01D400>'
def generate_request_body(self):
    """
    Create the data structure required by tbpl's submitBuildStar.php script
    It's used by both the bug_job_map endpoint and the job note endpoint.

    The result is stored on ``self.body``.
    """
    jobs_model = JobsModel(self.project)
    try:
        artifact_filter = {
            'job_id': set([("=", self.job_id)]),
            'name': set([("=", "buildapi_complete")])
        }
        buildapi_artifact = jobs_model.get_job_artifact_list(
            0, 1, artifact_filter)[0]
        job_data = jobs_model.get_job(self.job_id)[0]
    finally:
        jobs_model.disconnect()

    def append_part(text, part):
        # Separate parts with " - " only when something precedes them.
        if text:
            text += " - "
        return text + part

    note = "Bug {0}".format(self.bug_id) if self.bug_id else ""
    if self.classification_id:
        classification = FailureClassification.objects.get(
            id=self.classification_id)
        note = append_part(note, classification.name)
    if self.note:
        note = append_part(note, self.note)

    self.body = {
        "id": buildapi_artifact["blob"]["id"],
        "machinename": job_data["machine_name"],
        "starttime": int(job_data["start_timestamp"]),
        "note": note,
        "who": self.who
    }
def use_jobs_model(*args, **kwargs):
    """Call ``model_func`` with a ``JobsModel`` and map errors to responses.

    The model for ``kwargs["project"]`` is passed as the ``jm`` keyword.
    ``DatasetNotFoundError`` and ``ObjectNotFoundException`` produce a 404
    ``Response``; any other exception produces a 500 ``Response`` (with a
    traceback when ``settings.DEBUG`` is set).
    """
    project = kwargs["project"]
    # Stays None when the constructor itself raises, so the cleanup below
    # does not fail on an unbound name and hide the intended response.
    jm = None
    try:
        jm = JobsModel(project)
        return model_func(*args, jm=jm, **kwargs)

    except DatasetNotFoundError:
        return Response(
            {"message": "No project with name {0}".format(project)},
            status=404,
        )
    except ObjectNotFoundException as e:
        return Response({"message": unicode(e)}, status=404)
    except Exception as e:  # pragma nocover
        msg = {"message": unicode(e)}
        if settings.DEBUG:
            import traceback
            msg["traceback"] = traceback.format_exc()

        return Response(msg, status=500)
    finally:
        if jm is not None:
            jm.disconnect()
def populate_performance_series(project, series_type, series_data):
    """Store every signature of ``series_data`` for each configured range.

    Iterates ``settings.TREEHERDER_PERF_SERIES_TIME_RANGES`` and calls
    ``JobsModel.store_performance_series`` once per (range, signature) pair.
    """
    jm = JobsModel(project)
    try:
        for t_range in settings.TREEHERDER_PERF_SERIES_TIME_RANGES:
            for signature in series_data:
                jm.store_performance_series(t_range['seconds'],
                                            series_type,
                                            signature,
                                            series_data[signature])
    finally:
        # Release the model even if storing a series fails.
        jm.disconnect()
def test_artifact_create_text_log_summary_and_bug_suggestions(
        webapp, test_project, eleven_jobs_stored, mock_post_json,
        mock_error_summary, sample_data):
    """
    test submitting text_log_summary and Bug suggestions artifacts

    This should NOT generate a Bug suggestions artifact, just post the one
    submitted.
    """
    with JobsModel(test_project) as jm:
        first_job = jm.get_job_list(0, 1)[0]

    summary = sample_data.text_log_summary
    submitted_suggestions = ["flim", "flam"]

    collection = client.TreeherderArtifactCollection()
    summary_artifact = client.TreeherderArtifact({
        'type': 'json',
        'name': 'text_log_summary',
        'blob': json.dumps(summary['blob']),
        'job_guid': first_job['job_guid']
    })
    collection.add(summary_artifact)
    suggestions_artifact = client.TreeherderArtifact({
        'type': 'json',
        'name': 'Bug suggestions',
        'blob': submitted_suggestions,
        'job_guid': first_job['job_guid']
    })
    collection.add(suggestions_artifact)

    credentials = OAuthCredentials.get_credentials(test_project)
    auth = TreeherderAuth(credentials['consumer_key'],
                          credentials['consumer_secret'],
                          test_project)
    th_client = client.TreeherderClient(protocol='http', host='localhost',
                                        auth=auth)
    th_client.post_collection(test_project, collection)

    with ArtifactsModel(test_project) as am:
        stored = am.get_job_artifact_list(
            0, 10, conditions={'job_id': {('=', first_job["id"])}})

    assert len(stored) == 2

    stored_names = {entry['name'] for entry in stored}
    suggestion_blobs = [entry['blob'] for entry in stored
                        if entry['name'] == 'Bug suggestions']
    actual_suggestions = suggestion_blobs[0]

    assert set(stored_names) == {'Bug suggestions', 'text_log_summary'}
    assert submitted_suggestions == actual_suggestions
def generate_request_body(self):
    """
    Create the data structure that will be sent to Elasticsearch.

    The result is stored on ``self.body``.
    """
    with JobsModel(self.project) as jm:
        job_data = jm.get_job(self.job_id)[0]

        option_names = OptionCollection.objects.values_list(
            'option__name', flat=True).filter(
                option_collection_hash=job_data["option_collection_hash"])
        buildtype = " ".join(sorted(option_names))

        revision = Push.objects.values_list(
            'revision', flat=True).get(id=job_data['push_id'])
        ref_data_name = job_data["ref_data_name"]

    # The submit time is used as the date because a start time is not
    # available for pending jobs.
    submit_day = datetime.utcfromtimestamp(
        job_data["submit_timestamp"]).strftime("%Y-%m-%d")

    self.body = {
        "buildname": ref_data_name,
        "machinename": job_data["machine_name"],
        "os": job_data["platform"],
        "date": submit_day,
        "type": job_data["job_type_name"],
        "buildtype": buildtype,
        # Intentionally using strings for starttime, bug, timestamp for
        # compatibility with TBPL's legacy output format.
        "starttime": str(job_data["start_timestamp"]),
        "tree": self.project,
        "rev": revision,
        "bug": str(self.bug_id),
        "who": self.who,
        "timestamp": str(self.classification_timestamp),
        "treeherder_job_id": self.job_id,
    }
def handle(self, *args, **options):
    """Cycle data older than ``options['days']`` days for every datasource."""
    self.is_debug = options['debug']

    cycle_interval = datetime.timedelta(days=options['days'])
    self.debug("cycle interval... {}".format(cycle_interval))

    for project in Datasource.objects.values_list('project', flat=True):
        self.debug("Cycling Database: {0}".format(project))
        with JobsModel(project) as jobs_model:
            deleted = jobs_model.cycle_data(cycle_interval,
                                            options['chunk_size'],
                                            options['sleep_time'])
            self.debug("Deleted {} jobs from {}".format(deleted, project))
def generate_request_body(self):
    """
    Create the data structure required by tbpl's starcomment.php script

    The result is stored on ``self.body``.
    """
    jobs_model = JobsModel(self.project)
    try:
        artifact_filter = {
            'job_id': set([("=", self.job_id)]),
            'name': set([("=", "buildapi")])
        }
        buildapi_artifact = jobs_model.get_job_artifact_list(
            0, 1, artifact_filter)[0]
        job_data = jobs_model.get_job(self.job_id)[0]
        option_collection = \
            jobs_model.refdata_model.get_all_option_collections()
        revision_list = jobs_model.get_resultset_revisions_list(
            job_data["result_set_id"])
    finally:
        jobs_model.disconnect()

    # The submit time is used as the date because a start time is not
    # available for pending jobs.
    submit_day = datetime.fromtimestamp(
        int(job_data["submit_timestamp"])).strftime("%Y-%m-%d")
    buildtype = option_collection[job_data["option_collection_hash"]]["opt"]

    self.body = {
        "buildname": buildapi_artifact["blob"]["buildername"],
        "machinename": job_data["machine_name"],
        "os": job_data["platform"],
        "date": submit_day,
        "type": job_data["job_type_name"],
        "buildtype": buildtype,
        "starttime": int(job_data["start_timestamp"]),
        "tree": self.project,
        "rev": revision_list[0]["revision"],
        "comment": "Bug {0}".format(self.bug_id),
        "who": self.who,
        "timestamp": self.submit_timestamp,
        "logfile": "00000000"
    }
def handle(self, *args, **options):
    """Cycle jobs data for every 'jobs' datasource.

    ``options['cycle_interval']`` is interpreted as days; when it is falsy
    ``settings.DATA_CYCLE_INTERVAL`` is used instead.
    """
    self.is_debug = options['debug']

    cycle_interval = (
        datetime.timedelta(days=options['cycle_interval'])
        if options['cycle_interval']
        else settings.DATA_CYCLE_INTERVAL
    )
    self.debug("cycle interval: {0}".format(cycle_interval))

    jobs_datasources = Datasource.objects.filter(contenttype='jobs')
    for project in jobs_datasources.values_list('project', flat=True):
        self.debug("Cycling Database: {0}".format(project))
        jobs_model = JobsModel(project)
        try:
            num_deleted = jobs_model.cycle_data(cycle_interval,
                                                options['chunk_size'],
                                                options['sleep_time'])
            self.debug("Deleted {0} resultsets from {1}".format(
                num_deleted, project))
        finally:
            jobs_model.disconnect()
def test_load_artifact_second_time_fails(
        test_project, eleven_jobs_stored, mock_post_json,
        mock_error_summary, sample_data):
    """
    Loading two artifacts with the same name for one job keeps only the
    first one.
    """
    with JobsModel(test_project) as jm:
        first_job = jm.get_job_list(0, 1)[0]

    first_blob = ["flim", "flam"]

    first_artifact = {
        'type': 'json',
        'name': 'Bug suggestions',
        'blob': json.dumps(first_blob),
        'job_guid': first_job['job_guid']
    }
    second_artifact = {
        'type': 'json',
        'name': 'Bug suggestions',
        'blob': json.dumps(["me", "you"]),
        'job_guid': first_job['job_guid']
    }

    with ArtifactsModel(test_project) as am:
        for artifact in (first_artifact, second_artifact):
            am.load_job_artifacts([artifact],
                                  {artifact['job_guid']: first_job})

        stored = am.get_job_artifact_list(
            0, 10, conditions={'job_id': {('=', first_job["id"])}})

    assert len(stored) == 1

    stored_names = {entry['name'] for entry in stored}
    suggestion_blobs = [entry['blob'] for entry in stored
                        if entry['name'] == 'Bug suggestions']
    actual_blob = suggestion_blobs[0]

    assert set(stored_names) == {'Bug suggestions'}
    assert first_blob == actual_blob
def match_errors(job):
    """Match the job's unmatched failure lines and store the result.

    Returns early when the job's result is not a failure status or when
    it has no unmatched failure lines.
    """
    # Only try to autoclassify where we have a failure status; sometimes
    # there can be error lines even in jobs marked as passing.
    failure_results = ["testfailed", "busted", "exception"]

    with JobsModel(job.repository.name) as jobs_model:
        ds_job = jobs_model.get_job(job.project_specific_id)[0]
        has_failed = ds_job["result"] in failure_results

    if not has_failed:
        return

    unmatched_failures = set(FailureLine.objects.unmatched_for_job(job))
    if not unmatched_failures:
        return

    matches, all_matched = find_matches(unmatched_failures)
    update_db(job, matches, all_matched)
def lookup_revisions(revision_dict):
    """
    Retrieve a list of revision->resultset lookups

    ``revision_dict`` maps a project to its revisions.  Projects whose
    lookup comes back empty are left out of the returned dict.
    """
    from treeherder.model.derived import JobsModel

    lookup = {}
    for project, revisions in revision_dict.items():
        # Drop duplicate revisions before querying.
        unique_revisions = list(set(revisions))

        with JobsModel(project) as jobs_model:
            found = jobs_model.get_resultset_all_revision_lookup(
                unique_revisions)

        if not found:
            continue
        lookup[project] = found

    return lookup
def generate_request_body(self):
    """
    Create the data structure required by tbpl's starcomment.php script

    The result is stored on ``self.body``.
    """
    jobs_model = JobsModel(self.project)
    try:
        artifact_filter = {
            'job_id': set([("=", self.job_id)]),
            'name': set([("=", "buildapi")])
        }
        buildapi_artifact = jobs_model.get_job_artifact_list(
            0, 1, artifact_filter)[0]
        job_data = jobs_model.get_job(self.job_id)[0]
        option_collection = \
            jobs_model.refdata_model.get_all_option_collections()
        revision_list = jobs_model.get_resultset_revisions_list(
            job_data["result_set_id"])
    finally:
        jobs_model.disconnect()

    # The submit time is used as the date because a start time is not
    # available for pending jobs.
    submit_day = datetime.fromtimestamp(
        int(job_data["submit_timestamp"])).strftime("%Y-%m-%d")
    buildtype = option_collection[job_data["option_collection_hash"]]["opt"]

    self.body = {
        "buildname": buildapi_artifact["blob"]["buildername"],
        "machinename": job_data["machine_name"],
        "os": job_data["platform"],
        "date": submit_day,
        "type": job_data["job_type_name"],
        "buildtype": buildtype,
        "starttime": int(job_data["start_timestamp"]),
        "tree": self.project,
        "rev": revision_list[0]["revision"],
        "comment": "Bug {0}".format(self.bug_id),
        "who": self.who,
        "timestamp": self.submit_timestamp,
        "logfile": "00000000"
    }
def parse_log(project, job_id, result_set_id, check_errors=False):
    """
    Call ArtifactBuilderCollection on the given job.

    Every log referenced by the job is parsed and its artifacts are stored.
    With ``check_errors`` set, 'Open bugs' and 'Closed bugs' suggestion
    artifacts are stored as well and a failure message is published in
    addition to the 'processed' status message.
    """
    # Raw string: ``\d`` and ``\s`` are regex escapes, not string escapes.
    pattern_obj = re.compile(r'\d+:\d+:\d+\s+')

    jm = JobsModel(project=project)
    rdm = RefDataManager()

    # Suggestions are cached per cleaned line for the whole job.
    open_bugs_cache = {}
    closed_bugs_cache = {}

    status_publisher = JobStatusPublisher(settings.BROKER_URL)
    failure_publisher = JobFailurePublisher(settings.BROKER_URL)

    try:
        # return the resultset with the job id to identify if the UI wants
        # to fetch the whole thing.
        resultset = jm.get_result_set_by_id(result_set_id=result_set_id)[0]
        del resultset["active_status"]
        del resultset["revision_hash"]

        log_references = jm.get_log_references(job_id)

        # we may have many log references per job
        for log in log_references:

            # parse a log given its url
            artifact_bc = ArtifactBuilderCollection(
                log['url'],
                check_errors=check_errors,
            )
            artifact_bc.parse()

            artifact_list = [
                (job_id, name, 'json', json.dumps(artifact))
                for name, artifact in artifact_bc.artifacts.items()
            ]

            if check_errors:
                # I'll try to begin with a full_text search on the entire row
                all_errors = artifact_bc.artifacts[
                    'Structured Log']['step_data']['all_errors']

                open_bugs_suggestions = {}
                closed_bugs_suggestions = {}

                for err in all_errors:
                    # remove timestamp
                    clean_line = pattern_obj.sub('', err['line'])

                    if clean_line not in open_bugs_cache:
                        open_bugs_cache[clean_line] = rdm.get_suggested_bugs(
                            clean_line)

                    if clean_line not in closed_bugs_cache:
                        closed_bugs_cache[clean_line] = rdm.get_suggested_bugs(
                            clean_line, open_bugs=False)

                    # Suggestions are keyed by the original line, including
                    # its timestamp.
                    open_bugs_suggestions[err['line']] = \
                        open_bugs_cache[clean_line]
                    closed_bugs_suggestions[err['line']] = \
                        closed_bugs_cache[clean_line]

                artifact_list.append(
                    (job_id, 'Open bugs', 'json',
                     json.dumps(open_bugs_suggestions)))
                artifact_list.append(
                    (job_id, 'Closed bugs', 'json',
                     json.dumps(closed_bugs_suggestions)))

            # store the artifacts generated
            jm.store_job_artifact(artifact_list)

        status_publisher.publish(job_id, resultset, project, 'processed')
        if check_errors:
            failure_publisher.publish(job_id, project)

    finally:
        rdm.disconnect()
        jm.disconnect()
        status_publisher.disconnect()
        failure_publisher.disconnect()
def _process_all_objects_for_project(self, project):
    """Queue ``process_objects`` tasks while the project has unprocessed objects.

    A task is queued with ``process_objects.delay`` on every iteration for
    which ``get_num_unprocessed_objects()`` is still above zero.
    """
    jm = JobsModel(project)
    try:
        while jm.get_num_unprocessed_objects() > 0:
            process_objects.delay(project=project)
    finally:
        # The model was previously never released.
        jm.disconnect()
def generate_request_body(self):
    """
    Create the data structure required by tbpl's submitBugzillaComment.php
    script.  This is triggered by a new bug-job association.

    The result is stored on ``self.body``.
    """
    jobs_model = JobsModel(self.project)
    try:
        job = jobs_model.get_job(self.job_id)[0]
        failures_artifacts = jobs_model.get_job_artifact_list(0, 1, {
            'job_id': set([('=', job['id'])]),
            'name': set([('=', 'Bug suggestions')]),
        })
        # a bug suggestion artifact looks like this:
        # [{ "search": "my-error-line", "bugs": ....}]
        error_lines = [line["search"]
                       for artifact in failures_artifacts
                       for line in artifact["blob"]]
        bug_job_map = jobs_model.get_bug_job_map_detail(self.job_id,
                                                        self.bug_id)
        revision_list = jobs_model.get_resultset_revisions_list(
            job["result_set_id"])
        buildapi_info = jobs_model.get_job_artifact_list(0, 1, {
            'job_id': set([("=", self.job_id)]),
            'name': set([("=", "buildapi")])
        })
    finally:
        jobs_model.disconnect()

    # Obfuscate the submitter's address before it goes into the comment.
    who = bug_job_map["who"].replace("@", "[at]").replace(".", "[dot]")

    submitted_at = datetime.fromtimestamp(bug_job_map["submit_timestamp"])
    submit_date = submitted_at.replace(microsecond=0).isoformat()

    log_url = "{0}{1}/logviewer.html#?repo={2}&job_id={3}".format(
        settings.SITE_URL,
        settings.UI_PREFIX,
        self.project,
        self.job_id
    )

    job_description = {
        'repository': self.project,
        'who': who,
        'submit_timestamp': submit_date,
        'log': log_url,
        'machine': job["machine_name"],
        'revision': revision_list[0]["revision"],
    }

    if buildapi_info:
        job_description['buildname'] = \
            buildapi_info[0]["blob"]["buildername"]

    description_lines = ["{0}: {1}".format(key, value)
                         for key, value in job_description.items()]
    body_comment = '\n'.join(description_lines)
    body_comment += '\n\n'
    body_comment += '\n'.join(error_lines)

    self.body = {
        "id": self.bug_id,
        "comment": body_comment
    }