def store_performance_artifact(
        self, job_ids, performance_artifact_placeholders):
    """
    Store the performance data for the given jobs.

    :param job_ids: ids of the jobs whose performance artifacts are
        being stored
    :param performance_artifact_placeholders: list of artifact dicts,
        each containing at least a "job_guid" key
    """
    # Retrieve list of job signatures associated with the jobs
    job_data = self.get_job_signatures_from_ids(job_ids)

    # Distinct reference data signatures for these jobs. A set
    # comprehension replaces the former side-effecting map()+lambda,
    # which is non-idiomatic and — because map() is lazy on Python 3 —
    # would silently collect nothing there.
    job_ref_data_signatures = {
        job_data[job_guid]['signature'] for job_guid in job_data
    }

    # Retrieve associated data in reference_data_signatures
    reference_data = self.refdata_model.get_reference_data(
        list(job_ref_data_signatures))

    tda = TalosDataAdapter()

    for perf_data in performance_artifact_placeholders:
        job_guid = perf_data["job_guid"]
        ref_data_signature = job_data[job_guid]['signature']
        ref_data = reference_data[ref_data_signature]

        # the signature hash itself is not part of the reference data
        # fed to the adapter
        if 'signature' in ref_data:
            del ref_data['signature']

        # adapt and load data into placeholder structures
        tda.adapt_and_load(self.project, ref_data, job_data, perf_data)
def test_adapt_and_load():
    """Adapting every sample talos blob yields one placeholder per
    expected series (subtests + summary + counters)."""
    adapter = TalosDataAdapter()

    # job and reference metadata are identical for every sample blob,
    # so build them once up front
    job_data = {
        "oqiwy0q847365qiu": {
            "id": 1,
            "result_set_id": 1,
            "push_timestamp": 1402692388
        }
    }
    reference_data = {
        "property1": "value1",
        "property2": "value2",
        "property3": "value3"
    }

    expected_count = 0
    for blob in SampleData.get_talos_perf_data():
        # one extra result for the summary series
        expected_count += len(blob["results"]) + 1

        # we create one performance series per counter
        if 'talos_counters' in blob:
            expected_count += len(blob["talos_counters"])

        # Mimic production environment, the blobs are serialized
        # when the web service receives them
        datum = {
            "job_guid": 'oqiwy0q847365qiu',
            "name": "test",
            "type": "test",
            "blob": json.dumps({'talos_data': [blob]})
        }
        adapter.adapt_and_load(reference_data, job_data, datum)

    assert expected_count == len(adapter.performance_artifact_placeholders)
def _rewrite_series(self, jm, signature_hash, signature_properties,
                    subtest_signature_mapping,
                    extra_subtest_signatures):
    """
    Rewrite one performance series under a freshly computed signature.

    Transforms the signature properties down to the significant key
    set, remaps any subtest signatures, re-stores the series under the
    new hash for every valid time interval, then deletes the old
    series and signature rows.

    :returns: the new signature hash
    """
    new_props = TalosDataAdapter._transform_signature_properties(
        signature_properties,
        significant_keys=Command.SIGNIFICANT_KEYS)

    if 'subtest_signatures' in new_props:
        suitekey = self._get_suitekey(new_props)

        # rewrite a new set of subtest signatures
        old_subtest_signatures = new_props['subtest_signatures']
        new_subtest_signatures = set()
        for old_signature in old_subtest_signatures:
            try:
                new_subtest_signatures.add(
                    subtest_signature_mapping[old_signature])
            # narrowed from a bare "except:", which would also have
            # swallowed KeyboardInterrupt/SystemExit; only a missing
            # mapping key is expected here
            except KeyError:
                # key may not exist if script interrupted, get
                # suite signatures via extra_subtest_signatures
                for sig in extra_subtest_signatures.get(suitekey, []):
                    new_subtest_signatures.add(sig)
        new_props['subtest_signatures'] = sorted(new_subtest_signatures)

    new_hash = TalosDataAdapter.get_series_signature(new_props)
    # parenthesized so the statement is valid on both Python 2 and 3
    print("%s -> %s" % (signature_hash, new_hash))
    jm.set_series_signature(new_hash, new_props)

    for time_interval in PerformanceTimeInterval.all_valid_time_intervals():
        series_list = jm.get_performance_series_from_signatures(
            [signature_hash], time_interval)
        # stored blobs may be compressed; normalize before re-storing
        series = utils.decompress_if_needed(series_list[0]['blob'])
        jm.store_performance_series(time_interval, 'talos_data',
                                    str(new_hash), series)

    # drop the old series + signature now that the data lives under
    # the new hash
    jm.execute(proc='jobs.deletes.delete_performance_series',
               placeholders=[signature_hash])
    jm.execute(proc='jobs.deletes.delete_series_signature',
               placeholders=[signature_hash])
    return new_hash
def _rewrite_series(self, jm, signature_hash, signature_properties,
                    subtest_signature_mapping,
                    extra_subtest_signatures):
    """
    Rewrite one performance series under a freshly computed signature.

    Transforms the signature properties down to the significant key
    set, remaps any subtest signatures, re-stores the series under the
    new hash for every valid time interval, then deletes the old
    series and signature rows.

    :returns: the new signature hash
    """
    new_props = TalosDataAdapter._transform_signature_properties(
        signature_properties,
        significant_keys=Command.SIGNIFICANT_KEYS)

    if 'subtest_signatures' in new_props:
        suitekey = self._get_suitekey(new_props)

        # rewrite a new set of subtest signatures
        old_subtest_signatures = new_props['subtest_signatures']
        new_subtest_signatures = set()
        for old_signature in old_subtest_signatures:
            try:
                new_subtest_signatures.add(
                    subtest_signature_mapping[old_signature])
            # narrowed from a bare "except:", which would also have
            # swallowed KeyboardInterrupt/SystemExit; only a missing
            # mapping key is expected here
            except KeyError:
                # key may not exist if script interrupted, get
                # suite signatures via extra_subtest_signatures
                for sig in extra_subtest_signatures.get(suitekey, []):
                    new_subtest_signatures.add(sig)
        new_props['subtest_signatures'] = sorted(new_subtest_signatures)

    new_hash = TalosDataAdapter.get_series_signature(new_props)
    # parenthesized so the statement is valid on both Python 2 and 3
    print("%s -> %s" % (signature_hash, new_hash))
    jm.set_series_signature(new_hash, new_props)

    for time_interval in PerformanceTimeInterval.all_valid_time_intervals():
        series_list = jm.get_performance_series_from_signatures(
            [signature_hash], time_interval)
        series = series_list[0]['blob']
        jm.store_performance_series(time_interval, 'talos_data',
                                    str(new_hash), series)

    # drop the old series + signature now that the data lives under
    # the new hash
    jm.jobs_execute(proc='jobs.deletes.delete_performance_series',
                    placeholders=[signature_hash])
    jm.jobs_execute(proc='jobs.deletes.delete_series_signature',
                    placeholders=[signature_hash])
    return new_hash
def test_performance_log_parsing():
    """
    Every sample performance log, once parsed, must produce talos
    data that validates against the datazilla json schema.
    """
    adapter = TalosDataAdapter()
    for log_url in SampleData().get_performance_logs():
        artifact_builder = BuildbotPerformanceDataArtifactBuilder(
            url=log_url)
        collection = ArtifactBuilderCollection(
            log_url, builders=[artifact_builder])
        collection.parse()
        parsed_artifact = collection.artifacts[artifact_builder.name]
        # Validate the data returned has the required datazilla
        # json schema
        validate(parsed_artifact['talos_data'][0],
                 adapter.datazilla_schema)
def store_performance_artifact(
        self, job_ids, performance_artifact_placeholders):
    """
    Store the performance data for the given jobs, then bulk-insert
    the resulting artifact/signature placeholders and kick off the
    adapter's follow-up tasks.

    :param job_ids: ids of the jobs whose performance artifacts are
        being stored
    :param performance_artifact_placeholders: list of artifact dicts,
        each containing at least a "job_guid" key
    """
    # Retrieve list of job signatures associated with the jobs
    job_data = self.get_job_signatures_from_ids(job_ids)

    # Distinct reference data signatures for these jobs. A set
    # comprehension replaces the former side-effecting map()+lambda,
    # which is non-idiomatic and — because map() is lazy on Python 3 —
    # would silently collect nothing there.
    job_ref_data_signatures = {
        job_data[job_guid]['signature'] for job_guid in job_data
    }

    # Retrieve associated data in reference_data_signatures
    reference_data = self.refdata_model.get_reference_data(
        list(job_ref_data_signatures))

    tda = TalosDataAdapter()

    for perf_data in performance_artifact_placeholders:
        job_guid = perf_data["job_guid"]
        ref_data_signature = job_data[job_guid]['signature']
        ref_data = reference_data[ref_data_signature]

        # the signature hash itself is not part of the reference data
        # fed to the adapter
        if 'signature' in ref_data:
            del ref_data['signature']

        # adapt and load data into placeholder structures
        tda.adapt_and_load(ref_data, job_data, perf_data)

    # bulk-insert everything the adapter accumulated
    self.jobs_execute(
        proc="jobs.inserts.set_performance_artifact",
        debug_show=self.DEBUG,
        placeholders=tda.performance_artifact_placeholders,
        executemany=True)

    self.jobs_execute(
        proc='jobs.inserts.set_series_signature',
        debug_show=self.DEBUG,
        placeholders=tda.signature_property_placeholders,
        executemany=True)

    tda.submit_tasks(self.project)
def test_adapt_and_load(self):
    """
    For every sample talos blob: run it through TalosDataAdapter and
    check that the adapter produced one signature per subtest, per
    counter, and for the suite summary, and that the stored series
    values match the blob's own summary section when one exists.
    """
    talos_perf_data = SampleData.get_talos_perf_data()

    for talos_datum in talos_perf_data:
        datum = {
            "job_guid": 'oqiwy0q847365qiu',
            "name": "test",
            "type": "test",
            "blob": talos_datum
        }
        job_data = {
            "oqiwy0q847365qiu": {
                "id": 1,
                "result_set_id": 1,
                "push_timestamp": 1402692388
            }
        }
        reference_data = {
            "property1": "value1",
            "property2": "value2",
            "property3": "value3"
        }

        # Mimic production environment, the blobs are serialized
        # when the web service receives them
        datum['blob'] = json.dumps({'talos_data': [datum['blob']]})

        tda = TalosDataAdapter()
        tda.adapt_and_load(reference_data, job_data, datum)

        # base: subtests + one extra result for the summary series
        expected_result_count = len(talos_datum["results"]) + 1

        # we create one performance series per counter
        if 'talos_counters' in talos_datum:
            expected_result_count += len(talos_datum["talos_counters"])

        # result count == number of signatures
        self.assertEqual(expected_result_count,
                         len(tda.signatures.keys()))

        # verify that we have signatures for the subtests; work on a
        # copy since matched entries are filtered out as we go
        signature_placeholders = copy.copy(
            tda.signature_property_placeholders)
        for (testname, results) in talos_datum["results"].iteritems():
            # placeholder column 2 holds the test name — there must be
            # exactly one matching signature per subtest
            signature_placeholder = filter(
                lambda p: p[2] == testname, signature_placeholders)
            self.assertEqual(len(signature_placeholder), 1)

            signature_hash = signature_placeholder[0][0]
            perfdata = tda.signatures[signature_hash][0]
            if talos_datum.get('summary'):
                # if we have a summary, ensure the subtest summary
                # values made it in
                for measure in ['min', 'max', 'std', 'mean', 'median']:
                    self.assertEqual(
                        round(talos_datum['summary']['subtests'][testname][measure], 2),
                        perfdata[measure])
            else:
                # old style talos blob without a summary — these are
                # going away, so just verify that some values are
                # being generated here
                for measure in ['min', 'max', 'std', 'mean', 'median']:
                    self.assertTrue(perfdata[measure])

            # filter out this signature from data to process
            signature_placeholders = filter(
                lambda p: p[0] != signature_hash, signature_placeholders)

        # if we have counters, verify that the series for them is as
        # expected
        for (counter, results) in talos_datum.get('talos_counters',
                                                  {}).iteritems():
            signature_placeholder = filter(
                lambda p: p[2] == counter, signature_placeholders)
            self.assertEqual(len(signature_placeholder), 1)

            signature_hash = signature_placeholder[0][0]
            perfdata = tda.signatures[signature_hash][0]
            for measure in ['max', 'mean']:
                self.assertEqual(round(float(results[measure]), 2),
                                 perfdata[measure])

            # filter out this signature from data to process
            signature_placeholders = filter(
                lambda p: p[0] != signature_hash, signature_placeholders)

        # we should be left with just summary signature placeholders
        # (two entries remain after filtering — TODO confirm which
        # summary rows these correspond to)
        self.assertEqual(len(signature_placeholders), 2)

        perfdata = tda.signatures[signature_placeholders[0][0]][0]
        if talos_datum.get('summary'):
            self.assertEqual(round(talos_datum['summary']['suite'], 2),
                             perfdata['geomean'])
        else:
            # old style talos blob without summary. again, going away,
            # but let's at least test that we have the 'geomean' value
            # generated
            self.assertTrue(perfdata['geomean'])
def test_adapt_and_load(self):
    """
    Adapt each sample talos blob and verify the adapter's output:
    one signature per subtest, per counter and for the suite summary,
    with stored values matching the blob's summary section when
    present.
    """
    talos_perf_data = SampleData.get_talos_perf_data()

    for talos_datum in talos_perf_data:
        datum = {
            "job_guid": 'oqiwy0q847365qiu',
            "name": "test",
            "type": "test",
            "blob": talos_datum
        }
        job_data = {
            "oqiwy0q847365qiu": {
                "id": 1,
                "result_set_id": 1,
                "push_timestamp": 1402692388
            }
        }
        reference_data = {
            "property1": "value1",
            "property2": "value2",
            "property3": "value3"
        }

        # Mimic production environment, the blobs are serialized
        # when the web service receives them
        datum['blob'] = json.dumps({'talos_data': [datum['blob']]})

        tda = TalosDataAdapter()
        tda.adapt_and_load(reference_data, job_data, datum)

        # base: subtests + one extra result for the summary series
        expected_result_count = len(talos_datum["results"]) + 1

        # we create one performance series per counter
        if 'talos_counters' in talos_datum:
            expected_result_count += len(talos_datum["talos_counters"])

        # result count == number of signatures
        self.assertEqual(expected_result_count,
                         len(tda.signatures.keys()))

        # verify that we have signatures for the subtests; matched
        # entries are filtered out of this working copy as we go
        signature_placeholders = copy.copy(
            tda.signature_property_placeholders)
        for (testname, results) in talos_datum["results"].iteritems():
            # placeholder column 2 is the test name — expect exactly
            # one signature per subtest
            signature_placeholder = filter(lambda p: p[2] == testname,
                                           signature_placeholders)
            self.assertEqual(len(signature_placeholder), 1)

            signature_hash = signature_placeholder[0][0]
            perfdata = tda.signatures[signature_hash][0]
            if talos_datum.get('summary'):
                # if we have a summary, ensure the subtest summary
                # values made it in
                for measure in ['min', 'max', 'std', 'mean', 'median']:
                    self.assertEqual(
                        round(
                            talos_datum['summary']['subtests'][testname]
                            [measure], 2),
                        perfdata[measure])
            else:
                # old style talos blob without a summary — these are
                # going away, so just verify that some values are
                # being generated here
                for measure in ['min', 'max', 'std', 'mean', 'median']:
                    self.assertTrue(perfdata[measure])

            # filter out this signature from data to process
            signature_placeholders = filter(
                lambda p: p[0] != signature_hash, signature_placeholders)

        # if we have counters, verify that the series for them is as
        # expected
        for (counter, results) in talos_datum.get('talos_counters',
                                                  {}).iteritems():
            signature_placeholder = filter(lambda p: p[2] == counter,
                                           signature_placeholders)
            self.assertEqual(len(signature_placeholder), 1)

            signature_hash = signature_placeholder[0][0]
            perfdata = tda.signatures[signature_hash][0]
            for measure in ['max', 'mean']:
                self.assertEqual(round(float(results[measure]), 2),
                                 perfdata[measure])

            # filter out this signature from data to process
            signature_placeholders = filter(
                lambda p: p[0] != signature_hash, signature_placeholders)

        # we should be left with just summary signature placeholders
        # (two entries remain after filtering — TODO confirm which
        # summary rows these correspond to)
        self.assertEqual(len(signature_placeholders), 2)

        perfdata = tda.signatures[signature_placeholders[0][0]][0]
        if talos_datum.get('summary'):
            self.assertEqual(round(talos_datum['summary']['suite'], 2),
                             perfdata['geomean'])
        else:
            # old style talos blob without summary. again, going away,
            # but let's at least test that we have the 'geomean' value
            # generated
            self.assertTrue(perfdata['geomean'])
def _signature_needs_rewriting(signature_properties, signature_hash):
    """
    Return True when this series signature must be regenerated:
    either its properties carry keys outside the significant set, or
    the stored hash no longer matches the hash computed from the
    properties.
    """
    # any non-significant property key forces a rewrite
    if not set(signature_properties.keys()).issubset(
            Command.SIGNIFICANT_KEYS):
        return True
    # otherwise rewrite only if the recorded hash is stale
    return signature_hash != TalosDataAdapter.get_series_signature(
        signature_properties)
def _get_suitekey(signature_props):
    """
    Compute the signature hash of the suite that owns these
    properties, i.e. the hash obtained after stripping the
    per-test keys.
    """
    # drop 'test' / 'subtest_signatures' — but, mirroring the
    # original truthiness check, keep them when their value is falsy
    suite_props = {
        key: value for key, value in signature_props.items()
        if key not in ('test', 'subtest_signatures') or not value
    }
    return TalosDataAdapter.get_series_signature(suite_props)
def _signature_needs_rewriting(signature_properties, signature_hash):
    """
    Decide whether a series signature must be regenerated.

    True when the properties contain keys beyond the significant key
    set, or when the stored hash disagrees with the hash derived from
    the properties.
    """
    # leftover non-significant keys always trigger a rewrite
    if set(signature_properties.keys()).difference(
            Command.SIGNIFICANT_KEYS):
        return True
    computed_hash = TalosDataAdapter.get_series_signature(
        signature_properties)
    return signature_hash != computed_hash
def test_adapt_and_load():
    """
    Run every sample talos blob through one shared TalosDataAdapter
    and verify (a) the total placeholder count (one per subtest, one
    per counter, plus one summary series per blob) and (b) for
    summarized blobs, that the compressed series stored in the
    placeholders matches the blob's own summary values.
    """
    talos_perf_data = SampleData.get_talos_perf_data()
    tda = TalosDataAdapter()

    result_count = 0
    for datum in talos_perf_data:
        datum = {
            "job_guid": 'oqiwy0q847365qiu',
            "name": "test",
            "type": "test",
            "blob": datum
        }
        job_data = {
            "oqiwy0q847365qiu": {
                "id": 1,
                "result_set_id": 1,
                "push_timestamp": 1402692388
            }
        }
        reference_data = {
            "property1": "value1",
            "property2": "value2",
            "property3": "value3"
        }

        # one extra result for the summary series
        result_count += len(datum['blob']["results"]) + 1

        # we create one performance series per counter
        if 'talos_counters' in datum['blob']:
            result_count += len(datum['blob']["talos_counters"])

        # Mimic production environment, the blobs are serialized
        # when the web service receives them
        datum['blob'] = json.dumps({'talos_data': [datum['blob']]})

        tda.adapt_and_load(reference_data, job_data, datum)

        # we upload a summary with a suite and subtest values, +1 for suite
        # NOTE(review): datum['blob'] is now a JSON string, so this is
        # a substring test on the serialized text, not a dict lookup
        if 'summary' in datum['blob']:
            # placeholder column 4 is a zlib-compressed JSON series;
            # assumes the summary series is the most recently appended
            # placeholder — TODO confirm against the adapter
            results = json.loads(zlib.decompress(tda.performance_artifact_placeholders[-1][4]))
            data = json.loads(datum['blob'])['talos_data'][0]
            assert results["blob"]["performance_series"]["geomean"] == data['summary']['suite']

            # deal with the subtests now, walking backwards from the
            # end of the placeholder list
            for i in range(0, len(data['summary']['subtests'])):
                subresults = json.loads(zlib.decompress(tda.performance_artifact_placeholders[-1 - i][4]))
                if 'subtest_signatures' in subresults["blob"]['signature_properties']:
                    # ignore summary signatures
                    continue
                subdata = data['summary']['subtests'][subresults["blob"]['signature_properties']['test']]
                for datatype in ['min', 'max', 'mean', 'median', 'std']:
                    assert subdata[datatype] == subresults["blob"]["performance_series"][datatype]
                if 'value' in subdata.keys():
                    assert subdata['value'] == subresults["blob"]["performance_series"]['value']
        else:
            # FIXME: the talos data blob we're currently using contains
            # datums with summaries and those without; we should
            # probably test non-summarized data as well
            pass

    assert result_count == len(tda.performance_artifact_placeholders)
def test_adapt_and_load(self):
    """
    ORM-backed variant: adapt each sample talos blob and verify the
    PerformanceSignature / PerformanceDatum rows created — one
    signature per subtest, per counter, and for the suite summary,
    with datum values matching the blob's summary when present.
    """
    talos_perf_data = SampleData.get_talos_perf_data()

    for talos_datum in talos_perf_data:
        # delete any previously-created perf objects
        # FIXME: because of https://bugzilla.mozilla.org/show_bug.cgi?id=1133273
        # this can be really slow if we have a dev database with lots of
        # performance data in it (if the test succeeds, the transaction
        # will be rolled back so at least it won't pollute the production
        # database)
        PerformanceSignature.objects.all().delete()
        PerformanceDatum.objects.all().delete()

        datum = {
            "job_guid": 'oqiwy0q847365qiu',
            "name": "test",
            "type": "test",
            "blob": talos_datum
        }
        job_data = {
            "oqiwy0q847365qiu": {
                "id": 1,
                "result_set_id": 1,
                "push_timestamp": 1402692388
            }
        }
        reference_data = {
            "option_collection_hash": self.OPTION_HASH,
            "machine_platform": self.MACHINE_PLATFORM,
            "property1": "value1",
            "property2": "value2",
            "property3": "value3"
        }

        # Mimic production environment, the blobs are serialized
        # when the web service receives them
        datum['blob'] = json.dumps({'talos_data': [datum['blob']]})

        tda = TalosDataAdapter()
        tda.adapt_and_load(self.REPO_NAME, reference_data, job_data,
                           datum)

        # base: subtests + one extra result for the summary series
        expected_result_count = len(talos_datum["results"]) + 1

        # we create one performance series per counter
        if 'talos_counters' in talos_datum:
            expected_result_count += len(talos_datum["talos_counters"])

        # result count == number of signatures
        self.assertEqual(expected_result_count,
                         PerformanceSignature.objects.all().count())

        # verify that we have signatures for the subtests
        # NOTE(review): 'datum' is rebound below from the input dict
        # to a PerformanceDatum model instance
        for (testname, results) in talos_datum["results"].iteritems():
            signature = PerformanceSignature.objects.get(test=testname)
            datum = PerformanceDatum.objects.get(signature=signature)
            if talos_datum.get('summary'):
                # if we have a summary, ensure the subtest summary
                # values made it in
                self.assertEqual(
                    round(talos_datum['summary']['subtests'][testname]['filtered'], 2),
                    datum.value)
            else:
                # old style talos blob without a summary — these are
                # going away, so just verify that some value is being
                # generated here
                self.assertTrue(datum.value)

        # if we have counters, verify that the series for them is as
        # expected
        for (counter, results) in talos_datum.get('talos_counters',
                                                  {}).iteritems():
            signature = PerformanceSignature.objects.get(test=counter)
            datum = PerformanceDatum.objects.get(signature=signature)
            self.assertEqual(round(float(results['mean']), 2),
                             datum.value)

        # we should be left with just the summary series (empty test
        # name, keyed by the suite)
        signature = PerformanceSignature.objects.get(
            test='', suite=talos_datum['testrun']['suite'])
        datum = PerformanceDatum.objects.get(signature=signature)
        if talos_datum.get('summary'):
            self.assertEqual(round(talos_datum['summary']['suite'], 2),
                             datum.value)
        else:
            # old style talos blob without summary. again, going away,
            # but let's at least test that we have the value
            self.assertTrue(datum.value)