def ProcessHistogramSet(histogram_dicts):
  """Parses a HistogramSet JSON payload and queues it for row processing.

  Args:
    histogram_dicts: a list of dicts, each either a histogram or a shared
        diagnostic in HistogramSet JSON format.

  Raises:
    api_request_handler.BadRequestError: if the payload is not a list, or
        contains no histograms, or (via helpers) is otherwise malformed.
  """
  if not isinstance(histogram_dicts, list):
    # Fixed typo in user-facing message: "much be" -> "must be".
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must be a list of dicts')

  # Start the whitelist fetch early so it overlaps with the CPU-bound
  # parsing work below; the result is only needed after parsing.
  bot_whitelist_future = stored_object.GetAsync(
      add_point_queue.BOT_WHITELIST_KEY)

  histograms = histogram_set.HistogramSet()
  histograms.ImportDicts(histogram_dicts)
  histograms.ResolveRelatedHistograms()
  histograms.DeduplicateDiagnostics()

  if len(histograms) == 0:
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must contain at least one histogram.')

  _LogDebugInfo(histograms)

  InlineDenseSharedDiagnostics(histograms)

  # TODO(eakuefner): Get rid of this.
  # https://github.com/catapult-project/catapult/issues/4242
  _PurgeHistogramBinData(histograms)

  revision = ComputeRevision(histograms)

  # Master/bot/benchmark are read off the first histogram; the set-level
  # helpers above are expected to have made these diagnostics uniform.
  master = _GetDiagnosticValue(reserved_infos.MASTERS.name,
                               histograms.GetFirstHistogram())
  bot = _GetDiagnosticValue(reserved_infos.BOTS.name,
                            histograms.GetFirstHistogram())
  benchmark = _GetDiagnosticValue(reserved_infos.BENCHMARKS.name,
                                  histograms.GetFirstHistogram())
  benchmark_description = _GetDiagnosticValue(
      reserved_infos.BENCHMARK_DESCRIPTIONS.name,
      histograms.GetFirstHistogram(), optional=True)

  suite_key = utils.TestKey('%s/%s/%s' % (master, bot, benchmark))

  bot_whitelist = bot_whitelist_future.get_result()
  internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)

  # We'll skip the histogram-level sparse diagnostics because we need to
  # handle those with the histograms, below, so that we can properly assign
  # test paths.
  suite_level_sparse_diagnostic_entities = FindSuiteLevelSparseDiagnostics(
      histograms, suite_key, revision, internal_only)

  # TODO(eakuefner): Refactor master/bot computation to happen above this line
  # so that we can replace with a DiagnosticRef rather than a full diagnostic.
  new_guids_to_old_diagnostics = DeduplicateAndPut(
      suite_level_sparse_diagnostic_entities, suite_key, revision)
  for new_guid, old_diagnostic in new_guids_to_old_diagnostics.iteritems():
    histograms.ReplaceSharedDiagnostic(
        new_guid, diagnostic.Diagnostic.FromDict(old_diagnostic))

  tasks = _BatchHistogramsIntoTasks(suite_key.id(), histograms, revision,
                                    benchmark_description)

  _QueueHistogramTasks(tasks)
def _ProcessRowAndHistogram(params, bot_whitelist):
  """Creates Row and Histogram entities for one queued histogram task.

  Args:
    params: a dict with 'revision', 'test_path', 'benchmark_description',
        'data' (a histogram dict), and optionally 'diagnostics'.
    bot_whitelist: whitelist used to decide whether the created entities
        are internal-only.

  Returns:
    A list of futures for the Row and Histogram adds, or an empty list if
    the histogram holds no samples.
  """
  revision = int(params['revision'])
  test_path = params['test_path']
  benchmark_description = params['benchmark_description']
  data_dict = params['data']

  logging.info('Processing: %s', test_path)

  hist = histogram_module.Histogram.FromDict(data_dict)
  # Nothing to store for an empty histogram.
  if hist.num_values == 0:
    return []

  # Test path layout here is master/bot/benchmark/histogram[/rest...].
  test_path_parts = test_path.split('/')
  master = test_path_parts[0]
  bot = test_path_parts[1]
  benchmark_name = test_path_parts[2]
  histogram_name = test_path_parts[3]
  if len(test_path_parts) > 4:
    rest = '/'.join(test_path_parts[4:])
  else:
    rest = None
  full_test_name = '/'.join(test_path_parts[2:])
  internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)
  extra_args = GetUnitArgs(hist.unit)

  unescaped_story_name = _GetStoryFromDiagnosticsDict(params.get('diagnostics'))

  # TODO(eakuefner): Populate benchmark_description once it appears in
  # diagnostics.
  # https://github.com/catapult-project/catapult/issues/4096
  parent_test = add_point_queue.GetOrCreateAncestors(
      master, bot, full_test_name, internal_only=internal_only,
      unescaped_story_name=unescaped_story_name,
      benchmark_description=benchmark_description, **extra_args)
  test_key = parent_test.key

  statistics_scalars = hist.statistics_scalars
  legacy_parent_tests = {}

  # TODO(#4213): Stop doing this.
  # Legacy benchmarks get no per-statistic legacy rows at all.
  if benchmark_name in LEGACY_BENCHMARKS:
    statistics_scalars = {}

  # Create one legacy-style test per summary statistic (avg, std, ...),
  # named benchmark/histogram_stat[/rest], skipping filtered combinations.
  for stat_name, scalar in statistics_scalars.iteritems():
    if _ShouldFilter(histogram_name, benchmark_name, stat_name):
      continue
    # NOTE: |extra_args| is deliberately rebound per statistic; the scalar's
    # unit may differ from the histogram's unit.
    extra_args = GetUnitArgs(scalar.unit)
    suffixed_name = '%s/%s_%s' % (
        benchmark_name, histogram_name, stat_name)
    if rest is not None:
      suffixed_name += '/' + rest
    legacy_parent_tests[stat_name] = add_point_queue.GetOrCreateAncestors(
        master, bot, suffixed_name, internal_only=internal_only,
        unescaped_story_name=unescaped_story_name, **extra_args)

  return [
      _AddRowsFromData(params, revision, parent_test, legacy_parent_tests,
                       internal_only),
      _AddHistogramFromData(params, revision, test_key, internal_only)]
def _ProcessRowAndHistogram(params, bot_whitelist):
  """Adds one Row and one Histogram entity for a queued histogram task.

  Args:
    params: task payload with 'revision', 'test_path', 'data', and
        (optionally) 'diagnostics'.
    bot_whitelist: whitelist used to decide whether the created entities
        are internal-only.

  Returns:
    A list of the two futures from the Row and Histogram adds.
  """
  revision = int(params['revision'])
  test_path = params['test_path']
  data_dict = params['data']

  logging.info('Processing: %s', test_path)

  # Test path layout: master/bot/<rest of test name>.
  parts = test_path.split('/')
  master, bot = parts[0], parts[1]
  test_name = '/'.join(parts[2:])

  internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)
  extra_args = GetUnitArgs(data_dict['unit'])
  unescaped_story_name = _GetStoryFromDiagnosticsDict(
      params.get('diagnostics'))

  # TODO(eakuefner): Populate benchmark_description once it appears in
  # diagnostics.
  # https://github.com/catapult-project/catapult/issues/4096
  ancestor = add_point_queue.GetOrCreateAncestors(
      master, bot, test_name, internal_only,
      unescaped_story_name=unescaped_story_name, **extra_args)

  return [
      _AddRowFromData(params, revision, ancestor, internal_only),
      _AddHistogramFromData(params, revision, ancestor.key, internal_only),
  ]
def GetSuiteKey(histograms):
  """Returns the TestMetadata key of the suite for a non-empty HistogramSet.

  Args:
    histograms: a histogram_set.HistogramSet with at least one histogram.

  Returns:
    The ndb key of the master/bot/benchmark ancestor test.
  """
  assert len(histograms) > 0
  # TODO(eakuefner): Refactor this to coalesce the boilerplate (note that this
  # is all also being done in add_histograms_queue's post handler)
  first_histogram = histograms.GetFirstHistogram()
  master, bot, benchmark = _GetMasterBotBenchmarkFromHistogram(first_histogram)
  whitelist = stored_object.Get(add_point_queue.BOT_WHITELIST_KEY)
  is_internal = add_point_queue.BotInternalOnly(bot, whitelist)
  suite = add_point_queue.GetOrCreateAncestors(
      master, bot, benchmark, is_internal)
  return suite.key
def post(self):
  """Adds a single histogram or sparse shared diagnostic to the datastore.

  The |data| request parameter can be either a histogram or a sparse shared
  diagnostic; the set of diagnostics that are considered sparse (meaning that
  they don't normally change on every upload for a given benchmark from a
  given bot) is shown in add_histograms.SPARSE_DIAGNOSTIC_TYPES.

  See https://goo.gl/lHzea6 for detailed information on the JSON format for
  histograms and diagnostics.

  Request parameters:
    data: JSON encoding of a histogram or shared diagnostic.
    revision: a revision, given as an int.
    test_path: the test path to which this diagnostic or histogram should be
        attached.
  """
  datastore_hooks.SetPrivilegedRequest()

  data = self.request.get('data')
  revision = int(self.request.get('revision'))
  test_path = self.request.get('test_path')

  data_dict = json.loads(data)
  guid = data_dict['guid']
  # Shared diagnostics carry a 'type' field; histograms do not.
  is_diagnostic = 'type' in data_dict

  # Test path layout: master/bot/<rest of test name>.
  test_path_parts = test_path.split('/')
  master = test_path_parts[0]
  bot = test_path_parts[1]
  test_name = '/'.join(test_path_parts[2:])
  bot_whitelist = stored_object.Get(add_point_queue.BOT_WHITELIST_KEY)
  internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)
  # Diagnostics have no unit, so they contribute no unit-derived args.
  extra_args = {} if is_diagnostic else GetUnitArgs(data_dict['unit'])
  # TODO(eakuefner): Populate benchmark_description once it appears in
  # diagnostics.
  test_key = add_point_queue.GetOrCreateAncestors(
      master, bot, test_name, internal_only, **extra_args).key
  if is_diagnostic:
    entity = histogram.SparseDiagnostic(
        id=guid, data=data, test=test_key, start_revision=revision,
        end_revision=revision, internal_only=internal_only)
  else:
    entity = histogram.Histogram(
        id=guid, data=data, test=test_key, revision=revision,
        internal_only=internal_only)
    # Rows are only created for histograms, not for shared diagnostics.
    AddRow(data_dict, test_key, revision, test_path, internal_only)
  entity.put()
def _ProcessRowAndHistogram(params, bot_whitelist):
  """Creates Row and Histogram entities for one queued histogram task.

  Also creates legacy per-statistic tests (e.g. <name>_avg) so that old
  dashboards keep working, preserving any '_ref'/'/ref' suffix position.

  Args:
    params: a dict with 'revision', 'test_path', 'data' (a histogram dict),
        and optionally 'diagnostics'.
    bot_whitelist: whitelist used to decide whether the created entities
        are internal-only.

  Returns:
    A list of futures for the Row and Histogram adds.
  """
  revision = int(params['revision'])
  test_path = params['test_path']
  data_dict = params['data']

  logging.info('Processing: %s', test_path)

  hist = histogram_module.Histogram.FromDict(data_dict)
  # Test path layout: master/bot/benchmark[/...].
  test_path_parts = test_path.split('/')
  master = test_path_parts[0]
  bot = test_path_parts[1]
  test_name = '/'.join(test_path_parts[2:])
  internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)
  extra_args = GetUnitArgs(hist.unit)

  unescaped_story_name = _GetStoryFromDiagnosticsDict(
      params.get('diagnostics'))

  # TODO(eakuefner): Populate benchmark_description once it appears in
  # diagnostics.
  # https://github.com/catapult-project/catapult/issues/4096
  parent_test = add_point_queue.GetOrCreateAncestors(
      master, bot, test_name, internal_only=internal_only,
      unescaped_story_name=unescaped_story_name, **extra_args)
  test_key = parent_test.key

  benchmark_name = test_path_parts[2]

  statistics_scalars = hist.statistics_scalars
  legacy_parent_tests = {}
  # Strip a trailing '_ref'/'/ref' (both 4 chars) so the statistic suffix is
  # inserted BEFORE the ref marker, then re-append it below.
  if test_name.endswith('_ref'):
    test_name = test_name[:-4]
    ref_suffix = '_ref'
  elif test_name.endswith('/ref'):
    test_name = test_name[:-4]
    ref_suffix = '/ref'
  else:
    ref_suffix = ''

  # TODO(#4213): Stop doing this.
  # Filtered tests keep only the 'avg' statistic; legacy benchmarks get none.
  if _ShouldFilter(test_name, benchmark_name):
    statistics_scalars = {'avg': statistics_scalars['avg']}
  elif benchmark_name in LEGACY_BENCHMARKS:
    statistics_scalars = {}

  for stat_name, scalar in statistics_scalars.iteritems():
    # NOTE: |extra_args| is deliberately rebound per statistic; the scalar's
    # unit may differ from the histogram's unit.
    extra_args = GetUnitArgs(scalar.unit)
    suffixed_name = '%s_%s%s' % (test_name, stat_name, ref_suffix)
    legacy_parent_tests[stat_name] = add_point_queue.GetOrCreateAncestors(
        master, bot, suffixed_name, internal_only=internal_only,
        unescaped_story_name=unescaped_story_name, **extra_args)

  return [
      _AddRowsFromData(params, revision, parent_test, legacy_parent_tests,
                       internal_only),
      _AddHistogramFromData(params, revision, test_key, internal_only)
  ]
def ProcessHistogramSet(histogram_dicts):
  """Validates a HistogramSet JSON payload and queues it for row processing.

  Each phase is wrapped in a WallTimeLogger so upload latency can be broken
  down in the logs.

  Args:
    histogram_dicts: a list of dicts, each either a histogram or a shared
        diagnostic in HistogramSet JSON format.

  Raises:
    api_request_handler.BadRequestError: if the payload is not a list, or
        contains no histograms, or (via helpers) is otherwise malformed.
  """
  if not isinstance(histogram_dicts, list):
    # Fixed typo in user-facing message: "much be" -> "must be".
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must be a list of dicts')

  # Start the whitelist fetch early so it overlaps with the CPU-bound
  # parsing work below; the result is only needed after parsing.
  bot_whitelist_future = stored_object.GetAsync(
      add_point_queue.BOT_WHITELIST_KEY)

  histograms = histogram_set.HistogramSet()

  with timing.WallTimeLogger('hs.ImportDicts'):
    histograms.ImportDicts(histogram_dicts)

  with timing.WallTimeLogger('hs.ResolveRelatedHistograms'):
    histograms.ResolveRelatedHistograms()

  with timing.WallTimeLogger('hs.DeduplicateDiagnostics'):
    histograms.DeduplicateDiagnostics()

  if len(histograms) == 0:
    raise api_request_handler.BadRequestError(
        'HistogramSet JSON must contain at least one histogram.')

  with timing.WallTimeLogger('hs._LogDebugInfo'):
    _LogDebugInfo(histograms)

  with timing.WallTimeLogger('InlineDenseSharedDiagnostics'):
    InlineDenseSharedDiagnostics(histograms)

  # TODO(eakuefner): Get rid of this.
  # https://github.com/catapult-project/catapult/issues/4242
  with timing.WallTimeLogger('_PurgeHistogramBinData'):
    _PurgeHistogramBinData(histograms)

  with timing.WallTimeLogger('_GetDiagnosticValue calls'):
    master = _GetDiagnosticValue(reserved_infos.MASTERS.name,
                                 histograms.GetFirstHistogram())
    bot = _GetDiagnosticValue(reserved_infos.BOTS.name,
                              histograms.GetFirstHistogram())
    benchmark = _GetDiagnosticValue(reserved_infos.BENCHMARKS.name,
                                    histograms.GetFirstHistogram())
    benchmark_description = _GetDiagnosticValue(
        reserved_infos.BENCHMARK_DESCRIPTIONS.name,
        histograms.GetFirstHistogram(), optional=True)

  with timing.WallTimeLogger('_ValidateMasterBotBenchmarkName'):
    _ValidateMasterBotBenchmarkName(master, bot, benchmark)

  with timing.WallTimeLogger('ComputeRevision'):
    suite_key = utils.TestKey('%s/%s/%s' % (master, bot, benchmark))
    logging.info('Suite: %s', suite_key.id())

    revision = ComputeRevision(histograms)

    bot_whitelist = bot_whitelist_future.get_result()
    internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)

    revision_record = histogram.HistogramRevisionRecord.GetOrCreate(
        suite_key, revision)
    revision_record.put()

    last_added = histogram.HistogramRevisionRecord.GetLatest(
        suite_key).get_result()

    # On first upload, a query immediately following a put may return nothing.
    if not last_added:
      last_added = revision_record

    _CheckRequest(last_added, 'No last revision')

  # We'll skip the histogram-level sparse diagnostics because we need to
  # handle those with the histograms, below, so that we can properly assign
  # test paths.
  with timing.WallTimeLogger('FindSuiteLevelSparseDiagnostics'):
    suite_level_sparse_diagnostic_entities = FindSuiteLevelSparseDiagnostics(
        histograms, suite_key, revision, internal_only)

  # TODO(eakuefner): Refactor master/bot computation to happen above this line
  # so that we can replace with a DiagnosticRef rather than a full diagnostic.
  with timing.WallTimeLogger('DeduplicateAndPut'):
    new_guids_to_old_diagnostics = (
        histogram.SparseDiagnostic.FindOrInsertDiagnostics(
            suite_level_sparse_diagnostic_entities, suite_key,
            revision, last_added.revision).get_result())

  with timing.WallTimeLogger('ReplaceSharedDiagnostic calls'):
    for new_guid, old_diagnostic in new_guids_to_old_diagnostics.iteritems():
      histograms.ReplaceSharedDiagnostic(
          new_guid, diagnostic.Diagnostic.FromDict(old_diagnostic))

  with timing.WallTimeLogger('_BatchHistogramsIntoTasks'):
    tasks = _BatchHistogramsIntoTasks(suite_key.id(), histograms, revision,
                                      benchmark_description)

  with timing.WallTimeLogger('_QueueHistogramTasks'):
    _QueueHistogramTasks(tasks)
def post(self):
  """Adds a single histogram or sparse shared diagnostic to the datastore.

  The |data| request parameter can be either a histogram or a sparse shared
  diagnostic; the set of diagnostics that are considered sparse (meaning that
  they don't normally change on every upload for a given benchmark from a
  given bot) is shown in add_histograms.SPARSE_DIAGNOSTIC_TYPES.

  See https://goo.gl/lHzea6 for detailed information on the JSON format for
  histograms and diagnostics.

  Request parameters:
    data: JSON encoding of a histogram or shared diagnostic.
    revision: a revision, given as an int.
    test_path: the test path to which this diagnostic or histogram should be
        attached.
  """
  datastore_hooks.SetPrivilegedRequest()

  data = self.request.get('data')
  revision = int(self.request.get('revision'))
  test_path = self.request.get('test_path')

  data_dict = json.loads(data)
  guid = data_dict['guid']
  # Shared diagnostics carry a 'type' field; histograms do not.
  is_diagnostic = 'type' in data_dict

  # Test path layout: master/bot/<rest of test name>.
  test_path_parts = test_path.split('/')
  master = test_path_parts[0]
  bot = test_path_parts[1]
  test_name = '/'.join(test_path_parts[2:])
  bot_whitelist = stored_object.Get(add_point_queue.BOT_WHITELIST_KEY)
  internal_only = add_point_queue.BotInternalOnly(bot, bot_whitelist)
  # Diagnostics have no unit, so they contribute no unit-derived args.
  extra_args = {} if is_diagnostic else GetUnitArgs(data_dict['unit'])
  # TODO(eakuefner): Populate benchmark_description once it appears in
  # diagnostics.
  parent_test = add_point_queue.GetOrCreateAncestors(
      master, bot, test_name, internal_only, **extra_args)
  test_key = parent_test.key

  added_rows = []
  monitored_test_keys = []

  if is_diagnostic:
    entity = histogram.SparseDiagnostic(
        id=guid, data=data, test=test_key, start_revision=revision,
        end_revision=revision, internal_only=internal_only)
  else:
    diagnostics = self.request.get('diagnostics')
    if diagnostics:
      diagnostic_data = json.loads(diagnostics)
      diagnostic_entities = []
      for diagnostic_datum in diagnostic_data:
        # TODO(eakuefner): Pass map of guid to dict to avoid overhead
        # BUGFIX: use a loop-local name here. The original rebound |guid|,
        # so the Histogram entity below was stored under the LAST
        # diagnostic's guid instead of the histogram's own guid.
        diagnostic_guid = diagnostic_datum['guid']
        diagnostic_entities.append(
            histogram.SparseDiagnostic(
                id=diagnostic_guid, data=diagnostic_datum, test=test_key,
                start_revision=revision, end_revision=sys.maxint,
                internal_only=internal_only))
      new_guids_to_existing_diagnostics = add_histograms.DeduplicateAndPut(
          diagnostic_entities, test_key, revision).iteritems()
      # TODO(eakuefner): Move per-histogram monkeypatching logic to Histogram.
      hs = histogram_set.HistogramSet()
      hs.ImportDicts([data_dict])
      # TODO(eakuefner): Share code for replacement logic with add_histograms
      for new_guid, existing_diagnostic in new_guids_to_existing_diagnostics:
        hs.ReplaceSharedDiagnostic(
            new_guid, diagnostic_ref.DiagnosticRef(
                existing_diagnostic['guid']))
      # NOTE(review): this rebinds |data| from a JSON string to a dict —
      # presumably histogram.Histogram accepts both; confirm.
      data = hs.GetFirstHistogram().AsDict()

    entity = histogram.Histogram(
        id=guid, data=data, test=test_key, revision=revision,
        internal_only=internal_only)
    # Rows are only created for histograms, not for shared diagnostics.
    row = AddRow(data_dict, test_key, revision, test_path, internal_only)
    added_rows.append(row)

    is_monitored = parent_test.sheriff and parent_test.has_rows
    if is_monitored:
      monitored_test_keys.append(parent_test.key)

  entity.put()

  tests_keys = [
      k for k in monitored_test_keys if not add_point_queue.IsRefBuild(k)
  ]

  # Updating of the cached graph revisions should happen after put because
  # it requires the new row to have a timestamp, which happens upon put.
  futures = [
      graph_revisions.AddRowsToCacheAsync(added_rows),
      find_anomalies.ProcessTestsAsync(tests_keys)
  ]
  ndb.Future.wait_all(futures)