def check_record_in_processes(self, name, definition):
    """Validate the 'record_in_processes' field of a histogram definition.

    A missing/empty field and any unknown process name are reported via
    ParserError.handle_later(). Only runs when strict type checks are
    enabled.

    :param name: the histogram name, used in error messages.
    :param definition: the histogram definition dict.
    """
    if not self._strict_type_checks:
        return
    field = 'record_in_processes'
    rip = definition.get(field)
    DOC_URL = HISTOGRAMS_DOC_URL + "#record-in-processes"
    if not rip:
        ParserError('Histogram "%s" must have a "%s" field:\n%s' %
                    (name, field, DOC_URL)).handle_later()
        # Field is missing or empty: there is nothing to iterate, and if it
        # is None the loop below would raise a TypeError after the deferred
        # error, so bail out here.
        return
    for process in rip:
        if not utils.is_valid_process_name(process):
            ParserError(
                'Histogram "%s" has unknown process "%s" in %s.\n%s' %
                (name, process, field, DOC_URL)).handle_later()
def check_whitelistable_fields(self, name, definition):
    """Check that new histograms carry the review-required fields.

    A histogram absent from the whitelist must define 'alert_emails' and
    'bug_numbers'; a whitelisted histogram that now defines a field should
    be dropped from the whitelist.

    :raises ParserError: when either rule is violated.
    """
    # Use counters don't have any mechanism to add the fields checked here,
    # so skip the check for them.
    # We also don't need to run any of these checks on the server.
    if self._is_use_counter or not self._strict_type_checks:
        return
    # In the pipeline we don't have whitelists available.
    if whitelists is None:
        return
    for field in ['alert_emails', 'bug_numbers']:
        defined = field in definition
        whitelisted = name in whitelists[field]
        if not defined and not whitelisted:
            raise ParserError('New histogram "%s" must have a "%s" field.'
                              % (name, field))
        if defined and whitelisted:
            msg = 'Histogram "%s" should be removed from the whitelist for "%s" in ' \
                  'histogram-whitelists.json.'
            raise ParserError(msg % (name, field))
def load_whitelist():
    """Load histogram-whitelists.json into the module-global `whitelists`.

    Each whitelist section is converted from a list to a set for fast
    membership tests. On I/O failure `whitelists` is reset to None; parse
    and I/O failures are reported through ParserError.handle_now().
    """
    global whitelists
    try:
        whitelist_path = os.path.join(
            os.path.abspath(os.path.realpath(os.path.dirname(__file__))),
            'histogram-whitelists.json')
        with open(whitelist_path, 'r') as f:
            try:
                whitelists = json.load(f)
                # items() works on Python 2 and 3; iteritems() is Python 2
                # only and breaks under Python 3.
                for name, whitelist in whitelists.items():
                    whitelists[name] = set(whitelist)
            except ValueError:
                ParserError('Error parsing whitelist: %s' % whitelist_path).handle_now()
    except IOError:
        whitelists = None
        ParserError('Unable to parse whitelist: %s.' % whitelist_path).handle_now()
def check(self, identifier, key, value):
    """Type-check a dict field: the dict must be non-empty, and every key
    and value must match the checker's expected instance types.

    :param identifier: name of the probe being checked, for error messages.
    :param key: name of the field holding the dict.
    :param value: the dict to validate.
    """
    # An empty dict is rejected immediately (handle_now), type mismatches
    # are collected (handle_later).
    if not value:
        ParserError("%s: Failed check for %s - dict should not be empty." %
                    (identifier, key)).handle_now()
    # Iterate the dict directly and via items(): both work on Python 2 and
    # 3, unlike iterkeys()/iteritems().
    for x in value:
        if not isinstance(x, self.keys_instance_type):
            ParserError(
                "%s: Failed dict type check for %s - expected key type %s, got "
                "%s." % (identifier, key,
                         nice_type_name(self.keys_instance_type),
                         nice_type_name(type(x)))).handle_later()
    for k, v in value.items():
        if not isinstance(v, self.values_instance_type):
            ParserError("%s: Failed dict type check for %s - "
                        "expected value type %s for key %s, got %s." %
                        (identifier, key,
                         nice_type_name(self.values_instance_type),
                         k, nice_type_name(type(v)))).handle_later()
def from_files(filenames, strict_type_checks=True):
    """Return an iterator that provides a sequence of Histograms for
    the histograms defined in filenames.
    """
    all_histograms = OrderedDict()
    for filename in filenames:
        parser = FILENAME_PARSERS[os.path.basename(filename)]
        histograms = parser(filename, strict_type_checks)

        # OrderedDicts are important, because then the iteration order over
        # the parsed histograms is stable, which makes the insertion into
        # all_histograms stable, which makes ordering in generated files
        # stable, which makes builds more deterministic.
        if not isinstance(histograms, OrderedDict):
            raise ParserError("Histogram parser did not provide an OrderedDict.")

        for (name, definition) in histograms.items():
            if name in all_histograms:
                raise ParserError('Duplicate histogram name "%s".' % name)
            all_histograms[name] = definition

    # We require that all USE_COUNTER2_* histograms be defined in a contiguous
    # block.
    # Build a real list: filter() returns a lazy iterator on Python 3, which
    # supports neither indexing nor len().
    use_counter_indices = [(i, name) for (i, name) in enumerate(all_histograms)
                           if name.startswith("USE_COUNTER2_")]
    if use_counter_indices:
        lower_bound = use_counter_indices[0][0]
        upper_bound = use_counter_indices[-1][0]
        n_counters = upper_bound - lower_bound + 1
        if n_counters != len(use_counter_indices):
            raise ParserError("Use counter histograms must be defined in a contiguous block.")

    # Check that histograms that were removed from Histograms.json etc.
    # are also removed from the whitelists.
    if whitelists is not None:
        all_whitelist_entries = itertools.chain.from_iterable(whitelists.values())
        orphaned = set(all_whitelist_entries) - set(all_histograms.keys())
        if len(orphaned) > 0:
            msg = 'The following entries are orphaned and should be removed from ' \
                  'histogram-whitelists.json:\n%s'
            raise ParserError(msg % (', '.join(sorted(orphaned))))

    for (name, definition) in all_histograms.items():
        yield Histogram(name, definition, strict_type_checks=strict_type_checks)
def check_name(self, name):
    """Validate a histogram name; characters or prefixes that would break
    C++ code generation are reported via ParserError.handle_later().
    """
    if '#' in name:
        ParserError('Error for histogram name "%s": "#" is not allowed.' % (name)).handle_later()

    # Avoid C++ identifier conflicts between histogram enums and label enum names.
    if name.startswith("LABELS_"):
        ParserError(
            'Error for histogram name "%s": can not start with "LABELS_".' % (name)).handle_later()

    # To make it easier to generate C++ identifiers from this etc., we restrict
    # the histogram names to a strict pattern.
    # We skip this on the server to avoid failures with old Histogram.json revisions.
    if self._strict_type_checks and not re.match(CPP_IDENTIFIER_PATTERN, name, re.IGNORECASE):
        ParserError(
            'Error for histogram name "%s": name does not conform to "%s"' %
            (name, CPP_IDENTIFIER_PATTERN)).handle_later()
def validate_values(self, definition):
    """Check that the scalar definition fields hold acceptable values.

    All problems are reported through ParserError.handle_later(); nothing
    is validated unless strict type checks are enabled.

    :param definition: the dictionary containing the scalar properties.
    :raises ParserError: if a scalar definition field contains an unexpected value.
    """
    if not self._strict_type_checks:
        return

    # Validate the scalar kind.
    kind = definition.get('kind')
    if kind not in SCALAR_TYPES_MAP.keys():
        ParserError(self._name + ' - unknown scalar kind: ' + kind +
                    '.\nSee: {}'.format(BASE_DOC_URL)).handle_later()

    # Validate the collection policy.
    policy = definition.get('release_channel_collection', None)
    if policy and policy not in ['opt-in', 'opt-out']:
        ParserError(self._name + ' - unknown collection policy: ' + policy +
                    '.\nSee: {}#optional-fields'.format(BASE_DOC_URL)).handle_later()

    # Validate the cpp_guard.
    guard = definition.get('cpp_guard')
    if guard and re.match(r'\W', guard):
        ParserError(self._name + ' - invalid cpp_guard: ' + guard +
                    '.\nSee: {}#optional-fields'.format(BASE_DOC_URL)).handle_later()

    # Validate record_in_processes.
    for proc in definition.get('record_in_processes', []):
        if not utils.is_valid_process_name(proc):
            ParserError(self._name + ' - unknown value in record_in_processes: ' + proc +
                        '.\nSee: {}'.format(BASE_DOC_URL)).handle_later()

    # Validate the expiration version.
    # Historical versions of Scalars.json may contain expiration versions
    # using the deprecated format 'N.Na1'. Those scripts set
    # self._strict_type_checks to false.
    expires = definition.get('expires')
    if not utils.validate_expiration_version(expires) and self._strict_type_checks:
        ParserError('{} - invalid expires: {}.\nSee: {}#required-fields'
                    .format(self._name, expires, BASE_DOC_URL)).handle_later()
def load_scalars(filename, strict_type_checks=True):
    """Parses a YAML file containing the scalar definition.

    :param filename: the YAML file containing the scalars definition.
    :raises ParserError: if the scalar file cannot be opened or parsed.
    """
    # Parse the scalar definitions from the YAML file.
    scalars = None
    try:
        with open(filename, 'r') as f:
            scalars = yaml.safe_load(f)
    except IOError as e:
        # str(e) includes errno/strerror; BaseException.message is
        # deprecated since Python 2.6 and gone in Python 3.
        ParserError('Error opening ' + filename + ': ' + str(e)).handle_now()
    except ValueError as e:
        # NOTE(review): PyYAML parse failures raise yaml.YAMLError, which is
        # not a ValueError — confirm this clause catches what was intended.
        ParserError('Error parsing scalars in {}: {}'
                    '.\nSee: {}'.format(filename, str(e), BASE_DOC_URL)).handle_now()

    scalar_list = []
    # Scalars are defined in a fixed two-level hierarchy within the definition file.
    # The first level contains the category name, while the second level contains the
    # probe name (e.g. "category.name: probe: ...").
    for category_name in scalars:
        category = scalars[category_name]

        # Make sure that the category has at least one probe in it.
        if not category or len(category) == 0:
            ParserError('Category "{}" must have at least one probe in it'
                        '.\nSee: {}'.format(category_name, BASE_DOC_URL)).handle_later()
            # An empty category may be None; iterating it below would raise
            # a TypeError after the deferred error.
            continue

        for probe_name in category:
            # We found a scalar type. Go ahead and parse it.
            scalar_info = category[probe_name]
            scalar_list.append(
                ScalarType(category_name, probe_name, scalar_info, strict_type_checks))

    return scalar_list
def from_Histograms_json(filename, strict_type_checks):
    """Parse Histograms.json through load_histograms_into_dict, preserving
    definition order via json's object_pairs_hook.

    :param filename: path to the JSON histogram definitions.
    :raises ParserError: if the file is not valid JSON.
    """
    with open(filename, 'r') as f:
        try:
            def hook(ps):
                return load_histograms_into_dict(ps, strict_type_checks)
            histograms = json.load(f, object_pairs_hook=hook)
        except ValueError as e:
            # "except X, e" is Python 2-only syntax; "as" works on 2.6+ and
            # Python 3. str(e) replaces the deprecated e.message.
            raise ParserError("error parsing histograms in %s: %s" % (filename, str(e)))
    # NOTE(review): `histograms` is parsed but never returned here — confirm
    # whether a trailing `return histograms` was truncated.
def string_check(identifier, field, value, min_length=1, max_length=None, regex=None):
    """Validate a string field's length and, optionally, its format.

    All failures are reported via ParserError.handle_later().

    :param identifier: name of the probe, used in error messages.
    :param field: name of the field being checked.
    :param value: the string value to validate.
    :param min_length: minimum acceptable length (default 1).
    :param max_length: maximum acceptable length, or None for unlimited.
    :param regex: optional pattern the value must match, or None.
    """
    length = len(value)
    # Length check.
    if length < min_length:
        ParserError(
            "%s: Value '%s' for field %s is less than minimum length of %d." %
            (identifier, value, field, min_length)).handle_later()
    if max_length and length > max_length:
        ParserError(
            "%s: Value '%s' for field %s is greater than maximum length of %d." %
            (identifier, value, field, max_length)).handle_later()
    # Regex check.
    if regex and not re.match(regex, value):
        ParserError(
            '%s: String value "%s" for %s is not matching pattern "%s".' %
            (identifier, value, field, regex)).handle_later()
def check_label_values(self, name, definition):
    """Validate the optional 'labels' of a categorical histogram: each
    label must satisfy the length limit and the C++ identifier pattern,
    and the total count must stay within the limit. Problems are reported
    via ParserError (handle_later / handle_now for the count limit).

    :param name: the histogram name, used in error messages.
    :param definition: the histogram definition dict.
    """
    labels = definition.get('labels')
    if not labels:
        return

    # List comprehension instead of filter(): on Python 3 filter() returns
    # a lazy iterator that has no len().
    invalid = [l for l in labels if len(l) > MAX_LABEL_LENGTH]
    if len(invalid) > 0:
        ParserError('Label values for "%s" exceed length limit of %d: %s' %
                    (name, MAX_LABEL_LENGTH, ', '.join(invalid))).handle_later()

    if len(labels) > MAX_LABEL_COUNT:
        ParserError('Label count for "%s" exceeds limit of %d' %
                    (name, MAX_LABEL_COUNT)).handle_now()

    # To make it easier to generate C++ identifiers from this etc., we restrict
    # the label values to a strict pattern.
    invalid = [l for l in labels
               if not re.match(CPP_IDENTIFIER_PATTERN, l, re.IGNORECASE)]
    if len(invalid) > 0:
        ParserError('Label values for %s are not matching pattern "%s": %s' %
                    (name, CPP_IDENTIFIER_PATTERN, ', '.join(invalid))).handle_later()
def load_allowlist():
    """Load histogram-allowlists.json into the module-global `allowlists`,
    converting each section's list to a set for fast membership tests.

    On I/O failure `allowlists` is reset to None; parse and I/O failures
    are reported through ParserError.handle_now().
    """
    global allowlists
    try:
        parsers_path = os.path.realpath(os.path.dirname(__file__))
        # The parsers live in build_scripts/parsers in the Telemetry module, while
        # the histogram-allowlists file lives in the root of the module. Account
        # for that when looking for the allowlist.
        # NOTE: if the parsers are moved, this logic will need to be updated.
        telemetry_module_path = os.path.abspath(os.path.join(parsers_path, os.pardir, os.pardir))
        allowlist_path = os.path.join(telemetry_module_path, 'histogram-allowlists.json')
        with open(allowlist_path, 'r') as f:
            try:
                allowlists = json.load(f)
                # items() works on Python 2 and 3; iteritems() is Python 2 only.
                for name, allowlist in allowlists.items():
                    allowlists[name] = set(allowlist)
            except ValueError:
                ParserError('Error parsing allowlist: %s' % allowlist_path).handle_now()
    except IOError:
        allowlists = None
        ParserError('Unable to parse allowlist: %s.' % allowlist_path).handle_now()
def check_label_values(self, name, definition):
    """Validate the optional 'labels' of a categorical histogram definition.

    :param name: the histogram name, used in error messages.
    :param definition: the histogram definition dict.
    :raises ParserError: if a label is too long, there are too many labels,
                         or a label does not match the identifier pattern.
    """
    labels = definition.get('labels')
    if not labels:
        return

    # List comprehension instead of filter(): on Python 3 filter() returns
    # a lazy iterator that has no len().
    invalid = [l for l in labels if len(l) > MAX_LABEL_LENGTH]
    if len(invalid) > 0:
        raise ParserError('Label values for "%s" exceed length limit of %d: %s' %
                          (name, MAX_LABEL_LENGTH, ', '.join(invalid)))

    if len(labels) > MAX_LABEL_COUNT:
        raise ParserError('Label count for "%s" exceeds limit of %d' %
                          (name, MAX_LABEL_COUNT))

    # To make it easier to generate C++ identifiers from this etc., we restrict
    # the label values to a strict pattern.
    pattern = '^[a-z][a-z0-9_]+[a-z0-9]$'
    invalid = [l for l in labels if not re.match(pattern, l, re.IGNORECASE)]
    if len(invalid) > 0:
        raise ParserError('Label values for %s are not matching pattern "%s": %s' %
                          (name, pattern, ', '.join(invalid)))
def set_dataset(self, definition):
    """Derive the nsITelemetry dataset constant from this histogram's
    'releaseChannelCollection' policy (default 'opt-in') and store it on
    self._dataset.

    :raises ParserError: if the policy value is unknown.
    """
    policy = definition.get('releaseChannelCollection', 'opt-in')
    if policy == 'opt-in':
        dataset = 'DATASET_RELEASE_CHANNEL_OPTIN'
    elif policy == 'opt-out':
        dataset = 'DATASET_RELEASE_CHANNEL_OPTOUT'
    else:
        raise ParserError('Unknown value for releaseChannelCollection'
                          ' policy for histogram "%s".' % self._name)
    self._dataset = "nsITelemetry::" + dataset
def set_dataset(self, definition):
    """Map this histogram's 'releaseChannelCollection' policy (default
    'opt-in') to the matching nsITelemetry dataset constant, stored on
    self._dataset. Unknown policies are reported via handle_later().
    """
    policy_to_dataset = {
        'opt-in': 'DATASET_PRERELEASE_CHANNELS',
        'opt-out': 'DATASET_ALL_CHANNELS',
    }
    policy = definition.get('releaseChannelCollection', 'opt-in')
    if policy not in policy_to_dataset:
        ParserError('Unknown value for releaseChannelCollection'
                    ' policy for histogram "%s".' % self._name).handle_later()
    # NOTE(review): for an unknown policy, if handle_later() does not raise
    # immediately, the lookup below raises KeyError — confirm handle_later's
    # strict-mode behavior.
    self._dataset = "nsITelemetry::" + policy_to_dataset[policy]
def validate_values(self, definition):
    """This function checks that the fields have the correct values.

    :param definition: the dictionary containing the scalar properties.
    :raises ParserError: if a scalar definition field contains an unexpected value.
    """
    # Nothing to validate when strict checks are disabled.
    if not self._strict_type_checks:
        return

    # Validate the scalar kind.
    kind = definition.get('kind')
    if kind not in SCALAR_TYPES_MAP.keys():
        raise ParserError(kind.join([]) + self._name + ' - unknown scalar kind: ' + kind +
                          '.\nSee: {}'.format(BASE_DOC_URL)) if False else None
    if kind not in SCALAR_TYPES_MAP.keys():
        raise ParserError(self._name + ' - unknown scalar kind: ' + kind +
                          '.\nSee: {}'.format(BASE_DOC_URL))

    # Validate the collection policy.
    policy = definition.get('release_channel_collection', None)
    if policy and policy not in ['opt-in', 'opt-out']:
        raise ParserError(self._name + ' - unknown collection policy: ' + policy +
                          '.\nSee: {}#optional-fields'.format(BASE_DOC_URL))

    # Validate the cpp_guard.
    guard = definition.get('cpp_guard')
    if guard and re.match(r'\W', guard):
        raise ParserError(self._name + ' - invalid cpp_guard: ' + guard +
                          '.\nSee: {}#optional-fields'.format(BASE_DOC_URL))

    # Validate record_in_processes.
    for proc in definition.get('record_in_processes', []):
        if not utils.is_valid_process_name(proc):
            raise ParserError(self._name + ' - unknown value in record_in_processes: ' +
                              proc + '.\nSee: {}'.format(BASE_DOC_URL))
def check_expiration(self, name, definition):
    """Normalize and validate a histogram's 'expires_in_version' field.

    Forbids "default" for non-whitelisted histograms, rejects malformed
    version strings, and rewrites the field in place with the expiration
    postfix applied.

    :raises ParserError: on a forbidden or invalid expiration value.
    """
    field = 'expires_in_version'
    expiration = definition.get(field)
    if not expiration:
        return

    # We forbid new probes from using "expires_in_version" : "default" field/value pair.
    # Old ones that use this are added to the whitelist.
    is_default = expiration == "default"
    if is_default and whitelists is not None and name not in whitelists['expiry_default']:
        raise ParserError('New histogram "%s" cannot have "default" %s value.' % (name, field))

    if not is_default and not utils.validate_expiration_version(expiration):
        raise ParserError(('Error for histogram {} - invalid {}: {}.'
                           '\nSee: {}#expires-in-version')
                          .format(name, field, expiration, HISTOGRAMS_DOC_URL))

    definition[field] = utils.add_expiration_postfix(expiration)
def set_bucket_parameters(self, low, high, n_buckets):
    """Record the bucket parameters on this histogram and enforce the
    100-bucket cap for histograms not listed in the whitelist.

    :raises ParserError: if a non-whitelisted histogram asks for more
                         than 100 buckets.
    """
    self._low = low
    self._high = high
    self._n_buckets = n_buckets
    # Only integer bucket counts are capped; the check is skipped entirely
    # when no whitelist is available.
    over_cap = (whitelists is not None and self._n_buckets > 100 and
                type(self._n_buckets) is int)
    if over_cap and self._name not in whitelists['n_buckets']:
        raise ParserError(
            'New histogram "%s" is not permitted to have more than 100 buckets.\n'
            'Histograms with large numbers of buckets use disproportionately high'
            ' amounts of resources. Contact a Telemetry peer (e.g. in #telemetry)'
            ' if you think an exception ought to be made:\n'
            'https://wiki.mozilla.org/Modules/Toolkit#Telemetry' % self._name)
def load_events(filename):
    """Parses a YAML file containing the event definitions.

    :param filename: the YAML file containing the event definitions.
    :raises ParserError: if the event file cannot be opened or parsed.
    """
    # Parse the event definitions from the YAML file.
    events = None
    try:
        with open(filename, 'r') as f:
            events = yaml.safe_load(f)
    except IOError as e:
        # "except X, e" is Python 2-only syntax; "as" works on 2.6+ and
        # Python 3. str(e) replaces the deprecated e.message.
        raise ParserError('Error opening ' + filename + ': ' + str(e) + ".")
    # NOTE(review): `events` is parsed but never returned here — confirm
    # whether the rest of this function was truncated.
def load_scalars(filename, strict_type_checks=True):
    """Parses a YAML file containing the scalar definition.

    :param filename: the YAML file containing the scalars definition.
    :raises ParserError: if the scalar file cannot be opened or parsed.
    """
    # Parse the scalar definitions from the YAML file.
    scalars = None
    try:
        with open(filename, 'r') as f:
            scalars = yaml.safe_load(f)
    except IOError as e:
        # "except X, e" is Python 2-only syntax; "as" works on 2.6+ and
        # Python 3. str(e) replaces the deprecated e.message.
        raise ParserError('Error opening ' + filename + ': ' + str(e))
    # NOTE(review): `scalars` is parsed but never returned here — confirm
    # whether the rest of this function was truncated.
def load_events(filename, strict_type_checks):
    """Parses a YAML file containing the event definitions.

    :param filename: the YAML file containing the event definitions.
    :strict_type_checks A boolean indicating whether to use the stricter type checks.
    :raises ParserError: if the event file cannot be opened or parsed.
    """
    # Parse the event definitions from the YAML file.
    events = None
    try:
        with open(filename, 'r') as f:
            events = yaml.safe_load(f)
    except IOError as e:
        # "except X, e" is Python 2-only syntax; "as" works on 2.6+ and
        # Python 3. str(e) replaces the deprecated e.message.
        raise ParserError('Error opening ' + filename + ': ' + str(e) + ".")
    # NOTE(review): `events` is parsed but never returned here — confirm
    # whether the rest of this function was truncated.
def set_nsITelemetry_kind(self):
    """Translate this histogram's kind into the matching
    nsITelemetry::HISTOGRAM_* implementation constant, stored on
    self._nsITelemetry_kind.

    :raises ParserError: if the kind is not recognized.
    """
    # Pick a Telemetry implementation type.
    kind_to_impl = {
        'boolean': 'BOOLEAN',
        'flag': 'FLAG',
        'count': 'COUNT',
        'enumerated': 'LINEAR',
        'categorical': 'CATEGORICAL',
        'linear': 'LINEAR',
        'exponential': 'EXPONENTIAL',
    }
    impl = kind_to_impl.get(self._kind)
    if impl is None:
        raise ParserError('Unknown kind "%s" for histogram "%s".' %
                          (self._kind, self._name))
    self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % impl
def set_bucket_parameters(self, low, high, n_buckets):
    """Record the bucket parameters on this histogram and enforce the
    bucket cap (100, or 101 for enumerated/categorical kinds) for
    histograms not listed in the allowlist. Violations are reported via
    ParserError.handle_later().
    """
    self._low = low
    self._high = high
    self._n_buckets = n_buckets
    # Enumerated/categorical histograms are allowed one extra bucket.
    if self._kind in ['enumerated', 'categorical']:
        max_n_buckets = 101
    else:
        max_n_buckets = 100
    over_cap = (allowlists is not None and self._n_buckets > max_n_buckets and
                type(self._n_buckets) is int)
    if over_cap and self._name not in allowlists['n_buckets']:
        ParserError(
            'New histogram "%s" is not permitted to have more than 100 buckets.\n'
            'Histograms with large numbers of buckets use disproportionately high'
            ' amounts of resources. Contact a Telemetry peer (e.g. in #telemetry)'
            ' if you think an exception ought to be made:\n'
            'https://wiki.mozilla.org/Modules/Toolkit#Telemetry' % self._name).handle_later()
def compute_bucket_parameters(self, definition):
    """Compute and store (low, high, n_buckets) for this histogram by
    dispatching to the bucketing function that matches its kind.

    :raises ParserError: if the kind has no bucketing function.
    """
    bucket_fns = {
        'boolean': Histogram.boolean_flag_bucket_parameters,
        'flag': Histogram.boolean_flag_bucket_parameters,
        'count': Histogram.boolean_flag_bucket_parameters,
        'enumerated': Histogram.enumerated_bucket_parameters,
        'categorical': Histogram.categorical_bucket_parameters,
        'linear': Histogram.linear_bucket_parameters,
        'exponential': Histogram.exponential_bucket_parameters,
    }
    fn = bucket_fns.get(self._kind)
    if fn is None:
        raise ParserError('Unknown kind "%s" for histogram "%s".' %
                          (self._kind, self._name))
    self.set_bucket_parameters(*fn(definition))
def check_record_into_store(self, name, definition):
    """Validate the optional 'record_into_store' field: when present it
    must name at least one store. Failures are reported via
    ParserError.handle_later().
    """
    if not self._strict_type_checks:
        return
    field = 'record_into_store'
    DOC_URL = HISTOGRAMS_DOC_URL + "#record-into-store"
    if field not in definition:
        # record_into_store is optional
        return
    # record_into_store should not be empty
    if not definition.get(field):
        ParserError('Histogram "%s" has empty list of stores, which is not allowed.\n%s' %
                    (name, DOC_URL)).handle_later()
def ranges(self):
    """Return an array of lower bounds for each bucket in the histogram."""
    # Every kind uses linear bucketing except 'exponential'.
    bucket_fns = {
        'boolean': linear_buckets,
        'flag': linear_buckets,
        'count': linear_buckets,
        'enumerated': linear_buckets,
        'categorical': linear_buckets,
        'linear': linear_buckets,
        'exponential': exponential_buckets,
    }
    fn = bucket_fns.get(self._kind)
    if fn is None:
        raise ParserError('Unknown kind "%s" for histogram "%s".' %
                          (self._kind, self._name))
    return fn(self.low(), self.high(), self.n_buckets())
def check_operating_systems(self, name, definition):
    """Validate the optional 'operating_systems' field: every entry must be
    a known OS name. Problems are reported via ParserError.handle_later().
    """
    if not self._strict_type_checks:
        return
    field = 'operating_systems'
    operating_systems = definition.get(field)
    DOC_URL = HISTOGRAMS_DOC_URL + "#operating_systems"
    if not operating_systems:
        # operating_systems is optional
        return
    for operating_system in operating_systems:
        if utils.is_valid_os(operating_system):
            continue
        ParserError('Histogram "%s" has unknown operating system "%s" in %s.\n%s' %
                    (name, operating_system, field, DOC_URL)).handle_later()
def check_products(self, name, definition):
    """Validate the optional 'products' field: every entry must be a known
    product name. Problems are reported via ParserError.handle_later().
    """
    if not self._strict_type_checks:
        return
    field = 'products'
    products = definition.get(field)
    DOC_URL = HISTOGRAMS_DOC_URL + "#products"
    if not products:
        # products is optional
        return
    for product in products:
        if utils.is_valid_product(product):
            continue
        ParserError('Histogram "%s" has unknown product "%s" in %s.\n%s' %
                    (name, product, field, DOC_URL)).handle_later()
def check_expiration(self, name, definition):
    """Normalize and validate the 'expires_in_version' field, rewriting it
    in place to the full nightly form ("55" -> "55.0a1", "55.0" ->
    "55.0a1").

    :raises ParserError: if a non-whitelisted histogram uses "default".
    """
    field = 'expires_in_version'
    expiration = definition.get(field)
    if not expiration:
        return

    # We forbid new probes from using "expires_in_version" : "default" field/value pair.
    # Old ones that use this are added to the whitelist.
    if expiration == "default" and \
       whitelists is not None and \
       name not in whitelists['expiry_default']:
        raise ParserError('New histogram "%s" cannot have "default" %s value.' % (name, field))

    # Bare major version -> append ".0a1"; "N.0" -> append "a1".
    if re.match(r'^[1-9][0-9]*$', expiration):
        expiration += ".0a1"
    elif re.match(r'^[1-9][0-9]*\.0$', expiration):
        expiration += "a1"

    definition[field] = expiration
def check_whitelisted_kind(self, name, definition):
    """Reject new desktop 'flag'/'count' histograms that are not
    whitelisted, suggesting scalars instead; Android-targeted histograms
    are exempt. Violations are reported via ParserError.handle_now().
    """
    # We don't need to run any of these checks on the server.
    if not self._strict_type_checks or whitelists is None:
        return

    # Disallow "flag" and "count" histograms on desktop, suggest to use
    # scalars instead. Allow using these histograms on Android, as we
    # don't support scalars there yet.
    hist_kind = definition.get("kind")
    android_target = "android" in definition.get("operating_systems", [])

    if android_target or hist_kind not in ["flag", "count"]:
        return
    if name in whitelists["kind"]:
        return
    ParserError(('Unsupported kind "%s" for histogram "%s":\n'
                 'New "%s" histograms are not supported on Desktop, you should'
                 ' use scalars instead:\n'
                 '%s\n'
                 'Are you trying to add a histogram on Android?'
                 ' Add "operating_systems": ["android"] to your histogram definition.')
                % (hist_kind, name, hist_kind, SCALARS_DOC_URL)).handle_now()