def string_check(identifier, field, value, min_length=1, max_length=None, regex=None):
    """Validate the length and, optionally, the format of a string field.

    Each violation is reported through ``ParserError(...).handle_later()``.
    The three checks are independent, so one value can produce several
    reports.

    :param identifier: label of the entry being checked; prefixes each message.
    :param field: name of the field that holds ``value``.
    :param value: the string to validate.
    :param min_length: smallest accepted length.
    :param max_length: largest accepted length; a falsy value disables the check.
    :param regex: pattern ``value`` has to match from its start; a falsy value
        disables the check.
    """
    size = len(value)

    # Lower bound on the length.
    if size < min_length:
        too_short = "%s: Value '%s' for field %s is less than minimum length of %d." % (
            identifier, value, field, min_length)
        ParserError(too_short).handle_later()

    # Upper bound on the length (only when a limit was given).
    if max_length and size > max_length:
        too_long = "%s: Value '%s' for field %s is greater than maximum length of %d." % (
            identifier, value, field, max_length)
        ParserError(too_long).handle_later()

    # Format check (only when a pattern was given).
    if regex:
        if re.match(regex, value) is None:
            mismatch = '%s: String value "%s" for %s is not matching pattern "%s".' % (
                identifier, value, field, regex)
            ParserError(mismatch).handle_later()
def check_name(self, name):
    """Check that a histogram name is acceptable.

    :param name: the histogram name to check.
    :raises ParserError: if the name contains "#", starts with "LABELS_", or
        (only when ``self._strict_type_checks`` is set) does not match the
        strict identifier pattern.
    """
    if '#' in name:
        raise ParserError(
            'Error for histogram name "%s": "#" is not allowed.' % (name))

    # "LABELS_" is reserved so histogram enums cannot collide with the C++
    # label enum names.
    if name.startswith("LABELS_"):
        raise ParserError(
            'Error for histogram name "%s": can not start with "LABELS_".' % (name))

    # The strict pattern is skipped on the server to avoid failures with old
    # Histogram.json revisions.
    if not self._strict_type_checks:
        return

    # A strict pattern keeps it easy to generate C++ identifiers from names.
    pattern = '^[a-z][a-z0-9_]+[a-z0-9]$'
    if re.match(pattern, name, re.IGNORECASE) is None:
        raise ParserError(
            'Error for histogram name "%s": name does not conform to "%s"' % (name, pattern))
def check_whitelistable_fields(self, name, definition):
    """Require 'alert_emails' and 'bug_numbers' unless the histogram is whitelisted.

    :param name: the histogram name.
    :param definition: the histogram definition dictionary.
    :raises ParserError: if a field is missing for a histogram that is not on
        that field's whitelist, or if a field is present while the histogram
        is still on that field's whitelist.
    """
    # Use counters have no mechanism to add the fields checked here.
    if self._is_use_counter:
        return
    # None of these checks need to run on the server.
    if not self._strict_type_checks:
        return
    # In the pipeline there are no whitelists available.
    if whitelists is None:
        return

    for field in ('alert_emails', 'bug_numbers'):
        has_field = field in definition
        is_whitelisted = name in whitelists[field]

        if not has_field and not is_whitelisted:
            raise ParserError(
                'New histogram "%s" must have a "%s" field.' % (name, field))

        if has_field and is_whitelisted:
            msg = 'Histogram "%s" should be removed from the whitelist for "%s" in ' \
                  'histogram-whitelists.json.'
            raise ParserError(msg % (name, field))
def from_Histograms_json(filename, strict_type_checks):
    """Load the histogram definitions stored in a JSON file.

    Every JSON object is built by ``load_histograms_into_dict``, which is
    installed as the decoder's ``object_pairs_hook``.

    :param filename: path of the JSON file to read.
    :param strict_type_checks: forwarded to ``load_histograms_into_dict``.
    :returns: the decoded histogram definitions. ``None`` is returned only if
        decoding failed and ``handle_now()`` handed control back.
    """
    def hook(ps):
        return load_histograms_into_dict(ps, strict_type_checks)

    with open(filename, 'r') as f:
        try:
            # Return the parsed data; previously it was bound to a local and
            # then discarded, so callers always received None.
            return json.load(f, object_pairs_hook=hook)
        except ValueError as e:
            # Format the exception itself: the ``message`` attribute is
            # deprecated and does not exist on Python 3 exceptions.
            ParserError("error parsing histograms in %s: %s" %
                        (filename, e)).handle_now()
def check(self, identifier, key, value):
    """Check that a dict is non-empty and that its keys and values are typed.

    All keys are checked before any value, so a key type error is always
    reported in preference to a value type error.

    :param identifier: label of the entry being checked; prefixes each message.
    :param key: name of the field that holds ``value``.
    :param value: the dictionary to validate.
    :raises ParserError: if the dict is empty, a key is not an instance of
        ``self.keys_instance_type``, or a value is not an instance of
        ``self.values_instance_type``.
    """
    if not value:
        raise ParserError(
            "%s: Failed check for %s - dict should not be empty." % (identifier, key))

    # Plain iteration and items() work on both Python 2 and Python 3, unlike
    # iterkeys()/iteritems().
    for x in value:
        if not isinstance(x, self.keys_instance_type):
            raise ParserError(
                "%s: Failed dict type check for %s - expected key type %s, got "
                "%s." %
                (identifier, key,
                 nice_type_name(self.keys_instance_type),
                 nice_type_name(type(x))))

    for k, v in value.items():
        if not isinstance(v, self.values_instance_type):
            raise ParserError(
                "%s: Failed dict type check for %s - "
                "expected value type %s for key %s, got %s." %
                (identifier, key,
                 nice_type_name(self.values_instance_type),
                 k, nice_type_name(type(v))))
def test_missing_alert_emails(self):
    """A histogram without 'alert_emails' is only accepted when whitelisted.

    First the histogram is built against the whitelists loaded by
    ``load_whitelist()`` and the deferred error exit is expected. Then the
    histogram is put on the 'alert_emails' whitelist and must parse cleanly.
    """
    name = 'TEST_HISTOGRAM_WHITELIST_ALERT_EMAILS'
    SAMPLE_HISTOGRAM = {
        name: {
            "record_in_processes": ["main", "content"],
            "bug_numbers": [1383793],
            "expires_in_version": "never",
            "kind": "boolean",
            "description": "Test histogram",
        }
    }
    histograms = load_histogram(SAMPLE_HISTOGRAM)
    parse_histograms.load_whitelist()

    # The whitelists are module-global state. Reset them in a finally block
    # so a failing assertion cannot leak them into other tests.
    try:
        parse_histograms.Histogram(name,
                                   histograms[name],
                                   strict_type_checks=True)
        self.assertRaises(SystemExit, ParserError.exit_func)

        # Set global whitelists for parse_histograms.
        parse_histograms.whitelists = {
            "alert_emails": [
                name
            ],
            "bug_numbers": [],
            "n_buckets": [],
            "expiry_default": [],
            "kind": []
        }

        hist = parse_histograms.Histogram(name,
                                          histograms[name],
                                          strict_type_checks=True)
        ParserError.exit_func()

        self.assertEqual(hist.expiration(), 'never')
        self.assertEqual(hist.kind(), 'boolean')
        self.assertEqual(hist.record_in_processes(), ["main", "content"])
        self.assertEqual(hist.keyed(), False)
    finally:
        parse_histograms.whitelists = None
def test_unsupported_kind_flag(self):
    """A "flag" histogram is only accepted when on the 'kind' whitelist.

    First the histogram is built against the whitelists loaded by
    ``load_whitelist()`` and construction is expected to exit. Then the
    histogram is put on the 'kind' whitelist and must parse cleanly.
    """
    name = 'TEST_HISTOGRAM_WHITELIST_KIND'
    SAMPLE_HISTOGRAM = {
        name: {
            "record_in_processes": ["main", "content"],
            "expires_in_version": "never",
            "kind": "flag",
            "alert_emails": ["*****@*****.**"],
            "bug_numbers": [1383793],
            "description": "Test histogram",
        }
    }
    histograms = load_histogram(SAMPLE_HISTOGRAM)
    parse_histograms.load_whitelist()

    # The whitelists are module-global state. Reset them in a finally block
    # so a failing assertion cannot leak them into other tests.
    try:
        self.assertRaises(SystemExit,
                          parse_histograms.Histogram,
                          name,
                          histograms[name],
                          strict_type_checks=True)

        # Set global whitelists for parse_histograms.
        parse_histograms.whitelists = {
            "alert_emails": [],
            "bug_numbers": [],
            "n_buckets": [],
            "expiry_default": [],
            "kind": [
                name
            ]
        }

        hist = parse_histograms.Histogram(name,
                                          histograms[name],
                                          strict_type_checks=True)
        ParserError.exit_func()

        self.assertEqual(hist.expiration(), 'never')
        self.assertEqual(hist.kind(), 'flag')
        self.assertEqual(hist.record_in_processes(), ["main", "content"])
        self.assertEqual(hist.keyed(), False)
    finally:
        parse_histograms.whitelists = None
def check_label_values(self, name, definition):
    """Validate the 'labels' of a histogram definition.

    Nothing is checked when the definition has no labels (or an empty list).

    :param name: the histogram name, used in error messages.
    :param definition: the histogram definition dictionary.
    :raises ParserError: if a label is longer than ``MAX_LABEL_LENGTH``, there
        are more than ``MAX_LABEL_COUNT`` labels, or a label does not match
        the strict identifier pattern.
    """
    labels = definition.get('labels')
    if not labels:
        return

    # List comprehensions rather than filter(): filter() returns an iterator
    # on Python 3, which has no len().
    invalid = [label for label in labels if len(label) > MAX_LABEL_LENGTH]
    if invalid:
        raise ParserError('Label values for "%s" exceed length limit of %d: %s' %
                          (name, MAX_LABEL_LENGTH, ', '.join(invalid)))

    if len(labels) > MAX_LABEL_COUNT:
        raise ParserError('Label count for "%s" exceeds limit of %d' %
                          (name, MAX_LABEL_COUNT))

    # To make it easier to generate C++ identifiers from this etc., we restrict
    # the label values to a strict pattern.
    pattern = '^[a-z][a-z0-9_]+[a-z0-9]$'
    invalid = [label for label in labels
               if not re.match(pattern, label, re.IGNORECASE)]
    if invalid:
        raise ParserError('Label values for %s are not matching pattern "%s": %s' %
                          (name, pattern, ', '.join(invalid)))
def set_dataset(self, definition):
    """Store the nsITelemetry dataset constant for this histogram.

    The constant is selected by the 'releaseChannelCollection' entry of the
    definition, which defaults to 'opt-in', and is saved in ``self._dataset``.

    :param definition: the histogram definition dictionary.
    :raises ParserError: if the collection policy is not 'opt-in' or 'opt-out'.
    """
    datasets = {
        'opt-in': 'DATASET_RELEASE_CHANNEL_OPTIN',
        'opt-out': 'DATASET_RELEASE_CHANNEL_OPTOUT'
    }

    policy = definition.get('releaseChannelCollection', 'opt-in')
    constant = datasets.get(policy)
    if constant is None:
        raise ParserError('Unknown value for releaseChannelCollection'
                          ' policy for histogram "%s".' % self._name)

    self._dataset = "nsITelemetry::" + constant
def set_dataset(self, definition):
    """Store the nsITelemetry dataset constant for this histogram.

    The constant is selected by the 'releaseChannelCollection' entry of the
    definition, which defaults to 'opt-in', and is saved in ``self._dataset``.

    An unknown policy is reported through ``ParserError(...).handle_later()``
    and ``self._dataset`` is left untouched in that case.

    :param definition: the histogram definition dictionary.
    """
    datasets = {
        'opt-in': 'DATASET_PRERELEASE_CHANNELS',
        'opt-out': 'DATASET_ALL_CHANNELS'
    }

    value = definition.get('releaseChannelCollection', 'opt-in')
    if value not in datasets:
        ParserError('Unknown value for releaseChannelCollection'
                    ' policy for histogram "%s".' % self._name).handle_later()
        # Stop here: looking the unknown value up below would raise a
        # KeyError that masks the error just reported.
        return

    self._dataset = "nsITelemetry::" + datasets[value]
def validate_values(self, definition):
    """This function checks that the fields have the correct values.

    Nothing is checked unless ``self._strict_type_checks`` is set.

    :param definition: the dictionary containing the scalar properties.
    :raises ParserError: if a scalar definition field contains an unexpected value.
    """
    if not self._strict_type_checks:
        return

    # Validate the scalar kind. Messages are built with format() so that a
    # missing kind (None) produces a ParserError instead of a TypeError from
    # string concatenation.
    scalar_kind = definition.get('kind')
    if scalar_kind not in SCALAR_TYPES_MAP.keys():
        raise ParserError('{} - unknown scalar kind: {}.\nSee: {}'.format(
            self._name, scalar_kind, BASE_DOC_URL))

    # Validate the collection policy.
    collection_policy = definition.get('release_channel_collection', None)
    if collection_policy and collection_policy not in ['opt-in', 'opt-out']:
        raise ParserError(
            '{} - unknown collection policy: {}.\nSee: {}#optional-fields'.format(
                self._name, collection_policy, BASE_DOC_URL))

    # Validate the cpp_guard. re.search is required here: re.match would only
    # test the first character and accept e.g. "FOO-BAR".
    cpp_guard = definition.get('cpp_guard')
    if cpp_guard and re.search(r'\W', cpp_guard):
        raise ParserError(
            '{} - invalid cpp_guard: {}.\nSee: {}#optional-fields'.format(
                self._name, cpp_guard, BASE_DOC_URL))

    # Validate record_in_processes.
    record_in_processes = definition.get('record_in_processes', [])
    for proc in record_in_processes:
        if not utils.is_valid_process_name(proc):
            raise ParserError(
                '{} - unknown value in record_in_processes: {}.\nSee: {}'.format(
                    self._name, proc, BASE_DOC_URL))
def validate_values(self, definition):
    """This function checks that the fields have the correct values.

    Nothing is checked unless ``self._strict_type_checks`` is set. Every
    problem found is reported through ``ParserError(...).handle_later()``,
    and validation continues with the remaining fields.

    :param definition: the dictionary containing the scalar properties.
    """
    if not self._strict_type_checks:
        return

    # Validate the scalar kind. Messages are built with format() so that a
    # missing kind (None) is reported instead of raising a TypeError from
    # string concatenation.
    scalar_kind = definition.get('kind')
    if scalar_kind not in SCALAR_TYPES_MAP.keys():
        ParserError('{} - unknown scalar kind: {}.\nSee: {}'.format(
            self._name, scalar_kind, BASE_DOC_URL)).handle_later()

    # Validate the collection policy.
    collection_policy = definition.get('release_channel_collection', None)
    if collection_policy and collection_policy not in ['opt-in', 'opt-out']:
        ParserError(
            '{} - unknown collection policy: {}.\nSee: {}#optional-fields'.format(
                self._name, collection_policy, BASE_DOC_URL)).handle_later()

    # Validate the cpp_guard. re.search is required here: re.match would only
    # test the first character and accept e.g. "FOO-BAR".
    cpp_guard = definition.get('cpp_guard')
    if cpp_guard and re.search(r'\W', cpp_guard):
        ParserError(
            '{} - invalid cpp_guard: {}.\nSee: {}#optional-fields'.format(
                self._name, cpp_guard, BASE_DOC_URL)).handle_later()

    # Validate record_in_processes.
    record_in_processes = definition.get('record_in_processes', [])
    for proc in record_in_processes:
        if not utils.is_valid_process_name(proc):
            ParserError(
                '{} - unknown value in record_in_processes: {}.\nSee: {}'.format(
                    self._name, proc, BASE_DOC_URL)).handle_later()

    # Validate product.
    products = definition.get('products', [])
    for product in products:
        if not utils.is_valid_product(product):
            ParserError(
                '{} - unknown value in products: {}.\nSee: {}'.format(
                    self._name, product, BASE_DOC_URL)).handle_later()

    # Validate the expiration version.
    # Historical versions of Scalars.json may contain expiration versions
    # using the deprecated format 'N.Na1'. Those scripts set
    # self._strict_type_checks to false, in which case this point is never
    # reached because of the early return above.
    expires = definition.get('expires')
    if not utils.validate_expiration_version(expires):
        ParserError(
            '{} - invalid expires: {}.\nSee: {}#required-fields'.format(
                self._name, expires, BASE_DOC_URL)).handle_later()
def check_expiration(self, name, definition):
    """Validate and normalize the 'expires_in_version' field in place.

    Nothing happens when the field is absent or falsy. Otherwise the value is
    passed through ``utils.add_expiration_postfix`` and written back into
    ``definition``.

    :param name: the histogram name, used in error messages.
    :param definition: the histogram definition dictionary; updated in place.
    :raises ParserError: if "default" is used by a histogram that is not on
        the 'expiry_default' whitelist, or if a non-"default" value fails
        ``utils.validate_expiration_version``.
    """
    field = 'expires_in_version'
    expiration = definition.get(field)
    if not expiration:
        return

    if expiration == "default":
        # New probes may not use "expires_in_version": "default"; old ones
        # that do are listed in the whitelist.
        if whitelists is not None and name not in whitelists['expiry_default']:
            raise ParserError('New histogram "%s" cannot have "default" %s value.'
                              % (name, field))
    elif not utils.validate_expiration_version(expiration):
        raise ParserError(('Error for histogram {} - invalid {}: {}.'
                           '\nSee: {}#expires-in-version')
                          .format(name, field, expiration, HISTOGRAMS_DOC_URL))

    definition[field] = utils.add_expiration_postfix(expiration)
def test_valid_histogram(self):
    """A fully specified histogram parses without errors and keeps its values."""
    SAMPLE_HISTOGRAM = {
        "TEST_VALID_HISTOGRAM": {
            "record_in_processes": ["main", "content"],
            "alert_emails": ["*****@*****.**"],
            "bug_numbers": [1383793],
            "expires_in_version": "never",
            "kind": "boolean",
            "description": "Test histogram"
        }
    }
    histograms = load_histogram(SAMPLE_HISTOGRAM)
    parse_histograms.load_whitelist()

    hist = parse_histograms.Histogram('TEST_VALID_HISTOGRAM',
                                      histograms['TEST_VALID_HISTOGRAM'],
                                      strict_type_checks=True)

    ParserError.exit_func()
    # assertEqual, not assertTrue: assertTrue(x, y) treats y as the failure
    # message and passes for any truthy x, so nothing was being compared.
    self.assertEqual(hist.expiration(), "never")
    self.assertEqual(hist.kind(), "boolean")
    # Call the accessor; the bound method object itself is always truthy.
    self.assertEqual(hist.record_in_processes(), ["main", "content"])
def load_events(filename):
    """Parses a YAML file containing the event definitions.

    :param filename: the YAML file containing the event definitions.
    :raises ParserError: if the event file cannot be opened or parsed.
    """
    # Parse the event definitions from the YAML file.
    # NOTE(review): the parsed data is not used or returned in the code
    # visible here - confirm whether further processing belongs below.
    events = None
    try:
        with open(filename, 'r') as f:
            events = yaml.safe_load(f)
    except IOError as e:
        # str(e) carries the errno text. IOError.message is empty on
        # Python 2 for (errno, strerror) errors and absent on Python 3.
        raise ParserError('Error opening ' + filename + ': ' + str(e) + ".")
    except yaml.YAMLError as e:
        # Honor the documented contract for files that open but do not parse.
        raise ParserError('Error parsing ' + filename + ': ' + str(e) + ".")
def set_bucket_parameters(self, low, high, n_buckets):
    """Store the bucket parameters and enforce the bucket-count limit.

    The limit only applies when ``n_buckets`` is an integer and whitelists
    are available.

    :param low: lower bucket bound, stored in ``self._low``.
    :param high: upper bucket bound, stored in ``self._high``.
    :param n_buckets: bucket count, stored in ``self._n_buckets``.
    :raises ParserError: if an integer bucket count exceeds 100 and the
        histogram is not on the 'n_buckets' whitelist.
    """
    self._low = low
    self._high = high
    self._n_buckets = n_buckets

    # Test the type before comparing: ordering a non-integer (e.g. a string
    # expression) against an int raises TypeError on Python 3.
    too_many = isinstance(n_buckets, int) and n_buckets > 100
    if too_many and whitelists is not None:
        if self._name not in whitelists['n_buckets']:
            raise ParserError(
                'New histogram "%s" is not permitted to have more than 100 buckets.\n'
                'Histograms with large numbers of buckets use disproportionately high'
                ' amounts of resources. Contact a Telemetry peer (e.g. in #telemetry)'
                ' if you think an exception ought to be made:\n'
                'https://wiki.mozilla.org/Modules/Toolkit#Telemetry'
                % self._name)
def load_scalars(filename, strict_type_checks=True):
    """Parses a YAML file containing the scalar definition.

    :param filename: the YAML file containing the scalars definition.
    :param strict_type_checks: not used by the code visible here.
    :raises ParserError: if the scalar file cannot be opened or parsed.
    """
    # Parse the scalar definitions from the YAML file.
    # NOTE(review): the parsed data is not used or returned in the code
    # visible here - confirm whether further processing belongs below.
    scalars = None
    try:
        with open(filename, 'r') as f:
            scalars = yaml.safe_load(f)
    except IOError as e:
        # str(e) carries the errno text. IOError.message is empty on
        # Python 2 for (errno, strerror) errors and absent on Python 3.
        raise ParserError('Error opening ' + filename + ': ' + str(e))
    except yaml.YAMLError as e:
        # Honor the documented contract for files that open but do not parse.
        raise ParserError('Error parsing ' + filename + ': ' + str(e))
def load_allowlist():
    """Load histogram-allowlists.json into the module-global ``allowlists``.

    On success ``allowlists`` maps each top-level key of the file to a set of
    its entries. If the file cannot be opened, ``allowlists`` is set to None
    and the error is reported with ``handle_now()``. A JSON decoding error is
    reported the same way and leaves ``allowlists`` unchanged.
    """
    global allowlists

    # The parsers live in build_scripts/parsers in the Telemetry module, while
    # the histogram-allowlists file lives in the root of the module. Account
    # for that when looking for the allowlist.
    # NOTE: if the parsers are moved, this logic will need to be updated.
    # The path is computed before the try block so the error handlers below
    # can always refer to it.
    parsers_path = os.path.realpath(os.path.dirname(__file__))
    telemetry_module_path = os.path.abspath(
        os.path.join(parsers_path, os.pardir, os.pardir))
    allowlist_path = os.path.join(telemetry_module_path,
                                  'histogram-allowlists.json')

    try:
        with open(allowlist_path, 'r') as f:
            try:
                loaded = json.load(f)
                # items() works on Python 2 and 3 (iteritems() does not).
                # Building a new dict publishes the global only once every
                # list has been converted to a set.
                allowlists = {name: set(entries)
                              for name, entries in loaded.items()}
            except ValueError:
                ParserError('Error parsing allowlist: %s' %
                            allowlist_path).handle_now()
    except IOError:
        allowlists = None
        ParserError('Unable to parse allowlist: %s.' %
                    allowlist_path).handle_now()
def load_events(filename, strict_type_checks):
    """Parses a YAML file containing the event definitions.

    :param filename: the YAML file containing the event definitions.
    :param strict_type_checks: a boolean indicating whether to use the
        stricter type checks; not used by the code visible here.
    :raises ParserError: if the event file cannot be opened or parsed.
    """
    # Parse the event definitions from the YAML file.
    # NOTE(review): the parsed data is not used or returned in the code
    # visible here - confirm whether further processing belongs below.
    events = None
    try:
        with open(filename, 'r') as f:
            events = yaml.safe_load(f)
    except IOError as e:
        # str(e) carries the errno text. IOError.message is empty on
        # Python 2 for (errno, strerror) errors and absent on Python 3.
        raise ParserError('Error opening ' + filename + ': ' + str(e) + ".")
    except yaml.YAMLError as e:
        # Honor the documented contract for files that open but do not parse.
        raise ParserError('Error parsing ' + filename + ': ' + str(e) + ".")
def set_bucket_parameters(self, low, high, n_buckets):
    """Store the bucket parameters and enforce the bucket-count limit.

    The limit is 101 for 'enumerated' and 'categorical' histograms and 100
    for every other kind. It only applies when ``n_buckets`` is an integer
    and allowlists are available. A violation is reported through
    ``ParserError(...).handle_later()``.

    :param low: lower bucket bound, stored in ``self._low``.
    :param high: upper bucket bound, stored in ``self._high``.
    :param n_buckets: bucket count, stored in ``self._n_buckets``.
    """
    self._low = low
    self._high = high
    self._n_buckets = n_buckets

    max_n_buckets = 101 if self._kind in ['enumerated', 'categorical'] else 100

    # Test the type before comparing: ordering a non-integer (e.g. a string
    # expression) against an int raises TypeError on Python 3.
    too_many = isinstance(n_buckets, int) and n_buckets > max_n_buckets
    if too_many and allowlists is not None:
        if self._name not in allowlists['n_buckets']:
            # Report the limit that was actually applied (100 or 101).
            ParserError(
                'New histogram "%s" is not permitted to have more than %d buckets.\n'
                'Histograms with large numbers of buckets use disproportionately high'
                ' amounts of resources. Contact a Telemetry peer (e.g. in #telemetry)'
                ' if you think an exception ought to be made:\n'
                'https://wiki.mozilla.org/Modules/Toolkit#Telemetry'
                % (self._name, max_n_buckets)).handle_later()
def check_record_into_store(self, name, definition):
    """Report a 'record_into_store' field that is present but empty.

    The field is optional, so a definition without it is accepted. Nothing is
    checked unless ``self._strict_type_checks`` is set. The problem is
    reported through ``ParserError(...).handle_later()``.

    :param name: the histogram name, used in the error message.
    :param definition: the histogram definition dictionary.
    """
    if not self._strict_type_checks:
        return

    field = 'record_into_store'
    # record_into_store is optional
    if field not in definition:
        return

    # A present field with a non-empty value is fine.
    if definition.get(field):
        return

    DOC_URL = HISTOGRAMS_DOC_URL + "#record-into-store"
    ParserError('Histogram "%s" has empty list of stores, which is not allowed.\n%s' %
                (name, DOC_URL)).handle_later()
def compute_bucket_parameters(self, definition):
    """Compute the bucket parameters for this histogram's kind and store them.

    The function registered for ``self._kind`` is called with ``definition``
    and its result is unpacked into ``self.set_bucket_parameters``.

    :param definition: the histogram definition dictionary.
    :raises ParserError: if ``self._kind`` has no registered function.
    """
    bucket_fns = {
        'boolean': Histogram.boolean_flag_bucket_parameters,
        'flag': Histogram.boolean_flag_bucket_parameters,
        'count': Histogram.boolean_flag_bucket_parameters,
        'enumerated': Histogram.enumerated_bucket_parameters,
        'categorical': Histogram.categorical_bucket_parameters,
        'linear': Histogram.linear_bucket_parameters,
        'exponential': Histogram.exponential_bucket_parameters,
    }

    compute = bucket_fns.get(self._kind)
    if compute is None:
        raise ParserError('Unknown kind "%s" for histogram "%s".' %
                          (self._kind, self._name))

    parameters = compute(definition)
    self.set_bucket_parameters(*parameters)
def set_nsITelemetry_kind(self):
    """Store the nsITelemetry histogram type constant for ``self._kind``.

    The result is saved in ``self._nsITelemetry_kind``.

    :raises ParserError: if ``self._kind`` is not a known kind.
    """
    # Pick a Telemetry implementation type.
    types = {
        'boolean': 'BOOLEAN',
        'flag': 'FLAG',
        'count': 'COUNT',
        'enumerated': 'LINEAR',
        'categorical': 'CATEGORICAL',
        'linear': 'LINEAR',
        'exponential': 'EXPONENTIAL',
    }

    implementation = types.get(self._kind)
    if implementation is None:
        raise ParserError('Unknown kind "%s" for histogram "%s".' %
                          (self._kind, self._name))

    self._nsITelemetry_kind = "nsITelemetry::HISTOGRAM_%s" % implementation
def check_operating_systems(self, name, definition):
    """Report every 'operating_systems' entry rejected by ``utils.is_valid_os``.

    The field is optional; a missing or empty value is accepted. Nothing is
    checked unless ``self._strict_type_checks`` is set. Each bad entry is
    reported through ``ParserError(...).handle_later()``.

    :param name: the histogram name, used in error messages.
    :param definition: the histogram definition dictionary.
    """
    if not self._strict_type_checks:
        return

    field = 'operating_systems'
    entries = definition.get(field)
    # operating_systems is optional
    if not entries:
        return

    DOC_URL = HISTOGRAMS_DOC_URL + "#operating_systems"
    for entry in entries:
        if utils.is_valid_os(entry):
            continue
        ParserError('Histogram "%s" has unknown operating system "%s" in %s.\n%s' %
                    (name, entry, field, DOC_URL)).handle_later()
def check_products(self, name, definition):
    """Report every 'products' entry rejected by ``utils.is_valid_product``.

    The field is optional; a missing or empty value is accepted. Nothing is
    checked unless ``self._strict_type_checks`` is set. Each bad entry is
    reported through ``ParserError(...).handle_later()``.

    :param name: the histogram name, used in error messages.
    :param definition: the histogram definition dictionary.
    """
    if not self._strict_type_checks:
        return

    field = 'products'
    entries = definition.get(field)
    # products is optional
    if not entries:
        return

    DOC_URL = HISTOGRAMS_DOC_URL + "#products"
    for entry in entries:
        if utils.is_valid_product(entry):
            continue
        ParserError('Histogram "%s" has unknown product "%s" in %s.\n%s' %
                    (name, entry, field, DOC_URL)).handle_later()
def ranges(self):
    """Return an array of lower bounds for each bucket in the histogram.

    The bounds come from the bucket function registered for ``self._kind``,
    called with ``self.low()``, ``self.high()`` and ``self.n_buckets()``.

    :raises ParserError: if ``self._kind`` has no registered bucket function.
    """
    bucket_fns = {
        'boolean': linear_buckets,
        'flag': linear_buckets,
        'count': linear_buckets,
        'enumerated': linear_buckets,
        'categorical': linear_buckets,
        'linear': linear_buckets,
        'exponential': exponential_buckets,
    }

    make_buckets = bucket_fns.get(self._kind)
    if make_buckets is None:
        raise ParserError('Unknown kind "%s" for histogram "%s".' %
                          (self._kind, self._name))

    return make_buckets(self.low(), self.high(), self.n_buckets())
def check_expiration(self, name, definition):
    """Validate and normalize the 'expires_in_version' field in place.

    Nothing happens when the field is absent or falsy. A value of the form
    "N" gets ".0a1" appended and a value of the form "N.0" gets "a1"
    appended; any other value is written back unchanged.

    :param name: the histogram name, used in the error message.
    :param definition: the histogram definition dictionary; updated in place.
    :raises ParserError: if "default" is used by a histogram that is not on
        the 'expiry_default' whitelist.
    """
    field = 'expires_in_version'
    expiration = definition.get(field)
    if not expiration:
        return

    # New probes may not use "expires_in_version": "default"; old ones that
    # do are listed in the whitelist.
    if expiration == "default" and whitelists is not None:
        if name not in whitelists['expiry_default']:
            raise ParserError('New histogram "%s" cannot have "default" %s value.'
                              % (name, field))

    # The first matching shape decides which suffix is appended.
    suffix_by_shape = (
        (r'^[1-9][0-9]*$', ".0a1"),
        (r'^[1-9][0-9]*\.0$', "a1"),
    )
    for shape, suffix in suffix_by_shape:
        if re.match(shape, expiration):
            expiration = expiration + suffix
            break

    definition[field] = expiration
def set_bucket_parameters(self, low, high, n_buckets):
    """Store the bucket parameters and report out-of-range values.

    Two independent checks run, each reporting through
    ``ParserError(...).handle_later()``:

    - an integer bucket count above 100 for a histogram that is not on the
      'n_buckets' whitelist (only when whitelists are available);
    - an integer ``low`` or ``high`` that does not survive a round trip
      through ``ctypes.c_int``, i.e. does not fit in a C int.

    Non-integer values (e.g. string expressions) are stored but not checked.

    :param low: lower bucket bound, stored in ``self._low``.
    :param high: upper bucket bound, stored in ``self._high``.
    :param n_buckets: bucket count, stored in ``self._n_buckets``.
    """
    self._low = low
    self._high = high
    self._n_buckets = n_buckets

    # Test the type before comparing: ordering a non-integer against an int
    # raises TypeError on Python 3.
    too_many = isinstance(n_buckets, int) and n_buckets > 100
    if too_many and whitelists is not None:
        if self._name not in whitelists['n_buckets']:
            ParserError(
                'New histogram "%s" is not permitted to have more than 100 buckets.\n'
                'Histograms with large numbers of buckets use disproportionately high'
                ' amounts of resources. Contact a Telemetry peer (e.g. in #telemetry)'
                ' if you think an exception ought to be made:\n'
                'https://wiki.mozilla.org/Modules/Toolkit#Telemetry'
                % self._name).handle_later()

    def overflows_c_int(bound):
        # ctypes does no overflow checking, so an out-of-range value comes
        # back changed. Read .value: a c_int object cannot be compared to an
        # int directly.
        return isinstance(bound, int) and ctypes.c_int(bound).value != bound

    if overflows_c_int(low) or overflows_c_int(high):
        ParserError(
            'Either low_bucket or high_bucket is over the maximum load for an integer\n'
            'For more information on Ctype please visit\n'
            'https://docs.python.org/3/library/ctypes.html').handle_later()
def check_whitelisted_kind(self, name, definition):
    """Reject new "flag" and "count" histograms that do not target Android.

    Nothing is checked unless ``self._strict_type_checks`` is set and
    whitelists are available. A violation is reported through
    ``ParserError(...).handle_now()``.

    :param name: the histogram name.
    :param definition: the histogram definition dictionary.
    """
    # We don't need to run any of these checks on the server.
    if not self._strict_type_checks:
        return
    if whitelists is None:
        return

    # "flag" and "count" histograms are disallowed on desktop, where scalars
    # should be used instead. They stay allowed on Android, as scalars are
    # not supported there yet.
    hist_kind = definition.get("kind")
    if "android" in definition.get("operating_systems", []):
        return
    if hist_kind not in ["flag", "count"]:
        return
    if name in whitelists["kind"]:
        return

    template = ('Unsupported kind "%s" for histogram "%s":\n'
                'New "%s" histograms are not supported on Desktop, you should'
                ' use scalars instead:\n'
                '%s\n'
                'Are you trying to add a histogram on Android?'
                ' Add "operating_systems": ["android"] to your histogram definition.')
    ParserError(template % (hist_kind, name, hist_kind, SCALARS_DOC_URL)).handle_now()
def validate_names(self, category_name, probe_name):
    """Validate the category and probe name:
        - Category name must be alpha-numeric + '.', no leading/trailing digit or '.'.
        - Probe name must be alpha-numeric + '_', no leading/trailing digit or '_'.

    Every violation is reported through ``ParserError(...).handle_later()``.

    :param category_name: the name of the category the probe is in.
    :param probe_name: the name of the scalar probe.
    """
    # Enforce a maximum length on category and probe names.
    MAX_NAME_LENGTH = 40
    for candidate in (category_name, probe_name):
        if len(candidate) > MAX_NAME_LENGTH:
            ParserError(
                ("Name '{}' exceeds maximum name length of {} characters.\n"
                 "See: {}#the-yaml-definition-file").format(
                     candidate, MAX_NAME_LENGTH, BASE_DOC_URL)).handle_later()

    # (name, message prefix, extra character allowed besides alpha-numerics)
    rules = (
        (category_name, 'Category', r'\.'),
        (probe_name, 'Probe', r'_'),
    )
    for name, error_msg_prefix, allowed_char_regexp in rules:
        # Check if we only have the allowed characters.
        chars_regxp = r'^[a-zA-Z0-9' + allowed_char_regexp + r']+$'
        if re.search(chars_regxp, name) is None:
            ParserError(
                (error_msg_prefix + " name must be alpha-numeric. Got: '{}'.\n"
                 "See: {}#the-yaml-definition-file").format(
                     name, BASE_DOC_URL)).handle_later()

        # Don't allow leading/trailing digits, '.' or '_'.
        if re.search(r'(^[\d\._])|([\d\._])$', name) is not None:
            ParserError(
                (error_msg_prefix + " name must not have a leading/trailing "
                 "digit, a dot or underscore. Got: '{}'.\n"
                 " See: {}#the-yaml-definition-file").format(
                     name, BASE_DOC_URL)).handle_later()