def test_get_interface_allows_http():
    """`get_interface` resolves the "request" alias to the Http interface class."""
    from sentry.interfaces.http import Http

    # The original test repeated the identical lookup/assert pair twice;
    # a single check is sufficient (see the sibling test that exercises
    # both the legacy and the new alias with *different* names).
    result = get_interface("request")
    assert result is Http
def post(self, request, project, helper, **kwargs):
    """Accept a browser security report for `project` and forward it for processing.

    Rejects payloads whose type is unknown, that fail schema validation, or
    whose origin is not allowed for the project.
    """
    raw = helper.safely_load_json_string(request.body)

    kind = self.security_report_type(raw)
    if kind is None:
        raise APIError('Unrecognized security report type')

    interface = get_interface(kind)
    try:
        report = interface.from_raw(raw)
    except jsonschema.ValidationError as e:
        raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

    # Do origin check based on the `document-uri` key as explained in `_dispatch`.
    if not is_valid_origin(report.get_origin(), project):
        if project:
            tsdb.incr(tsdb.models.project_total_received_cors, project.id)
        raise APIForbidden('Invalid origin')

    payload = {
        'interface': interface.path,
        'report': report,
        'release': request.GET.get('sentry_release'),
        'environment': request.GET.get('sentry_environment'),
    }

    result = self.process(
        request, project=project, helper=helper, data=payload, **kwargs
    )
    if isinstance(result, HttpResponse):
        return result
    return HttpResponse(content_type='application/javascript', status=201)
def post(self, request, project, helper, key, **kwargs):
    """Accept a browser security report, recording CORS rejections as outcomes."""
    raw = safely_load_json_string(request.body)

    kind = self.security_report_type(raw)
    if kind is None:
        raise APIError('Unrecognized security report type')

    interface = get_interface(kind)
    try:
        report = interface.from_raw(raw)
    except jsonschema.ValidationError as e:
        raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

    # Do origin check based on the `document-uri` key as explained in `_dispatch`.
    if not is_valid_origin(report.get_origin(), project):
        if project:
            track_outcome(project.organization_id, project.id, key.id,
                          'invalid', FilterStatKeys.CORS)
        raise APIForbidden('Invalid origin')

    payload = {
        'interface': interface.path,
        'report': report,
        'release': request.GET.get('sentry_release'),
        'environment': request.GET.get('sentry_environment'),
    }
    self.process(request, project=project, helper=helper, data=payload, key=key, **kwargs)
    return HttpResponse(content_type='application/javascript', status=201)
def should_filter(self):
    """
    returns (result: bool, reason: string or None)

    Result is True if an event should be filtered; the reason is passed
    along as a string so that we can store it in metrics.
    """
    data = self._data

    # Security-report interfaces carry their own per-project filter logic.
    for name in SECURITY_REPORT_INTERFACES:
        if name not in data:
            continue
        interface = get_interface(name)
        if interface.to_python(data[name]).should_filter(self._project):
            return (True, FilterStatKeys.INVALID_CSP)

    if self._client_ip and not is_valid_ip(self.relay_config, self._client_ip):
        return (True, FilterStatKeys.IP_ADDRESS)

    release = data.get('release')
    if release and not is_valid_release(self.relay_config, release):
        return (True, FilterStatKeys.RELEASE_VERSION)

    error_message = (
        get_path(data, 'logentry', 'formatted')
        or get_path(data, 'logentry', 'message')
        or ''
    )
    if error_message and not is_valid_error_message(self.relay_config, error_message):
        return (True, FilterStatKeys.ERROR_MESSAGE)

    for exc in get_path(data, 'exception', 'values', filter=True, default=[]):
        parts = (exc.get('type'), exc.get('value'))
        message = u': '.join(p for p in parts if p)
        if message and not is_valid_error_message(self.relay_config, message):
            return (True, FilterStatKeys.ERROR_MESSAGE)

    return should_filter_event(self.relay_config, self._data)
def post(self, request, project, helper, **kwargs):
    """Accept a browser security report, counting CORS rejections in tsdb."""
    raw = safely_load_json_string(request.body)

    kind = self.security_report_type(raw)
    if kind is None:
        raise APIError('Unrecognized security report type')

    interface = get_interface(kind)
    try:
        report = interface.from_raw(raw)
    except jsonschema.ValidationError as e:
        raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

    # Do origin check based on the `document-uri` key as explained in `_dispatch`.
    if not is_valid_origin(report.get_origin(), project):
        if project:
            tsdb.incr(tsdb.models.project_total_received_cors, project.id)
        raise APIForbidden('Invalid origin')

    payload = {
        'interface': interface.path,
        'report': report,
        'release': request.GET.get('sentry_release'),
        'environment': request.GET.get('sentry_environment'),
    }

    result = self.process(request, project=project, helper=helper, data=payload, **kwargs)
    # `process` may short-circuit with its own response.
    if isinstance(result, HttpResponse):
        return result
    return HttpResponse(content_type='application/javascript', status=201)
def should_filter(self):
    """
    returns (result: bool, reason: string or None)

    Result is True if an event should be filtered; the reason is passed
    along as a string so that we can store it in metrics.
    """
    data = self._data
    config = self.project_config

    for name in SECURITY_REPORT_INTERFACES:
        if name not in data:
            continue
        interface = get_interface(name)
        if interface.to_python(data[name]).should_filter(self._project):
            return (True, FilterStatKeys.INVALID_CSP)

    if self._client_ip and not is_valid_ip(config, self._client_ip):
        return (True, FilterStatKeys.IP_ADDRESS)

    release = data.get("release")
    if release and not is_valid_release(config, release):
        return (True, FilterStatKeys.RELEASE_VERSION)

    error_message = (
        get_path(data, "logentry", "formatted")
        or get_path(data, "logentry", "message")
        or ""
    )
    if error_message and not is_valid_error_message(config, error_message):
        return (True, FilterStatKeys.ERROR_MESSAGE)

    for exc in get_path(data, "exception", "values", filter=True, default=[]):
        parts = (exc.get("type"), exc.get("value"))
        message = u": ".join(p for p in parts if p)
        if message and not is_valid_error_message(config, message):
            return (True, FilterStatKeys.ERROR_MESSAGE)

    return should_filter_event(config, self._data)
def should_filter(self, project, data, ip_address=None):
    """Filter on any security-report interface first, then defer to the base class."""
    for name in self.report_interfaces:
        if name not in data:
            continue
        interface = get_interface(name)
        if interface.to_python(data[name]).should_filter(project):
            return (True, FilterStatKeys.INVALID_CSP)
    return super(SecurityApiHelper, self).should_filter(project, data, ip_address)
def post(self, request, project, helper, key, project_config, **kwargs):
    """Accept a browser security report; every rejection is tracked as an INVALID outcome."""
    org_id = project_config.organization_id
    project_id = project_config.project_id

    def reject(reason):
        # Record the discarded event before the caller raises.
        track_outcome(org_id, project_id, key.id, Outcome.INVALID, reason)

    raw = safely_load_json_string(request.body)

    kind = self.security_report_type(raw)
    if kind is None:
        reject("security_report_type")
        raise APIError("Unrecognized security report type")

    interface = get_interface(kind)
    try:
        report = interface.from_raw(raw)
    except jsonschema.ValidationError as e:
        reject("security_report")
        raise APIError("Invalid security report: %s" % str(e).splitlines()[0])

    # Do origin check based on the `document-uri` key as explained in `_dispatch`.
    if not is_valid_origin(report.get_origin(), project):
        reject(FilterStatKeys.CORS)
        raise APIForbidden("Invalid origin")

    payload = {
        "interface": interface.path,
        "report": report,
        "release": request.GET.get("sentry_release"),
        "environment": request.GET.get("sentry_environment"),
    }
    self.process(
        request,
        project=project,
        helper=helper,
        data=payload,
        key=key,
        project_config=project_config,
        **kwargs
    )
    return HttpResponse(content_type="application/javascript", status=201)
def process_csp_report(self):
    """Only called from the CSP report endpoint."""
    data = self._data

    try:
        interface = get_interface(data.pop('interface'))
        report = data.pop('report')
    except KeyError:
        raise APIForbidden('No report or interface data')

    # To support testing, we can either accept a built interface instance, or the raw data in
    # which case we build the instance ourselves
    try:
        if isinstance(report, interface):
            instance = report
        else:
            instance = interface.from_raw(report)
    except jsonschema.ValidationError as e:
        raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

    def clean(d):
        # Drop keys whose values are falsy.
        return {key: val for key, val in d.items() if val}

    headers = clean({
        'User-Agent': self._user_agent,
        'Referer': instance.get_referrer(),
    })

    data.update({
        'logger': 'csp',
        'message': instance.get_message(),
        'culprit': instance.get_culprit(),
        instance.path: instance.to_json(),
        'tags': instance.get_tags(),
        'errors': [],
        'user': {'ip_address': self._client_ip},
        # Construct a faux Http interface based on the little information we have
        # This is a bit weird, since we don't have nearly enough
        # information to create an Http interface, but
        # this automatically will pick up tags for the User-Agent
        # which is actually important here for CSP
        'request': {
            'url': instance.get_origin(),
            'headers': headers,
        },
    })
    self._data = data
def process_csp_report(self):
    """Only called from the CSP report endpoint."""
    data = self._data

    try:
        interface = get_interface(data.pop("interface"))
        report = data.pop("report")
    except KeyError:
        raise APIForbidden("No report or interface data")

    # To support testing, we can either accept a built interface instance, or the raw data in
    # which case we build the instance ourselves
    try:
        if isinstance(report, interface):
            instance = report
        else:
            instance = interface.from_raw(report)
    except jsonschema.ValidationError as e:
        raise APIError("Invalid security report: %s" % str(e).splitlines()[0])

    def clean(d):
        # Drop keys whose values are falsy.
        return {key: val for key, val in d.items() if val}

    # Construct a faux Http interface based on the little information we have.
    # This is a bit weird, since we don't have nearly enough information to
    # create an Http interface, but this automatically will pick up tags for
    # the User-Agent which is actually important here for CSP.
    faux_request = {
        "url": instance.get_origin(),
        "headers": clean({
            "User-Agent": self._user_agent,
            "Referer": instance.get_referrer(),
        }),
    }

    data.update({
        "logger": "csp",
        "message": instance.get_message(),
        "culprit": instance.get_culprit(),
        instance.path: instance.to_json(),
        "tags": instance.get_tags(),
        "errors": [],
        "user": {"ip_address": self._client_ip},
        "request": faux_request,
    })
    self._data = data
def interfaces(self):
    """Return the event's interfaces as a SortedDict, highest score first."""
    pairs = []
    for key, data in self.data.iteritems():
        try:
            cls = get_interface(key)
        except ValueError:
            # Unknown attribute; not an interface.
            continue
        value = safe_execute(cls.to_python, data)
        if value:
            pairs.append((key, value))
    pairs.sort(key=lambda item: item[1].get_score(), reverse=True)
    return SortedDict(pairs)
def post(self, request, project, helper, key, **kwargs):
    """Accept a browser security report, emitting Outcome.INVALID on each rejection."""
    raw = safely_load_json_string(request.body)

    kind = self.security_report_type(raw)
    if kind is None:
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.INVALID,
            "security_report_type")
        raise APIError('Unrecognized security report type')

    interface = get_interface(kind)
    try:
        report = interface.from_raw(raw)
    except jsonschema.ValidationError as e:
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.INVALID,
            "security_report")
        raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

    # Do origin check based on the `document-uri` key as explained in `_dispatch`.
    if not is_valid_origin(report.get_origin(), project):
        if project:
            track_outcome(
                project.organization_id,
                project.id,
                key.id,
                Outcome.INVALID,
                FilterStatKeys.CORS)
        raise APIForbidden('Invalid origin')

    payload = {
        'interface': interface.path,
        'report': report,
        'release': request.GET.get('sentry_release'),
        'environment': request.GET.get('sentry_environment'),
    }
    self.process(request, project=project, helper=helper, data=payload, key=key, **kwargs)
    return HttpResponse(content_type='application/javascript', status=201)
def should_filter(self):
    """
    returns (result: bool, reason: string or None)

    Result is True if an event should be filtered; the reason is passed
    along as a string so that we can store it in metrics.
    """
    data = self._data
    project = self._project

    for name in SECURITY_REPORT_INTERFACES:
        if name not in data:
            continue
        interface = get_interface(name)
        if interface.to_python(data[name]).should_filter(project):
            return (True, FilterStatKeys.INVALID_CSP)

    if self._client_ip and not is_valid_ip(project, self._client_ip):
        return (True, FilterStatKeys.IP_ADDRESS)

    release = data.get('release')
    if release and not is_valid_release(project, release):
        return (True, FilterStatKeys.RELEASE_VERSION)

    error_message = (
        get_path(data, 'logentry', 'formatted')
        or get_path(data, 'logentry', 'message')
        or ''
    )
    if error_message and not is_valid_error_message(project, error_message):
        return (True, FilterStatKeys.ERROR_MESSAGE)

    for exc in get_path(data, 'exception', 'values', filter=True, default=[]):
        parts = (exc.get('type'), exc.get('value'))
        message = u': '.join(p for p in parts if p)
        if message and not is_valid_error_message(project, message):
            return (True, FilterStatKeys.ERROR_MESSAGE)

    # Finally, run every registered inbound filter that is enabled.
    for filter_cls in filters.all():
        filter_obj = filter_cls(project)
        if filter_obj.is_enabled() and filter_obj.test(data):
            return (True, six.text_type(filter_obj.id))

    return (False, None)
def test_allows_http(self):
    """Both the legacy path and the new alias resolve to the Http interface."""
    from sentry.interfaces.http import Http

    for alias in ('sentry.interfaces.Http', 'request'):
        assert get_interface(alias) is Http
def test_does_not_let_through_disallowed_name(self):
    # Arbitrary, non-whitelisted names (e.g. stdlib module names) must not
    # resolve to an interface class.
    with self.assertRaises(ValueError):
        get_interface('subprocess')
def normalize(self, request_env=None):
    """Coerce, validate and default the raw event payload held in ``self.data``.

    Mutates ``self.data`` in place and returns it. Recoverable problems are
    recorded in ``data['errors']`` instead of raising; the field that caused
    the problem is dropped.

    :param request_env: optional dict of request-derived values; this code
        reads ``client_ip`` and ``auth`` from it.
    """
    request_env = request_env or {}
    data = self.data
    errors = data['errors'] = []

    # Ignore event meta data for now.
    data.pop('_meta', None)

    # Before validating with a schema, attempt to cast values to their desired types
    # so that the schema doesn't have to take every type variation into account.
    text = six.text_type
    fp_types = six.string_types + six.integer_types + (float, )

    def to_values(v):
        # Wrap bare lists as {'values': [...]} for multi-valued interfaces.
        return {'values': v} if v and isinstance(v, (tuple, list)) else v

    def stringify(f):
        # Floats at or beyond 2**53 have lost integer precision; drop them (None).
        if isinstance(f, float):
            return text(int(f)) if abs(f) < (1 << 53) else None
        return text(f)

    casts = {
        'environment': lambda v: text(v) if v is not None else v,
        'fingerprint': lambda v: list(x for x in map(stringify, v) if x is not None)
        if isinstance(v, list) and all(isinstance(f, fp_types) for f in v)
        else v,
        'release': lambda v: text(v) if v is not None else v,
        'dist': lambda v: text(v).strip() if v is not None else v,
        'time_spent': lambda v: int(v) if v is not None else v,
        'tags': lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip())
                           for (v_k, v_v) in dict(v).items()],
        'timestamp': lambda v: process_timestamp(v),
        'platform': lambda v: v if v in VALID_PLATFORMS else 'other',
        'logentry': lambda v: v if isinstance(v, dict) else {'message': v},

        # These can be sent as lists and need to be converted to {'values': [...]}
        'exception': to_values,
        'breadcrumbs': to_values,
        'threads': to_values,
    }

    for c in casts:
        if c in data:
            try:
                data[c] = casts[c](data[c])
            except InvalidTimestamp as it:
                # Preserve the specific timestamp error type in the report.
                errors.append({'type': it.args[0], 'name': c, 'value': data[c]})
                del data[c]
            except Exception as e:
                errors.append({'type': EventError.INVALID_DATA, 'name': c, 'value': data[c]})
                del data[c]

    # raw 'message' is coerced to the Message interface, as its used for pure index of
    # searchable strings. If both a raw 'message' and a Message interface exist, try and
    # add the former as the 'formatted' attribute of the latter.
    # See GH-3248
    msg_str = data.pop('message', None)
    if msg_str:
        msg_if = data.get('logentry')
        # NOTE(review): '_meta' was popped from `data` above, so this lookup
        # presumably always yields None here — confirm whether the meta
        # propagation below is dead code.
        msg_meta = data.get('_meta', {}).get('message')

        if not msg_if:
            msg_if = data['logentry'] = {'message': msg_str}
            if msg_meta:
                data.setdefault('_meta', {}).setdefault('logentry', {})['message'] = msg_meta

        if msg_if.get('message') != msg_str:
            if not msg_if.get('formatted'):
                msg_if['formatted'] = msg_str
                if msg_meta:
                    data.setdefault('_meta', {}).setdefault(
                        'logentry', {})['formatted'] = msg_meta

    # Fill in ip addresses marked as {{auto}}
    client_ip = request_env.get('client_ip')
    if client_ip:
        if get_path(data, ['sentry.interfaces.Http', 'env', 'REMOTE_ADDR']) == '{{auto}}':
            data['sentry.interfaces.Http']['env']['REMOTE_ADDR'] = client_ip

        if get_path(data, ['request', 'env', 'REMOTE_ADDR']) == '{{auto}}':
            data['request']['env']['REMOTE_ADDR'] = client_ip

        if get_path(data, ['sentry.interfaces.User', 'ip_address']) == '{{auto}}':
            data['sentry.interfaces.User']['ip_address'] = client_ip

        if get_path(data, ['user', 'ip_address']) == '{{auto}}':
            data['user']['ip_address'] = client_ip

    # Validate main event body and tags against schema.
    # XXX(ja): jsonschema does not like CanonicalKeyDict, so we need to pass
    # in the inner data dict.
    is_valid, event_errors = validate_and_default_interface(data.data, 'event')
    errors.extend(event_errors)
    if 'tags' in data:
        is_valid, tag_errors = validate_and_default_interface(data['tags'], 'tags', name='tags')
        errors.extend(tag_errors)

    # Validate interfaces
    for k in list(iter(data)):
        if k in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(k)

        if not value:
            self.logger.debug('Ignored empty interface value: %s', k)
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            self.logger.debug('Ignored unknown attribute: %s', k)
            errors.append({'type': EventError.INVALID_ATTRIBUTE, 'name': k})
            continue

        try:
            inst = interface.to_python(value)
            # Re-keyed under the interface's canonical path.
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # Validation errors are expected noise (debug); anything else is a bug (error).
            log = self.logger.debug if isinstance(
                e, InterfaceValidationError) else self.logger.error
            log('Discarded invalid value for interface: %s (%r)', k, value, exc_info=True)
            errors.append({'type': EventError.INVALID_DATA, 'name': k, 'value': value})

    # Additional data coercion and defaulting
    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
        level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
    data['level'] = LOG_LEVELS_MAP.get(level, LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

    # A dist without a release is meaningless; drop it.
    if data.get('dist') and not data.get('release'):
        data['dist'] = None

    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()

    # TODO (alex) can this all be replaced by utcnow?
    # it looks like the only time that this would even be hit is when timestamp
    # is not defined, as the earlier process_timestamp already converts existing
    # timestamps to floats.
    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        timestamp = float(timestamp.strftime('%s'))

    data['timestamp'] = timestamp
    data['received'] = float(timezone.now().strftime('%s'))

    data.setdefault('checksum', None)
    data.setdefault('culprit', None)
    data.setdefault('dist', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('fingerprint', None)
    data.setdefault('logger', DEFAULT_LOGGER_NAME)
    data.setdefault('platform', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('tags', [])
    data.setdefault('transaction', None)

    # Fix case where legacy apps pass 'environment' as a tag
    # instead of a top level key.
    # TODO (alex) save() just reinserts the environment into the tags
    if not data.get('environment'):
        tagsdict = dict(data['tags'])
        if 'environment' in tagsdict:
            data['environment'] = tagsdict['environment']
            del tagsdict['environment']
            data['tags'] = tagsdict.items()

    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data['type'] = eventtypes.infer(data).key
    data['version'] = self.version

    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        # Fold a lone top-level stacktrace into the single exception value.
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']

    # Exception mechanism needs SDK information to resolve proper names in
    # exception meta (such as signal names). "SDK Information" really means
    # the operating system version the event was generated on. Some
    # normalization still works without sdk_info, such as mach_exception
    # names (they can only occur on macOS).
    if exception:
        sdk_info = get_sdk_from_event(data)
        for ex in exception['values']:
            if 'mechanism' in ex:
                normalize_mechanism_meta(ex['mechanism'], sdk_info)

    # If there is no User ip_addres, update it either from the Http interface
    # or the client_ip of the request.
    auth = request_env.get('auth')
    is_public = auth and auth.is_public
    add_ip_platforms = ('javascript', 'cocoa', 'objc')

    http_ip = data.get('sentry.interfaces.Http', {}).get('env', {}).get('REMOTE_ADDR')
    if http_ip:
        data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', http_ip)
    elif client_ip and (is_public or data.get('platform') in add_ip_platforms):
        data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', client_ip)

    # Trim values
    data['logger'] = trim(data['logger'].strip(), 64)
    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

    if data['transaction']:
        data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

    return data
def validate_data(self, project, data):
    """Validate and sanitize a raw event payload for ``project``.

    Mutates and returns ``data``. Invalid optional fields are dropped and
    recorded in ``data['errors']``; only a malformed ``message``, ``culprit``
    or ``event_id`` rejects the whole event via :class:`APIForbidden`.
    """
    # TODO(dcramer): move project out of the data packet
    data['project'] = project.id

    data.setdefault('errors', [])

    if not data.get('message'):
        data['message'] = '<no message value>'
    elif not isinstance(data['message'], six.string_types):
        raise APIForbidden('Invalid value for message')

    if data.get('culprit'):
        if not isinstance(data['culprit'], six.string_types):
            raise APIForbidden('Invalid value for culprit')

    if not data.get('event_id'):
        data['event_id'] = uuid.uuid4().hex
    elif not isinstance(data['event_id'], six.string_types):
        raise APIForbidden('Invalid value for event_id')

    if len(data['event_id']) > 32:
        # Over-long ids are replaced with a fresh uuid rather than rejected.
        self.log.info(
            'Discarded value for event_id due to length (%d chars)',
            len(data['event_id']))
        data['errors'].append({
            'type': EventError.VALUE_TOO_LONG,
            'name': 'event_id',
            'value': data['event_id'],
        })
        data['event_id'] = uuid.uuid4().hex

    if 'timestamp' in data:
        try:
            self._process_data_timestamp(data)
        except InvalidTimestamp as e:
            self.log.info('Discarded invalid value for timestamp: %r',
                          data['timestamp'], exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'timestamp',
                'value': data['timestamp'],
            })
            del data['timestamp']

    if data.get('modules') and type(data['modules']) != dict:
        self.log.info('Discarded invalid type for modules: %s', type(data['modules']))
        data['errors'].append({
            'type': EventError.INVALID_DATA,
            'name': 'modules',
            'value': data['modules'],
        })
        del data['modules']

    if data.get('extra') is not None and type(data['extra']) != dict:
        self.log.info('Discarded invalid type for extra: %s', type(data['extra']))
        data['errors'].append({
            'type': EventError.INVALID_DATA,
            'name': 'extra',
            'value': data['extra'],
        })
        del data['extra']

    if data.get('tags') is not None:
        # Tags may arrive as a mapping; normalize to a sequence of pairs.
        if type(data['tags']) == dict:
            data['tags'] = data['tags'].items()
        elif not isinstance(data['tags'], (list, tuple)):
            self.log.info('Discarded invalid type for tags: %s', type(data['tags']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'tags',
                'value': data['tags'],
            })
            del data['tags']

    if data.get('tags'):
        # remove any values which are over 32 characters
        tags = []
        for pair in data['tags']:
            try:
                k, v = pair
            except ValueError:
                self.log.info('Discarded invalid tag value: %r', pair)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            if not isinstance(k, six.string_types):
                try:
                    k = six.text_type(k)
                except Exception:
                    self.log.info('Discarded invalid tag key: %r', type(k))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

            if not isinstance(v, six.string_types):
                try:
                    v = six.text_type(v)
                except Exception:
                    self.log.info('Discarded invalid tag value: %s=%r', k, type(v))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

            if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                self.log.info('Discarded invalid tag: %s=%s', k, v)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            tags.append((k, v))
        data['tags'] = tags

    # Any remaining non-reserved key is treated as an interface payload.
    for k in data.keys():
        if k in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(k)

        if not value:
            self.log.info('Ignored empty interface value: %s', k)
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            self.log.info('Ignored unknown attribute: %s', k)
            data['errors'].append({
                'type': EventError.INVALID_ATTRIBUTE,
                'name': k,
            })
            continue

        if type(value) != dict:
            # HACK(dcramer): the exception interface supports a list as the
            # value. We should change this in a new protocol version.
            if type(value) in (list, tuple):
                value = {'values': value}
            else:
                self.log.info('Invalid parameter for value: %s (%r)', k, type(value))
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value,
                })
                continue

        try:
            inst = interface.to_python(value)
            # Re-keyed under the interface's canonical path.
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # Assertion failures are expected validation noise; log quietly.
            if isinstance(e, AssertionError):
                log = self.log.info
            else:
                log = self.log.error
            log('Discarded invalid value for interface: %s (%r)', k, value, exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': k,
                'value': value,
            })

    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, six.string_types) and not level.isdigit():
        # assume it's something like 'warning'
        try:
            data['level'] = LOG_LEVEL_REVERSE_MAP[level]
        except KeyError as e:
            self.log.info('Discarded invalid logger value: %s', level)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'level',
                'value': level,
            })
            data['level'] = LOG_LEVEL_REVERSE_MAP.get(
                DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

    if data.get('release'):
        data['release'] = unicode(data['release'])
        if len(data['release']) > 64:
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'release',
                'value': data['release'],
            })
            del data['release']

    return data
def normalize(self, request_env=None):
    """Coerce, validate and default the raw event payload held in ``self.data``.

    Mutates ``self.data`` in place and returns it. Recoverable problems are
    recorded in ``data['errors']`` instead of raising.

    :param request_env: optional dict of request-derived values; this code
        reads ``client_ip`` and ``auth`` from it.
    """
    request_env = request_env or {}
    data = self.data
    errors = data['errors'] = []

    # Before validating with a schema, attempt to cast values to their desired types
    # so that the schema doesn't have to take every type variation into account.
    text = six.text_type
    fp_types = six.string_types + six.integer_types + (float, )

    def to_values(v):
        # Wrap bare lists as {'values': [...]} for multi-valued interfaces.
        return {'values': v} if v and isinstance(v, (tuple, list)) else v

    def convert_fingerprint(values):
        # Stringify all entries; count (via metrics) fingerprints containing
        # floats that cannot be represented exactly as integers.
        rv = values[:]
        bad_float = False
        for idx, item in enumerate(rv):
            if isinstance(item, float) and \
                    (abs(item) >= (1 << 53) or int(item) != item):
                bad_float = True
            rv[idx] = text(item)
        if bad_float:
            metrics.incr(
                'events.bad_float_fingerprint',
                skip_internal=True,
                tags={
                    'project_id': data.get('project'),
                },
            )
        return rv

    casts = {
        'environment': lambda v: text(v) if v is not None else v,
        'fingerprint': lambda v: convert_fingerprint(v)
        if isinstance(v, list) and all(isinstance(f, fp_types) for f in v)
        else v,
        'release': lambda v: text(v) if v is not None else v,
        'dist': lambda v: text(v).strip() if v is not None else v,
        'time_spent': lambda v: int(v) if v is not None else v,
        'tags': lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip())
                           for (v_k, v_v) in dict(v).items()],
        'timestamp': lambda v: process_timestamp(v),
        'platform': lambda v: v if v in VALID_PLATFORMS else 'other',
        'sentry.interfaces.Message': lambda v: v if isinstance(v, dict) else {
            'message': v
        },

        # These can be sent as lists and need to be converted to {'values': [...]}
        'exception': to_values,
        'sentry.interfaces.Exception': to_values,
        'breadcrumbs': to_values,
        'sentry.interfaces.Breadcrumbs': to_values,
        'threads': to_values,
        'sentry.interfaces.Threads': to_values,
    }

    for c in casts:
        if c in data:
            try:
                data[c] = casts[c](data[c])
            except InvalidTimestamp as it:
                # Preserve the specific timestamp error type in the report.
                errors.append({
                    'type': it.args[0],
                    'name': c,
                    'value': data[c]
                })
                del data[c]
            except Exception as e:
                errors.append({
                    'type': EventError.INVALID_DATA,
                    'name': c,
                    'value': data[c]
                })
                del data[c]

    # raw 'message' is coerced to the Message interface, as its used for pure index of
    # searchable strings. If both a raw 'message' and a Message interface exist, try and
    # add the former as the 'formatted' attribute of the latter.
    # See GH-3248
    msg_str = data.pop('message', None)
    if msg_str:
        msg_if = data.setdefault('sentry.interfaces.Message', {'message': msg_str})
        if msg_if.get('message') != msg_str:
            msg_if.setdefault('formatted', msg_str)

    # Fill in ip addresses marked as {{auto}}
    client_ip = request_env.get('client_ip')
    if client_ip:
        if get_path(data, ['sentry.interfaces.Http', 'env', 'REMOTE_ADDR']) == '{{auto}}':
            data['sentry.interfaces.Http']['env']['REMOTE_ADDR'] = client_ip

        if get_path(data, ['request', 'env', 'REMOTE_ADDR']) == '{{auto}}':
            data['request']['env']['REMOTE_ADDR'] = client_ip

        if get_path(data, ['sentry.interfaces.User', 'ip_address']) == '{{auto}}':
            data['sentry.interfaces.User']['ip_address'] = client_ip

        if get_path(data, ['user', 'ip_address']) == '{{auto}}':
            data['user']['ip_address'] = client_ip

    # Validate main event body and tags against schema
    is_valid, event_errors = validate_and_default_interface(data, 'event')
    errors.extend(event_errors)
    if 'tags' in data:
        is_valid, tag_errors = validate_and_default_interface(data['tags'], 'tags', name='tags')
        errors.extend(tag_errors)

    # Validate interfaces
    for k in list(iter(data)):
        if k in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(k)

        if not value:
            self.logger.debug('Ignored empty interface value: %s', k)
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            self.logger.debug('Ignored unknown attribute: %s', k)
            errors.append({
                'type': EventError.INVALID_ATTRIBUTE,
                'name': k
            })
            continue

        try:
            inst = interface.to_python(value)
            # Re-keyed under the interface's canonical path.
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # Validation errors are expected noise (debug); anything else is a bug (error).
            log = self.logger.debug if isinstance(
                e, InterfaceValidationError) else self.logger.error
            log('Discarded invalid value for interface: %s (%r)', k, value, exc_info=True)
            errors.append({
                'type': EventError.INVALID_DATA,
                'name': k,
                'value': value
            })

    # Additional data coercion and defaulting
    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
        level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
    data['level'] = LOG_LEVELS_MAP.get(level, LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

    # A dist without a release is meaningless; drop it.
    if data.get('dist') and not data.get('release'):
        data['dist'] = None

    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()

    # TODO (alex) can this all be replaced by utcnow?
    # it looks like the only time that this would even be hit is when timestamp
    # is not defined, as the earlier process_timestamp already converts existing
    # timestamps to floats.
    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        timestamp = float(timestamp.strftime('%s'))

    data['timestamp'] = timestamp
    data['received'] = float(timezone.now().strftime('%s'))

    data.setdefault('checksum', None)
    data.setdefault('culprit', None)
    data.setdefault('dist', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('fingerprint', None)
    data.setdefault('logger', DEFAULT_LOGGER_NAME)
    data.setdefault('platform', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('tags', [])
    data.setdefault('transaction', None)

    # Fix case where legacy apps pass 'environment' as a tag
    # instead of a top level key.
    # TODO (alex) save() just reinserts the environment into the tags
    if not data.get('environment'):
        tagsdict = dict(data['tags'])
        if 'environment' in tagsdict:
            data['environment'] = tagsdict['environment']
            del tagsdict['environment']
            data['tags'] = tagsdict.items()

    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data['type'] = eventtypes.infer(data).key
    data['version'] = self.version

    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        # Fold a lone top-level stacktrace into the single exception value.
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']

    # Exception mechanism needs SDK information to resolve proper names in
    # exception meta (such as signal names). "SDK Information" really means
    # the operating system version the event was generated on. Some
    # normalization still works without sdk_info, such as mach_exception
    # names (they can only occur on macOS).
    if exception:
        sdk_info = get_sdk_from_event(data)
        for ex in exception['values']:
            if 'mechanism' in ex:
                normalize_mechanism_meta(ex['mechanism'], sdk_info)

    # If there is no User ip_addres, update it either from the Http interface
    # or the client_ip of the request.
    auth = request_env.get('auth')
    is_public = auth and auth.is_public
    add_ip_platforms = ('javascript', 'cocoa', 'objc')

    http_ip = data.get('sentry.interfaces.Http', {}).get('env', {}).get('REMOTE_ADDR')
    if http_ip:
        data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', http_ip)
    elif client_ip and (is_public or data.get('platform') in add_ip_platforms):
        data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', client_ip)

    # Trim values
    data['logger'] = trim(data['logger'].strip(), 64)
    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

    if data['transaction']:
        data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

    return data
def normalize(self):
    """Coerce the raw event payload in ``self.data`` into canonical form.

    Pipeline (order matters): type-cast loose fields -> fold raw 'message'
    into the Message interface -> schema-validate event + tags -> convert
    remaining non-reserved keys into interface JSON -> normalize level,
    dist, timestamps -> apply defaults -> promote legacy 'environment' tag
    -> infer event type -> merge lone stacktrace into exception -> derive
    user IP from the Http interface -> trim oversized values.
    Returns the (mutated) data dict; collected problems go in data['errors'].
    """
    data = self.data
    errors = data.get('errors', [])

    # Before validating with a schema, attempt to cast values to their desired types
    # so that the schema doesn't have to take every type variation into account.
    text = six.text_type
    fp_types = six.string_types + six.integer_types + (float, )

    def to_values(v):
        # Lists/tuples are wrapped as {'values': [...]}; anything else passes through.
        return {'values': v} if v and isinstance(v, (tuple, list)) else v

    casts = {
        'environment': lambda v: text(v) if v is not None else v,
        'fingerprint': lambda v: list(map(text, v)) if isinstance(v, list) and all(isinstance(f, fp_types) for f in v) else v,
        'release': lambda v: text(v) if v is not None else v,
        'dist': lambda v: text(v).strip() if v is not None else v,
        'time_spent': lambda v: int(v) if v is not None else v,
        'tags': lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip()) for (v_k, v_v) in dict(v).items()],
        'timestamp': lambda v: process_timestamp(v),
        'platform': lambda v: v if v in VALID_PLATFORMS else 'other',

        # These can be sent as lists and need to be converted to {'values': [...]}
        'exception': to_values,
        'sentry.interfaces.Exception': to_values,
        'breadcrumbs': to_values,
        'sentry.interfaces.Breadcrumbs': to_values,
        'threads': to_values,
        'sentry.interfaces.Threads': to_values,
    }

    # A failed cast discards the offending field and records an error entry.
    for c in casts:
        if c in data:
            try:
                data[c] = casts[c](data[c])
            except Exception as e:
                errors.append({
                    'type': EventError.INVALID_DATA,
                    'name': c,
                    'value': data[c]
                })
                del data[c]

    # raw 'message' is coerced to the Message interface, as its used for pure index of
    # searchable strings. If both a raw 'message' and a Message interface exist, try and
    # add the former as the 'formatted' attribute of the latter.
    # See GH-3248
    msg_str = data.pop('message', None)
    if msg_str:
        msg_if = data.setdefault('sentry.interfaces.Message', {'message': msg_str})
        if msg_if.get('message') != msg_str:
            msg_if.setdefault('formatted', msg_str)

    # Validate main event body and tags against schema
    is_valid, event_errors = validate_and_default_interface(data, 'event')
    errors.extend(event_errors)
    if 'tags' in data:
        is_valid, tag_errors = validate_and_default_interface(data['tags'], 'tags', name='tags')
        errors.extend(tag_errors)

    # Validate interfaces: every non-reserved key is expected to name an
    # interface; it is popped, parsed, and re-stored under its canonical path.
    # list(iter(data)) snapshots the keys so popping during the loop is safe.
    for k in list(iter(data)):
        if k in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(k)

        if not value:
            self.logger.debug('Ignored empty interface value: %s', k)
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            self.logger.debug('Ignored unknown attribute: %s', k)
            errors.append({
                'type': EventError.INVALID_ATTRIBUTE,
                'name': k
            })
            continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # Validation failures are expected noise (debug); anything else is a bug (error).
            log = self.logger.debug if isinstance(
                e, InterfaceValidationError) else self.logger.error
            log('Discarded invalid value for interface: %s (%r)', k, value, exc_info=True)
            errors.append({
                'type': EventError.INVALID_DATA,
                'name': k,
                'value': value
            })

    # Numeric levels (int or digit string) are mapped to names first, then
    # every level resolves through LOG_LEVELS_MAP with a safe default.
    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
        level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
    data['level'] = LOG_LEVELS_MAP.get(level, LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

    # A dist without a release is meaningless; drop it.
    if data.get('dist') and not data.get('release'):
        data['dist'] = None

    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()

    # TODO (alex) can this all be replaced by utcnow?
    # it looks like the only time that this would even be hit is when timestamp
    # is not defined, as the earlier process_timestamp already converts existing
    # timestamps to floats.
    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        timestamp = float(timestamp.strftime('%s'))

    data['timestamp'] = timestamp
    data['received'] = float(timezone.now().strftime('%s'))

    data.setdefault('culprit', None)
    data.setdefault('transaction', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('fingerprint', None)
    data.setdefault('platform', None)
    data.setdefault('dist', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('tags', [])

    # Fix case where legacy apps pass 'environment' as a tag
    # instead of a top level key.
    # TODO (alex) save() just reinserts the environment into the tags
    if not data.get('environment'):
        tagsdict = dict(data['tags'])
        if 'environment' in tagsdict:
            data['environment'] = tagsdict['environment']
            del tagsdict['environment']
            data['tags'] = tagsdict.items()

    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data['type'] = eventtypes.infer(data).key
    data['version'] = self.version

    # A lone top-level stacktrace is folded into a single-valued exception.
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']

    # Derive the user's IP from the HTTP env when the User interface lacks one.
    if 'sentry.interfaces.Http' in data:
        try:
            ip_address = validate_ip(
                data['sentry.interfaces.Http'].get('env', {}).get('REMOTE_ADDR'),
                required=False,
            )
            if ip_address:
                data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', ip_address)
        except ValueError:
            pass

    # Trim values
    logger = data.get('logger', DEFAULT_LOGGER_NAME)
    data['logger'] = trim(logger.strip(), 64)

    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

    if data['transaction']:
        data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

    data['errors'] = errors

    return data
def normalize(self):
    """Normalize the raw event payload in ``self.data`` in place.

    Validates level/logger/platform, converts the timestamp to an epoch
    float, applies defaults, sanitizes tags (promoting a legacy
    'environment' tag to the top-level key), converts non-reserved keys
    into interface JSON, folds raw 'message' into the Message interface,
    infers event type, merges a lone stacktrace into the exception, and
    derives the user IP from the Http env. Returns the mutated dict.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data

    # Level must be a string/int and a known level; anything else -> ERROR.
    if not isinstance(data.get('level'), (six.string_types, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR

    if not data.get('logger'):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        logger = trim(data['logger'].strip(), 64)
        if TagKey.is_valid_key(logger):
            data['logger'] = logger
        else:
            data['logger'] = DEFAULT_LOGGER_NAME

    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)

    current_timestamp = timezone.now()
    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = current_timestamp

    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        timestamp = float(timestamp.strftime('%s'))

    data['timestamp'] = timestamp
    data['received'] = float(timezone.now().strftime('%s'))

    if not data.get('event_id'):
        data['event_id'] = uuid4().hex

    data.setdefault('culprit', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('fingerprint', None)
    data.setdefault('platform', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('errors', [])

    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = list(tags.items())
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)

    data['tags'] = []
    for key, value in tags:
        key = six.text_type(key).strip()
        value = six.text_type(value).strip()
        if not (key and value):
            continue

        # XXX(dcramer): many legacy apps are using the environment tag
        # rather than the key itself
        if key == 'environment' and not data.get('environment'):
            data['environment'] = value
        else:
            data['tags'].append((key, value))

    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}

    trim_dict(
        data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    # TODO(dcramer): more of validate data needs stuffed into the manager
    # list(iter(data)) snapshots keys so pop() during the loop is safe.
    for key in list(iter(data)):
        if key in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(key)

        try:
            interface = get_interface(key)()
        except ValueError:
            continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception:
            # XXX: we should consider logging this.
            pass

    # TODO(dcramer): this logic is duplicated in ``validate_data`` from
    # coreapi
    # message is coerced to an interface, as its used for pure
    # index of searchable strings
    # See GH-3248
    message = data.pop('message', None)
    if message:
        if 'sentry.interfaces.Message' not in data:
            interface = get_interface('sentry.interfaces.Message')
            try:
                inst = interface.to_python({
                    'message': message,
                })
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass
        elif not data['sentry.interfaces.Message'].get('formatted'):
            # Keep both: the existing Message interface plus the raw
            # message as its 'formatted' attribute.
            interface = get_interface('sentry.interfaces.Message')
            try:
                inst = interface.to_python(dict(
                    data['sentry.interfaces.Message'],
                    formatted=message,
                ))
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass

    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data['type'] = eventtypes.infer(data).key
    data['version'] = self.version

    # TODO(dcramer): find a better place for this logic
    # A lone top-level stacktrace is folded into a single-valued exception.
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']

    # Derive the user's IP from the HTTP env when the User interface lacks one.
    if 'sentry.interfaces.Http' in data:
        try:
            ip_address = validate_ip(
                data['sentry.interfaces.Http'].get(
                    'env', {}).get('REMOTE_ADDR'),
                required=False,
            )
        except ValueError:
            ip_address = None

        if ip_address:
            data.setdefault('sentry.interfaces.User', {})
            data['sentry.interfaces.User'].setdefault(
                'ip_address', ip_address)

    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

    return data
def normalize(self):
    """Normalize the raw event payload in ``self.data`` in place.

    Variant using ``tagstore.is_valid_key`` and supporting the
    transaction/dist fields. Validates level/logger/platform, converts
    the timestamp to an epoch float, applies defaults, sanitizes tags
    (promoting a legacy 'environment' tag), converts non-reserved keys
    into interface JSON, folds raw 'message' into the Message interface,
    infers event type, merges a lone stacktrace into the exception, and
    derives the user IP from the Http env. Returns the mutated dict.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data

    # Level must be a string/int and a known level; anything else -> ERROR.
    if not isinstance(data.get('level'), (six.string_types, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR

    if not data.get('logger') or not isinstance(data.get('logger'), six.string_types):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        logger = trim(data['logger'].strip(), 64)
        if tagstore.is_valid_key(logger):
            data['logger'] = logger
        else:
            data['logger'] = DEFAULT_LOGGER_NAME

    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)

    current_timestamp = timezone.now()
    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = current_timestamp

    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        timestamp = float(timestamp.strftime('%s'))

    data['timestamp'] = timestamp
    data['received'] = float(timezone.now().strftime('%s'))

    if not data.get('event_id'):
        data['event_id'] = uuid4().hex

    data.setdefault('culprit', None)
    data.setdefault('transaction', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('fingerprint', None)
    data.setdefault('platform', None)
    data.setdefault('dist', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('errors', [])

    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = list(tags.items())
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)

    data['tags'] = []
    for key, value in tags:
        key = six.text_type(key).strip()
        value = six.text_type(value).strip()
        if not (key and value):
            continue

        # XXX(dcramer): many legacy apps are using the environment tag
        # rather than the key itself
        if key == 'environment' and not data.get('environment'):
            data['environment'] = value
        else:
            data['tags'].append((key, value))

    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}

    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    # TODO(dcramer): more of validate data needs stuffed into the manager
    # list(iter(data)) snapshots keys so pop() during the loop is safe.
    for key in list(iter(data)):
        if key in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(key)

        try:
            interface = get_interface(key)()
        except ValueError:
            continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception:
            # XXX: we should consider logging this.
            pass

    # TODO(dcramer): this logic is duplicated in ``validate_data`` from
    # coreapi
    # message is coerced to an interface, as its used for pure
    # index of searchable strings
    # See GH-3248
    message = data.pop('message', None)
    if message:
        if 'sentry.interfaces.Message' not in data:
            interface = get_interface('sentry.interfaces.Message')
            try:
                inst = interface.to_python({
                    'message': message,
                })
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass
        elif not data['sentry.interfaces.Message'].get('formatted'):
            # Keep both: the existing Message interface plus the raw
            # message as its 'formatted' attribute.
            interface = get_interface('sentry.interfaces.Message')
            try:
                inst = interface.to_python(
                    dict(
                        data['sentry.interfaces.Message'],
                        formatted=message,
                    )
                )
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass

    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data['type'] = eventtypes.infer(data).key
    data['version'] = self.version

    # TODO(dcramer): find a better place for this logic
    # A lone top-level stacktrace is folded into a single-valued exception.
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']

    # Derive the user's IP from the HTTP env when the User interface lacks one.
    if 'sentry.interfaces.Http' in data:
        try:
            ip_address = validate_ip(
                data['sentry.interfaces.Http'].get('env', {}).get('REMOTE_ADDR'),
                required=False,
            )
        except ValueError:
            ip_address = None

        if ip_address:
            data.setdefault('sentry.interfaces.User', {})
            data['sentry.interfaces.User'].setdefault('ip_address', ip_address)

    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

    if data['transaction']:
        data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

    return data
def validate_data(self, project, data):
    """Validate and sanitize a raw event payload for *project*.

    Coerces/validates message, culprit, event_id, timestamp, fingerprint,
    platform, modules, extra, tags and level; converts remaining
    non-reserved keys into interface JSON; records recoverable problems
    in ``data['errors']``. Raises APIForbidden for malformed top-level
    values. Returns the mutated dict.

    Fixes: iterate over a snapshot of the keys (popping while iterating
    the live ``.keys()`` view raises RuntimeError on Python 3) and use
    ``six.text_type`` instead of the Python-2-only ``unicode`` builtin.
    """
    # TODO(dcramer): move project out of the data packet
    data["project"] = project.id
    data["errors"] = []

    if not data.get("message"):
        data["message"] = "<no message value>"
    elif not isinstance(data["message"], six.string_types):
        raise APIForbidden("Invalid value for message")

    if data.get("culprit"):
        if not isinstance(data["culprit"], six.string_types):
            raise APIForbidden("Invalid value for culprit")

    if not data.get("event_id"):
        data["event_id"] = uuid.uuid4().hex
    elif not isinstance(data["event_id"], six.string_types):
        raise APIForbidden("Invalid value for event_id")

    if len(data["event_id"]) > 32:
        # Over-long IDs are replaced, not rejected; the original is recorded.
        self.log.info("Discarded value for event_id due to length (%d chars)", len(data["event_id"]))
        data["errors"].append({"type": EventError.VALUE_TOO_LONG, "name": "event_id", "value": data["event_id"]})
        data["event_id"] = uuid.uuid4().hex

    if "timestamp" in data:
        try:
            self._process_data_timestamp(data)
        except InvalidTimestamp as e:
            self.log.info("Discarded invalid value for timestamp: %r", data["timestamp"], exc_info=True)
            data["errors"].append(
                {"type": EventError.INVALID_DATA, "name": "timestamp", "value": data["timestamp"]}
            )
            del data["timestamp"]

    if "fingerprint" in data:
        try:
            self._process_fingerprint(data)
        except InvalidFingerprint as e:
            self.log.info("Discarded invalid value for fingerprint: %r", data["fingerprint"], exc_info=True)
            data["errors"].append(
                {"type": EventError.INVALID_DATA, "name": "fingerprint", "value": data["fingerprint"]}
            )

    if "platform" not in data or data["platform"] not in VALID_PLATFORMS:
        data["platform"] = "other"

    if data.get("modules") and type(data["modules"]) != dict:
        self.log.info("Discarded invalid type for modules: %s", type(data["modules"]))
        data["errors"].append({"type": EventError.INVALID_DATA, "name": "modules", "value": data["modules"]})
        del data["modules"]

    if data.get("extra") is not None and type(data["extra"]) != dict:
        self.log.info("Discarded invalid type for extra: %s", type(data["extra"]))
        data["errors"].append({"type": EventError.INVALID_DATA, "name": "extra", "value": data["extra"]})
        del data["extra"]

    if data.get("tags") is not None:
        if type(data["tags"]) == dict:
            data["tags"] = data["tags"].items()
        elif not isinstance(data["tags"], (list, tuple)):
            self.log.info("Discarded invalid type for tags: %s", type(data["tags"]))
            data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": data["tags"]})
            del data["tags"]

    if data.get("tags"):
        # remove any values which are over 32 characters
        tags = []
        for pair in data["tags"]:
            try:
                k, v = pair
            except ValueError:
                self.log.info("Discarded invalid tag value: %r", pair)
                data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": pair})
                continue

            if not isinstance(k, six.string_types):
                try:
                    k = six.text_type(k)
                except Exception:
                    self.log.info("Discarded invalid tag key: %r", type(k))
                    data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": pair})
                    continue

            if not isinstance(v, six.string_types):
                try:
                    v = six.text_type(v)
                except Exception:
                    self.log.info("Discarded invalid tag value: %s=%r", k, type(v))
                    data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": pair})
                    continue

            if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                self.log.info("Discarded invalid tag: %s=%s", k, v)
                data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": pair})
                continue

            # support tags with spaces by converting them
            k = k.replace(" ", "-")

            if not TagKey.is_valid_key(k):
                self.log.info("Discarded invalid tag key: %s", k)
                data["errors"].append({"type": EventError.INVALID_DATA, "name": "tags", "value": pair})
                continue

            tags.append((k, v))
        data["tags"] = tags

    # Snapshot the keys first: popping from ``data`` while iterating the
    # live dict view raises RuntimeError on Python 3.
    for k in list(data.keys()):
        if k in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(k)

        if not value:
            self.log.info("Ignored empty interface value: %s", k)
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            self.log.info("Ignored unknown attribute: %s", k)
            data["errors"].append({"type": EventError.INVALID_ATTRIBUTE, "name": k})
            continue

        if type(value) != dict:
            # HACK(dcramer): the exception interface supports a list as the
            # value. We should change this in a new protocol version.
            if type(value) in (list, tuple):
                value = {"values": value}
            else:
                self.log.info("Invalid parameter for value: %s (%r)", k, type(value))
                data["errors"].append({"type": EventError.INVALID_DATA, "name": k, "value": value})
                continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # Validation failures are expected noise (info); anything else is a bug (error).
            if isinstance(e, InterfaceValidationError):
                log = self.log.info
            else:
                log = self.log.error
            log("Discarded invalid value for interface: %s (%r)", k, value, exc_info=True)
            data["errors"].append({"type": EventError.INVALID_DATA, "name": k, "value": value})

    level = data.get("level") or DEFAULT_LOG_LEVEL
    if isinstance(level, six.string_types) and not level.isdigit():
        # assume it's something like 'warning'
        try:
            data["level"] = LOG_LEVEL_REVERSE_MAP[level]
        except KeyError as e:
            self.log.info("Discarded invalid logger value: %s", level)
            data["errors"].append({"type": EventError.INVALID_DATA, "name": "level", "value": level})
            data["level"] = LOG_LEVEL_REVERSE_MAP.get(DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

    if data.get("release"):
        # six.text_type replaces the Python-2-only ``unicode`` builtin.
        data["release"] = six.text_type(data["release"])
        if len(data["release"]) > 64:
            data["errors"].append({"type": EventError.VALUE_TOO_LONG, "name": "release", "value": data["release"]})
            del data["release"]

    return data
def normalize(self):
    """Normalize the raw event payload in ``self.data`` in place.

    Validates level/logger/platform, converts the timestamp to an epoch
    float, applies defaults, sanitizes tags, converts non-reserved keys
    into interface JSON, folds raw 'message' into the Message interface,
    infers the event type, merges a lone stacktrace into the exception,
    and derives the user IP from the Http env. Returns the mutated dict.

    Fix: iterate over a snapshot of the keys — popping from ``data``
    while iterating the live ``.keys()`` view raises RuntimeError on
    Python 3.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data

    # Level must be a string/int and a known level; anything else -> ERROR.
    if not isinstance(data.get("level"), (six.string_types, int)):
        data["level"] = logging.ERROR
    elif data["level"] not in LOG_LEVELS:
        data["level"] = logging.ERROR

    if not data.get("logger"):
        data["logger"] = DEFAULT_LOGGER_NAME
    else:
        logger = trim(data["logger"].strip(), 64)
        if TagKey.is_valid_key(logger):
            data["logger"] = logger
        else:
            data["logger"] = DEFAULT_LOGGER_NAME

    if data.get("platform"):
        data["platform"] = trim(data["platform"], 64)

    current_timestamp = timezone.now()
    timestamp = data.get("timestamp")
    if not timestamp:
        timestamp = current_timestamp

    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        timestamp = float(timestamp.strftime("%s"))

    data["timestamp"] = timestamp
    data["received"] = float(timezone.now().strftime("%s"))

    if not data.get("event_id"):
        data["event_id"] = uuid4().hex

    data.setdefault("culprit", None)
    data.setdefault("server_name", None)
    data.setdefault("site", None)
    data.setdefault("checksum", None)
    data.setdefault("fingerprint", None)
    data.setdefault("platform", None)
    data.setdefault("environment", None)
    data.setdefault("extra", {})
    data.setdefault("errors", [])

    tags = data.get("tags")
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)

    data["tags"] = []
    for key, value in tags:
        key = six.text_type(key).strip()
        value = six.text_type(value).strip()
        if not (key and value):
            continue
        data["tags"].append((key, value))
        # XXX(dcramer): many legacy apps are using the environment tag
        # rather than the key itself
        # NOTE(review): here the environment tag is promoted *and* kept in
        # data['tags']; sibling versions of this function exclude it from
        # the tags list — confirm which behavior is intended.
        if key == "environment" and not data.get("environment"):
            data["environment"] = value

    if not isinstance(data["extra"], dict):
        # throw it away
        data["extra"] = {}

    trim_dict(data["extra"], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    # TODO(dcramer): more of validate data needs stuffed into the manager
    # Snapshot the keys first; popping during iteration of the live view
    # raises RuntimeError on Python 3.
    for key in list(data.keys()):
        if key in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(key)

        try:
            interface = get_interface(key)()
        except ValueError:
            continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception:
            # XXX: we should consider logging this.
            pass

    # TODO(dcramer): this logic is duplicated in ``validate_data`` from
    # coreapi
    # message is coerced to an interface, as its used for pure
    # index of searchable strings
    # See GH-3248
    message = data.pop("message", None)
    if message:
        if "sentry.interfaces.Message" not in data:
            interface = get_interface("sentry.interfaces.Message")
            try:
                inst = interface.to_python({"message": message})
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass
        elif not data["sentry.interfaces.Message"].get("formatted"):
            interface = get_interface("sentry.interfaces.Message")
            try:
                inst = interface.to_python(dict(data["sentry.interfaces.Message"], formatted=message))
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass

    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data["type"] = eventtypes.infer(data).key
    data["version"] = self.version

    # TODO(dcramer): find a better place for this logic
    # A lone top-level stacktrace is folded into a single-valued exception.
    exception = data.get("sentry.interfaces.Exception")
    stacktrace = data.get("sentry.interfaces.Stacktrace")
    if exception and len(exception["values"]) == 1 and stacktrace:
        exception["values"][0]["stacktrace"] = stacktrace
        del data["sentry.interfaces.Stacktrace"]

    # Derive the user's IP from the HTTP env when the User interface lacks one.
    if "sentry.interfaces.Http" in data:
        try:
            ip_address = validate_ip(
                data["sentry.interfaces.Http"].get("env", {}).get("REMOTE_ADDR"), required=False
            )
        except ValueError:
            ip_address = None

        if ip_address:
            data.setdefault("sentry.interfaces.User", {})
            data["sentry.interfaces.User"].setdefault("ip_address", ip_address)

    if data["culprit"]:
        data["culprit"] = trim(data["culprit"], MAX_CULPRIT_LENGTH)

    return data
def validate_data(project, data, client=None):
    """Validate and sanitize a raw event payload for *project*.

    Truncates over-long message/culprit values, regenerates invalid
    event_ids, drops malformed timestamp/modules/extra/tags, converts
    remaining non-reserved keys into interface JSON, and resolves the
    log level. Raises APIError for malformed top-level values. Returns
    the mutated dict.

    Fixes: iterate over a snapshot of the keys (popping while iterating
    the live ``.keys()`` view raises RuntimeError on Python 3), and only
    log culprit truncation when truncation actually occurs (mirroring
    the message branch above).
    """
    # TODO(dcramer): move project out of the data packet
    data['project'] = project.id

    if not data.get('message'):
        data['message'] = '<no message value>'
    elif not isinstance(data['message'], six.string_types):
        raise APIError('Invalid value for message')
    elif len(data['message']) > settings.SENTRY_MAX_MESSAGE_LENGTH:
        logger.info(
            'Truncated value for message due to length (%d chars)',
            len(data['message']), **client_metadata(client, project))
        data['message'] = truncatechars(
            data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH)

    if data.get('culprit'):
        if not isinstance(data['culprit'], six.string_types):
            raise APIError('Invalid value for culprit')
        # Only log/truncate when the value is actually over the limit —
        # previously the "Truncated" message was logged unconditionally.
        if len(data['culprit']) > MAX_CULPRIT_LENGTH:
            logger.info(
                'Truncated value for culprit due to length (%d chars)',
                len(data['culprit']), **client_metadata(client, project))
            data['culprit'] = truncatechars(data['culprit'], MAX_CULPRIT_LENGTH)

    if not data.get('event_id'):
        data['event_id'] = uuid.uuid4().hex
    elif not isinstance(data['event_id'], six.string_types):
        raise APIError('Invalid value for event_id')

    if len(data['event_id']) > 32:
        logger.info(
            'Discarded value for event_id due to length (%d chars)',
            len(data['event_id']), **client_metadata(client, project))
        data['event_id'] = uuid.uuid4().hex

    if 'timestamp' in data:
        try:
            process_data_timestamp(data)
        except InvalidTimestamp as e:
            # Log the error, remove the timestamp, and continue
            logger.info(
                'Discarded invalid value for timestamp: %r',
                data['timestamp'], **client_metadata(client, project, exception=e))
            del data['timestamp']

    if data.get('modules') and type(data['modules']) != dict:
        logger.info(
            'Discarded invalid type for modules: %s',
            type(data['modules']), **client_metadata(client, project))
        del data['modules']

    if data.get('extra') is not None and type(data['extra']) != dict:
        logger.info(
            'Discarded invalid type for extra: %s',
            type(data['extra']), **client_metadata(client, project))
        del data['extra']

    if data.get('tags') is not None:
        if type(data['tags']) == dict:
            data['tags'] = data['tags'].items()
        elif not isinstance(data['tags'], (list, tuple)):
            logger.info(
                'Discarded invalid type for tags: %s',
                type(data['tags']), **client_metadata(client, project))
            del data['tags']

    if data.get('tags'):
        # remove any values which are over 32 characters
        tags = []
        for pair in data['tags']:
            try:
                k, v = pair
            except ValueError:
                logger.info('Discarded invalid tag value: %r', pair,
                            **client_metadata(client, project))
                continue

            if not isinstance(k, six.string_types):
                try:
                    k = six.text_type(k)
                except Exception:
                    logger.info('Discarded invalid tag key: %r', type(k),
                                **client_metadata(client, project))
                    continue

            if not isinstance(v, six.string_types):
                try:
                    v = six.text_type(v)
                except Exception:
                    logger.info('Discarded invalid tag value: %s=%r', k, type(v),
                                **client_metadata(client, project))
                    continue

            if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                logger.info('Discarded invalid tag: %s=%s', k, v,
                            **client_metadata(client, project))
                continue

            tags.append((k, v))
        data['tags'] = tags

    # Snapshot the keys first: popping from ``data`` while iterating the
    # live dict view raises RuntimeError on Python 3.
    for k in list(data.keys()):
        if k in RESERVED_FIELDS:
            continue

        value = data.pop(k)

        if not value:
            logger.info(
                'Ignored empty interface value: %s', k,
                **client_metadata(client, project))
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            logger.info(
                'Ignored unknown attribute: %s', k,
                **client_metadata(client, project))
            continue

        if type(value) != dict:
            # HACK(dcramer): the exception interface supports a list as the
            # value. We should change this in a new protocol version.
            if type(value) in (list, tuple):
                value = {'values': value}
            else:
                logger.info(
                    'Invalid parameters for value: %s', k, type(value),
                    **client_metadata(client, project))
                continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # AssertionError = expected validation noise; anything else is a bug.
            if isinstance(e, AssertionError):
                log = logger.info
            else:
                log = logger.error
            log('Discarded invalid value for interface: %s', k,
                **client_metadata(client, project, exception=e, extra={'value': value}))

    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, six.string_types) and not level.isdigit():
        # assume it's something like 'warning'
        try:
            data['level'] = LOG_LEVEL_REVERSE_MAP[level]
        except KeyError as e:
            logger.info(
                'Discarded invalid logger value: %s', level,
                **client_metadata(client, project, exception=e))
            data['level'] = LOG_LEVEL_REVERSE_MAP.get(
                DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

    return data
def test_get_interface_does_not_let_through_disallowed_name():
    """Looking up a disallowed interface name must raise ValueError."""
    try:
        get_interface('subprocess')
    except ValueError:
        pass
    else:
        raise AssertionError('get_interface accepted a disallowed name')
def validate_data(project, data, client=None):
    """Validate and sanitize a raw event payload for *project*.

    Coerces/validates message, culprit, event_id, timestamp, modules,
    extra and tags; converts remaining non-reserved keys into interface
    JSON; resolves the log level. Raises APIError for malformed
    top-level values. Returns the mutated dict.

    Fix: iterate over a snapshot of the keys — popping from ``data``
    while iterating the live ``.keys()`` view raises RuntimeError on
    Python 3.
    """
    # TODO(dcramer): move project out of the data packet
    data["project"] = project.id

    if not data.get("message"):
        data["message"] = "<no message value>"
    elif not isinstance(data["message"], six.string_types):
        raise APIError("Invalid value for message")

    if data.get("culprit"):
        if not isinstance(data["culprit"], six.string_types):
            raise APIError("Invalid value for culprit")

    if not data.get("event_id"):
        data["event_id"] = uuid.uuid4().hex
    elif not isinstance(data["event_id"], six.string_types):
        raise APIError("Invalid value for event_id")

    if len(data["event_id"]) > 32:
        # Over-long IDs are silently replaced with a fresh one.
        logger.info(
            "Discarded value for event_id due to length (%d chars)",
            len(data["event_id"]),
            **client_metadata(client, project)
        )
        data["event_id"] = uuid.uuid4().hex

    if "timestamp" in data:
        try:
            process_data_timestamp(data)
        except InvalidTimestamp as e:
            # Log the error, remove the timestamp, and continue
            logger.info(
                "Discarded invalid value for timestamp: %r",
                data["timestamp"],
                **client_metadata(client, project, exception=e)
            )
            del data["timestamp"]

    if data.get("modules") and type(data["modules"]) != dict:
        logger.info("Discarded invalid type for modules: %s", type(data["modules"]), **client_metadata(client, project))
        del data["modules"]

    if data.get("extra") is not None and type(data["extra"]) != dict:
        logger.info("Discarded invalid type for extra: %s", type(data["extra"]), **client_metadata(client, project))
        del data["extra"]

    if data.get("tags") is not None:
        if type(data["tags"]) == dict:
            data["tags"] = data["tags"].items()
        elif not isinstance(data["tags"], (list, tuple)):
            logger.info("Discarded invalid type for tags: %s", type(data["tags"]), **client_metadata(client, project))
            del data["tags"]

    if data.get("tags"):
        # remove any values which are over 32 characters
        tags = []
        for pair in data["tags"]:
            try:
                k, v = pair
            except ValueError:
                logger.info("Discarded invalid tag value: %r", pair, **client_metadata(client, project))
                continue

            if not isinstance(k, six.string_types):
                try:
                    k = six.text_type(k)
                except Exception:
                    logger.info("Discarded invalid tag key: %r", type(k), **client_metadata(client, project))
                    continue

            if not isinstance(v, six.string_types):
                try:
                    v = six.text_type(v)
                except Exception:
                    logger.info("Discarded invalid tag value: %s=%r", k, type(v), **client_metadata(client, project))
                    continue

            if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                logger.info("Discarded invalid tag: %s=%s", k, v, **client_metadata(client, project))
                continue

            tags.append((k, v))
        data["tags"] = tags

    # Snapshot the keys first: popping from ``data`` while iterating the
    # live dict view raises RuntimeError on Python 3.
    for k in list(data.keys()):
        if k in RESERVED_FIELDS:
            continue

        value = data.pop(k)

        if not value:
            logger.info("Ignored empty interface value: %s", k, **client_metadata(client, project))
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            logger.info("Ignored unknown attribute: %s", k, **client_metadata(client, project))
            continue

        if type(value) != dict:
            # HACK(dcramer): the exception interface supports a list as the
            # value. We should change this in a new protocol version.
            if type(value) in (list, tuple):
                value = {"values": value}
            else:
                logger.info("Invalid parameters for value: %s", k, type(value), **client_metadata(client, project))
                continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # AssertionError = expected validation noise; anything else is a bug.
            if isinstance(e, AssertionError):
                log = logger.info
            else:
                log = logger.error
            log(
                "Discarded invalid value for interface: %s",
                k,
                **client_metadata(client, project, exception=e, extra={"value": value})
            )

    level = data.get("level") or DEFAULT_LOG_LEVEL
    if isinstance(level, six.string_types) and not level.isdigit():
        # assume it's something like 'warning'
        try:
            data["level"] = LOG_LEVEL_REVERSE_MAP[level]
        except KeyError as e:
            logger.info("Discarded invalid logger value: %s", level, **client_metadata(client, project, exception=e))
            data["level"] = LOG_LEVEL_REVERSE_MAP.get(DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

    return data
def normalize(self):
    """Normalize ``self.data`` in place and return it.

    Fills defaults for top-level fields (level, logger, timestamp,
    event_id, ...), coerces tags to a list of (key, value) text pairs,
    trims over-long values, converts non-reserved keys into interface
    payloads, and merges a lone stacktrace into a single exception value.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data

    # level must be an int (or numeric-ish string) from LOG_LEVELS
    if not isinstance(data.get('level'), (six.string_types, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR

    if not data.get('logger'):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        logger = trim(data['logger'].strip(), 64)
        if TagKey.is_valid_key(logger):
            data['logger'] = logger
        else:
            data['logger'] = DEFAULT_LOGGER_NAME

    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)

    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()

    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        # NOTE(review): strftime('%s') delegates to the platform C library;
        # presumably only supported on POSIX — confirm portability.
        timestamp = float(timestamp.strftime('%s'))

    data['timestamp'] = timestamp

    if not data.get('event_id'):
        data['event_id'] = uuid4().hex

    data.setdefault('message', '')
    data.setdefault('culprit', None)
    data.setdefault('time_spent', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('fingerprint', None)
    data.setdefault('platform', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('errors', [])

    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)

    # keep only pairs where both key and value are non-empty after coercion
    data['tags'] = []
    for key, value in tags:
        key = six.text_type(key).strip()
        value = six.text_type(value).strip()
        if not (key and value):
            continue
        data['tags'].append((key, value))

    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}

    trim_dict(
        data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    # TODO(dcramer): more of validate data needs stuffed into the manager
    # Convert any non-reserved top-level key into its interface payload;
    # unknown keys or failing conversions are silently dropped here.
    for key in data.keys():
        if key in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(key)

        try:
            interface = get_interface(key)()
        except ValueError:
            continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception:
            pass

    data['version'] = self.version

    # TODO(dcramer): find a better place for this logic
    # A lone top-level stacktrace belongs to the single exception value.
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']

    # Backfill user.ip_address from the HTTP env's REMOTE_ADDR if valid.
    if 'sentry.interfaces.Http' in data:
        try:
            ip_address = validate_ip(
                data['sentry.interfaces.Http'].get(
                    'env', {}).get('REMOTE_ADDR'),
                required=False,
            )
        except ValueError:
            ip_address = None
        if ip_address:
            data.setdefault('sentry.interfaces.User', {})
            data['sentry.interfaces.User'].setdefault(
                'ip_address', ip_address)

    if data['time_spent']:
        data['time_spent'] = int(data['time_spent'])

    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

    if data['message']:
        data['message'] = trim(
            data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH)

    return data
def validate_data(self, project, data):
    """Validate and coerce a raw event payload in place.

    Like the module-level validator, but collects recoverable problems
    into ``data['errors']`` (as ``EventError`` records) instead of only
    logging them. Wrong types for ``message``/``culprit``/``event_id``
    still raise ``APIForbidden``. Returns the mutated ``data`` dict.
    """
    # TODO(dcramer): move project out of the data packet
    data['project'] = project.id

    data['errors'] = []

    if not data.get('message'):
        data['message'] = '<no message value>'
    elif not isinstance(data['message'], six.string_types):
        raise APIForbidden('Invalid value for message')

    if data.get('culprit'):
        if not isinstance(data['culprit'], six.string_types):
            raise APIForbidden('Invalid value for culprit')

    if not data.get('event_id'):
        data['event_id'] = uuid.uuid4().hex
    elif not isinstance(data['event_id'], six.string_types):
        raise APIForbidden('Invalid value for event_id')
    if len(data['event_id']) > 32:
        # over-long ids are recorded as an error and regenerated
        self.log.info(
            'Discarded value for event_id due to length (%d chars)',
            len(data['event_id']))
        data['errors'].append({
            'type': EventError.VALUE_TOO_LONG,
            'name': 'event_id',
            'value': data['event_id'],
        })
        data['event_id'] = uuid.uuid4().hex

    if 'timestamp' in data:
        try:
            self._process_data_timestamp(data)
        except InvalidTimestamp as e:
            self.log.info(
                'Discarded invalid value for timestamp: %r',
                data['timestamp'], exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'timestamp',
                'value': data['timestamp'],
            })
            del data['timestamp']

    if 'fingerprint' in data:
        try:
            self._process_fingerprint(data)
        except InvalidFingerprint as e:
            self.log.info(
                'Discarded invalid value for fingerprint: %r',
                data['fingerprint'], exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'fingerprint',
                'value': data['fingerprint'],
            })
            del data['fingerprint']

    if 'platform' not in data or data['platform'] not in VALID_PLATFORMS:
        data['platform'] = 'other'

    if data.get('modules') and type(data['modules']) != dict:
        self.log.info(
            'Discarded invalid type for modules: %s', type(data['modules']))
        data['errors'].append({
            'type': EventError.INVALID_DATA,
            'name': 'modules',
            'value': data['modules'],
        })
        del data['modules']

    if data.get('extra') is not None and type(data['extra']) != dict:
        self.log.info(
            'Discarded invalid type for extra: %s', type(data['extra']))
        data['errors'].append({
            'type': EventError.INVALID_DATA,
            'name': 'extra',
            'value': data['extra'],
        })
        del data['extra']

    if data.get('tags') is not None:
        if type(data['tags']) == dict:
            data['tags'] = data['tags'].items()
        elif not isinstance(data['tags'], (list, tuple)):
            self.log.info(
                'Discarded invalid type for tags: %s', type(data['tags']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'tags',
                'value': data['tags'],
            })
            del data['tags']

    if data.get('tags'):
        # remove any values which are over 32 characters
        tags = []
        for pair in data['tags']:
            try:
                k, v = pair
            except ValueError:
                self.log.info('Discarded invalid tag value: %r', pair)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            # coerce non-string keys/values to text where possible
            if not isinstance(k, six.string_types):
                try:
                    k = six.text_type(k)
                except Exception:
                    self.log.info('Discarded invalid tag key: %r', type(k))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

            if not isinstance(v, six.string_types):
                try:
                    v = six.text_type(v)
                except Exception:
                    self.log.info('Discarded invalid tag value: %s=%r', k, type(v))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

            if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                self.log.info('Discarded invalid tag: %s=%s', k, v)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            # support tags with spaces by converting them
            k = k.replace(' ', '-')

            if TagKey.is_reserved_key(k):
                self.log.info('Discarding reserved tag key: %s', k)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            if not TagKey.is_valid_key(k):
                self.log.info('Discarded invalid tag key: %s', k)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            tags.append((k, v))
        data['tags'] = tags

    # Convert remaining non-reserved keys into interface payloads,
    # recording an error for each unknown/invalid one.
    for k in data.keys():
        if k in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(k)

        if not value:
            self.log.info('Ignored empty interface value: %s', k)
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            self.log.info('Ignored unknown attribute: %s', k)
            data['errors'].append({
                'type': EventError.INVALID_ATTRIBUTE,
                'name': k,
            })
            continue

        if type(value) != dict:
            # HACK(dcramer): the exception/breadcrumbs interface supports a
            # list as the value. We should change this in a new protocol
            # version.
            if type(value) in (list, tuple):
                value = {'values': value}
            else:
                self.log.info(
                    'Invalid parameter for value: %s (%r)', k, type(value))
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value,
                })
                continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # InterfaceValidationError is a client-data problem (info);
            # anything else is unexpected (error).
            if isinstance(e, InterfaceValidationError):
                log = self.log.info
            else:
                log = self.log.error
            log('Discarded invalid value for interface: %s (%r)', k, value,
                exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': k,
                'value': value,
            })

    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, six.string_types) and not level.isdigit():
        # assume it's something like 'warning'
        try:
            data['level'] = LOG_LEVEL_REVERSE_MAP[level]
        except KeyError as e:
            self.log.info(
                'Discarded invalid logger value: %s', level)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'level',
                'value': level,
            })
            data['level'] = LOG_LEVEL_REVERSE_MAP.get(
                DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

    if data.get('release'):
        data['release'] = unicode(data['release'])
        if len(data['release']) > 64:
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'release',
                'value': data['release'],
            })
            del data['release']

    return data
def validate_data(self, project, data):
    """Validate and coerce a raw event payload in place.

    Collects recoverable problems into ``data['errors']`` (as
    ``EventError`` records). Compared to earlier revisions this variant
    also validates event_id format, tag values, release/dist/environment
    length, time_spent bounds, and coerces ``message`` into the Message
    interface. Wrong types for ``culprit``/``event_id`` raise
    ``APIForbidden``. Returns the mutated ``data`` dict.
    """
    # TODO(dcramer): move project out of the data packet
    data['project'] = project.id

    data['errors'] = []

    if data.get('culprit'):
        if not isinstance(data['culprit'], six.string_types):
            raise APIForbidden('Invalid value for culprit')

    if not data.get('event_id'):
        data['event_id'] = uuid.uuid4().hex
    elif not isinstance(data['event_id'], six.string_types):
        raise APIForbidden('Invalid value for event_id')
    if len(data['event_id']) > 32:
        self.log.debug(
            'Discarded value for event_id due to length (%d chars)',
            len(data['event_id']))
        data['errors'].append({
            'type': EventError.VALUE_TOO_LONG,
            'name': 'event_id',
            'value': data['event_id'],
        })
        data['event_id'] = uuid.uuid4().hex
    elif not is_event_id(data['event_id']):
        # malformed ids are recorded as an error and regenerated
        self.log.debug(
            'Discarded invalid value for event_id: %r',
            data['event_id'], exc_info=True)
        data['errors'].append({
            'type': EventError.INVALID_DATA,
            'name': 'event_id',
            'value': data['event_id'],
        })
        data['event_id'] = uuid.uuid4().hex

    if 'timestamp' in data:
        try:
            self._process_data_timestamp(data)
        except InvalidTimestamp as e:
            self.log.debug(
                'Discarded invalid value for timestamp: %r',
                data['timestamp'], exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'timestamp',
                'value': data['timestamp'],
            })
            del data['timestamp']

    if 'fingerprint' in data:
        try:
            self._process_fingerprint(data)
        except InvalidFingerprint as e:
            self.log.debug(
                'Discarded invalid value for fingerprint: %r',
                data['fingerprint'], exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'fingerprint',
                'value': data['fingerprint'],
            })
            del data['fingerprint']

    if 'platform' not in data or data['platform'] not in VALID_PLATFORMS:
        data['platform'] = 'other'

    if data.get('modules') and type(data['modules']) != dict:
        self.log.debug(
            'Discarded invalid type for modules: %s', type(data['modules']))
        data['errors'].append({
            'type': EventError.INVALID_DATA,
            'name': 'modules',
            'value': data['modules'],
        })
        del data['modules']

    if data.get('extra') is not None and type(data['extra']) != dict:
        self.log.debug(
            'Discarded invalid type for extra: %s', type(data['extra']))
        data['errors'].append({
            'type': EventError.INVALID_DATA,
            'name': 'extra',
            'value': data['extra'],
        })
        del data['extra']

    if data.get('tags') is not None:
        if type(data['tags']) == dict:
            data['tags'] = list(data['tags'].items())
        elif not isinstance(data['tags'], (list, tuple)):
            self.log.debug(
                'Discarded invalid type for tags: %s', type(data['tags']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'tags',
                'value': data['tags'],
            })
            del data['tags']

    if data.get('tags'):
        # remove any values which are over 32 characters
        tags = []
        for pair in data['tags']:
            try:
                k, v = pair
            except ValueError:
                self.log.debug('Discarded invalid tag value: %r', pair)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            if not isinstance(k, six.string_types):
                try:
                    k = six.text_type(k)
                except Exception:
                    self.log.debug('Discarded invalid tag key: %r', type(k))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

            if not isinstance(v, six.string_types):
                try:
                    v = six.text_type(v)
                except Exception:
                    self.log.debug('Discarded invalid tag value: %s=%r', k, type(v))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

            if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                self.log.debug('Discarded invalid tag: %s=%s', k, v)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            # support tags with spaces by converting them
            k = k.replace(' ', '-')

            if TagKey.is_reserved_key(k):
                self.log.debug('Discarding reserved tag key: %s', k)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            if not TagKey.is_valid_key(k):
                self.log.debug('Discarded invalid tag key: %s', k)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            if not TagValue.is_valid_value(v):
                self.log.debug('Discard invalid tag value: %s', v)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            tags.append((k, v))
        data['tags'] = tags

    # Convert non-reserved keys into interface payloads; snapshot the keys
    # first since we mutate the dict while walking.
    for k in list(iter(data)):
        if k in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(k)

        if not value:
            self.log.debug('Ignored empty interface value: %s', k)
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            self.log.debug('Ignored unknown attribute: %s', k)
            data['errors'].append({
                'type': EventError.INVALID_ATTRIBUTE,
                'name': k,
            })
            continue

        if type(value) != dict:
            # HACK(dcramer): the exception/breadcrumbs interface supports a
            # list as the value. We should change this in a new protocol
            # version.
            if type(value) in (list, tuple):
                value = {'values': value}
            else:
                self.log.debug(
                    'Invalid parameter for value: %s (%r)', k, type(value))
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value,
                })
                continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            if isinstance(e, InterfaceValidationError):
                log = self.log.debug
            else:
                log = self.log.error
            log('Discarded invalid value for interface: %s (%r)', k, value,
                exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': k,
                'value': value,
            })

    # TODO(dcramer): ideally this logic would happen in normalize, but today
    # we don't do "validation" there (create errors)
    # message is coerced to an interface, as its used for pure
    # index of searchable strings
    # See GH-3248
    message = data.pop('message', None)
    if message:
        if 'sentry.interfaces.Message' not in data:
            value = {
                'message': message,
            }
        elif not data['sentry.interfaces.Message'].get('formatted'):
            value = data['sentry.interfaces.Message']
            value['formatted'] = message
        else:
            value = None

        if value is not None:
            k = 'sentry.interfaces.Message'
            interface = get_interface(k)
            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                if isinstance(e, InterfaceValidationError):
                    log = self.log.debug
                else:
                    log = self.log.error
                log('Discarded invalid value for interface: %s (%r)', k, value,
                    exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value,
                })

    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, six.string_types) and not level.isdigit():
        # assume it's something like 'warning'
        try:
            data['level'] = LOG_LEVELS_MAP[level]
        except KeyError as e:
            self.log.debug(
                'Discarded invalid logger value: %s', level)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'level',
                'value': level,
            })
            data['level'] = LOG_LEVELS_MAP.get(
                DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

    if data.get('release'):
        data['release'] = six.text_type(data['release'])
        if len(data['release']) > 64:
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'release',
                'value': data['release'],
            })
            del data['release']

    if data.get('dist'):
        data['dist'] = six.text_type(data['dist']).strip()
        # dist is meaningless without a release
        if not data.get('release'):
            data['dist'] = None
        elif len(data['dist']) > 64:
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'dist',
                'value': data['dist'],
            })
            del data['dist']
        elif _dist_re.match(data['dist']) is None:
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'dist',
                'value': data['dist'],
            })
            del data['dist']

    if data.get('environment'):
        data['environment'] = six.text_type(data['environment'])
        if len(data['environment']) > 64:
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'environment',
                'value': data['environment'],
            })
            del data['environment']

    if data.get('time_spent'):
        try:
            data['time_spent'] = int(data['time_spent'])
        except (ValueError, TypeError):
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'time_spent',
                'value': data['time_spent'],
            })
            del data['time_spent']
        else:
            if data['time_spent'] > BoundedIntegerField.MAX_VALUE:
                data['errors'].append({
                    'type': EventError.VALUE_TOO_LONG,
                    'name': 'time_spent',
                    'value': data['time_spent'],
                })
                del data['time_spent']

    return data
def normalize(self):
    """Normalize ``self.data`` in place and return it.

    Fills defaults for top-level fields (level, logger, timestamp,
    event_id, ...), coerces tags to a list of (key, value) text pairs,
    trims over-long values, converts non-reserved keys into interface
    payloads, and merges a lone stacktrace into a single exception value.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data

    # level must be an int (or numeric-ish string) from LOG_LEVELS
    if not isinstance(data.get('level'), (six.string_types, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR

    if not data.get('logger'):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        logger = trim(data['logger'].strip(), 64)
        if TagKey.is_valid_key(logger):
            data['logger'] = logger
        else:
            data['logger'] = DEFAULT_LOGGER_NAME

    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)

    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()

    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        # NOTE(review): strftime('%s') delegates to the platform C library;
        # presumably only supported on POSIX — confirm portability.
        timestamp = float(timestamp.strftime('%s'))

    data['timestamp'] = timestamp

    if not data.get('event_id'):
        data['event_id'] = uuid4().hex

    data.setdefault('message', '')
    data.setdefault('culprit', None)
    data.setdefault('time_spent', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('fingerprint', None)
    data.setdefault('platform', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('errors', [])

    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)

    # keep only pairs where both key and value are non-empty after coercion
    data['tags'] = []
    for key, value in tags:
        key = six.text_type(key).strip()
        value = six.text_type(value).strip()
        if not (key and value):
            continue
        data['tags'].append((key, value))

    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}

    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    # TODO(dcramer): more of validate data needs stuffed into the manager
    # Convert any non-reserved top-level key into its interface payload;
    # unknown keys or failing conversions are silently dropped here.
    for key in data.keys():
        if key in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(key)

        try:
            interface = get_interface(key)()
        except ValueError:
            continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception:
            pass

    data['version'] = self.version

    # TODO(dcramer): find a better place for this logic
    # A lone top-level stacktrace belongs to the single exception value.
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']

    # Backfill user.ip_address from the HTTP env's REMOTE_ADDR if valid.
    if 'sentry.interfaces.Http' in data:
        try:
            ip_address = validate_ip(
                data['sentry.interfaces.Http'].get('env', {}).get('REMOTE_ADDR'),
                required=False,
            )
        except ValueError:
            ip_address = None
        if ip_address:
            data.setdefault('sentry.interfaces.User', {})
            data['sentry.interfaces.User'].setdefault(
                'ip_address', ip_address)

    if data['time_spent']:
        data['time_spent'] = int(data['time_spent'])

    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

    if data['message']:
        data['message'] = trim(data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH)

    return data
def validate_data(self, project, data):
    """Validate and coerce a raw event payload in place.

    Collects recoverable problems into ``data['errors']`` (as
    ``EventError`` records). This variant delegates tag-key/value checks
    to ``tagstore`` and also validates event_id format, release/dist/
    environment length, time_spent bounds, and coerces ``message`` into
    the Message interface. Wrong types for ``culprit``/``event_id`` raise
    ``APIForbidden``. Returns the mutated ``data`` dict.
    """
    # TODO(dcramer): move project out of the data packet
    data['project'] = project.id

    data['errors'] = []

    if data.get('culprit'):
        if not isinstance(data['culprit'], six.string_types):
            raise APIForbidden('Invalid value for culprit')

    if not data.get('event_id'):
        data['event_id'] = uuid.uuid4().hex
    elif not isinstance(data['event_id'], six.string_types):
        raise APIForbidden('Invalid value for event_id')
    if len(data['event_id']) > 32:
        self.log.debug(
            'Discarded value for event_id due to length (%d chars)',
            len(data['event_id']))
        data['errors'].append({
            'type': EventError.VALUE_TOO_LONG,
            'name': 'event_id',
            'value': data['event_id'],
        })
        data['event_id'] = uuid.uuid4().hex
    elif not is_event_id(data['event_id']):
        # malformed ids are recorded as an error and regenerated
        self.log.debug('Discarded invalid value for event_id: %r',
                       data['event_id'], exc_info=True)
        data['errors'].append({
            'type': EventError.INVALID_DATA,
            'name': 'event_id',
            'value': data['event_id'],
        })
        data['event_id'] = uuid.uuid4().hex

    if 'timestamp' in data:
        try:
            self._process_data_timestamp(data)
        except InvalidTimestamp as e:
            self.log.debug('Discarded invalid value for timestamp: %r',
                           data['timestamp'], exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'timestamp',
                'value': data['timestamp'],
            })
            del data['timestamp']

    if 'fingerprint' in data:
        try:
            self._process_fingerprint(data)
        except InvalidFingerprint as e:
            self.log.debug('Discarded invalid value for fingerprint: %r',
                           data['fingerprint'], exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'fingerprint',
                'value': data['fingerprint'],
            })
            del data['fingerprint']

    if 'platform' not in data or data['platform'] not in VALID_PLATFORMS:
        data['platform'] = 'other'

    if data.get('modules') and type(data['modules']) != dict:
        self.log.debug('Discarded invalid type for modules: %s',
                       type(data['modules']))
        data['errors'].append({
            'type': EventError.INVALID_DATA,
            'name': 'modules',
            'value': data['modules'],
        })
        del data['modules']

    if data.get('extra') is not None and type(data['extra']) != dict:
        self.log.debug('Discarded invalid type for extra: %s',
                       type(data['extra']))
        data['errors'].append({
            'type': EventError.INVALID_DATA,
            'name': 'extra',
            'value': data['extra'],
        })
        del data['extra']

    if data.get('tags') is not None:
        if type(data['tags']) == dict:
            data['tags'] = list(data['tags'].items())
        elif not isinstance(data['tags'], (list, tuple)):
            self.log.debug('Discarded invalid type for tags: %s',
                           type(data['tags']))
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'tags',
                'value': data['tags'],
            })
            del data['tags']

    if data.get('tags'):
        # remove any values which are over 32 characters
        tags = []
        for pair in data['tags']:
            try:
                k, v = pair
            except ValueError:
                self.log.debug('Discarded invalid tag value: %r', pair)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            if not isinstance(k, six.string_types):
                try:
                    k = six.text_type(k)
                except Exception:
                    self.log.debug('Discarded invalid tag key: %r', type(k))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

            if not isinstance(v, six.string_types):
                try:
                    v = six.text_type(v)
                except Exception:
                    self.log.debug('Discarded invalid tag value: %s=%r', k, type(v))
                    data['errors'].append({
                        'type': EventError.INVALID_DATA,
                        'name': 'tags',
                        'value': pair,
                    })
                    continue

            if len(k) > MAX_TAG_KEY_LENGTH or len(
                    v) > MAX_TAG_VALUE_LENGTH:
                self.log.debug('Discarded invalid tag: %s=%s', k, v)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            # support tags with spaces by converting them
            k = k.replace(' ', '-')

            if tagstore.is_reserved_key(k):
                self.log.debug('Discarding reserved tag key: %s', k)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            if not tagstore.is_valid_key(k):
                self.log.debug('Discarded invalid tag key: %s', k)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            if not tagstore.is_valid_value(v):
                self.log.debug('Discard invalid tag value: %s', v)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': 'tags',
                    'value': pair,
                })
                continue

            tags.append((k, v))
        data['tags'] = tags

    # Convert non-reserved keys into interface payloads; snapshot the keys
    # first since we mutate the dict while walking.
    for k in list(iter(data)):
        if k in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(k)

        if not value:
            self.log.debug('Ignored empty interface value: %s', k)
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            self.log.debug('Ignored unknown attribute: %s', k)
            data['errors'].append({
                'type': EventError.INVALID_ATTRIBUTE,
                'name': k,
            })
            continue

        if type(value) != dict:
            # HACK(dcramer): the exception/breadcrumbs interface supports a
            # list as the value. We should change this in a new protocol
            # version.
            if type(value) in (list, tuple):
                value = {'values': value}
            else:
                self.log.debug('Invalid parameter for value: %s (%r)',
                               k, type(value))
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value,
                })
                continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            if isinstance(e, InterfaceValidationError):
                log = self.log.debug
            else:
                log = self.log.error
            log('Discarded invalid value for interface: %s (%r)', k, value,
                exc_info=True)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': k,
                'value': value,
            })

    # TODO(dcramer): ideally this logic would happen in normalize, but today
    # we don't do "validation" there (create errors)
    # message is coerced to an interface, as its used for pure
    # index of searchable strings
    # See GH-3248
    message = data.pop('message', None)
    if message:
        if 'sentry.interfaces.Message' not in data:
            value = {
                'message': message,
            }
        elif not data['sentry.interfaces.Message'].get('formatted'):
            value = data['sentry.interfaces.Message']
            value['formatted'] = message
        else:
            value = None

        if value is not None:
            k = 'sentry.interfaces.Message'
            interface = get_interface(k)
            try:
                inst = interface.to_python(value)
                data[inst.get_path()] = inst.to_json()
            except Exception as e:
                if isinstance(e, InterfaceValidationError):
                    log = self.log.debug
                else:
                    log = self.log.error
                log('Discarded invalid value for interface: %s (%r)', k, value,
                    exc_info=True)
                data['errors'].append({
                    'type': EventError.INVALID_DATA,
                    'name': k,
                    'value': value,
                })

    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, six.string_types) and not level.isdigit():
        # assume it's something like 'warning'
        try:
            data['level'] = LOG_LEVELS_MAP[level]
        except KeyError as e:
            self.log.debug('Discarded invalid logger value: %s', level)
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'level',
                'value': level,
            })
            data['level'] = LOG_LEVELS_MAP.get(DEFAULT_LOG_LEVEL,
                                               DEFAULT_LOG_LEVEL)

    if data.get('release'):
        data['release'] = six.text_type(data['release'])
        if len(data['release']) > 64:
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'release',
                'value': data['release'],
            })
            del data['release']

    if data.get('dist'):
        data['dist'] = six.text_type(data['dist']).strip()
        # dist is meaningless without a release
        if not data.get('release'):
            data['dist'] = None
        elif len(data['dist']) > 64:
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'dist',
                'value': data['dist'],
            })
            del data['dist']
        elif _dist_re.match(data['dist']) is None:
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'dist',
                'value': data['dist'],
            })
            del data['dist']

    if data.get('environment'):
        data['environment'] = six.text_type(data['environment'])
        if len(data['environment']) > 64:
            data['errors'].append({
                'type': EventError.VALUE_TOO_LONG,
                'name': 'environment',
                'value': data['environment'],
            })
            del data['environment']

    if data.get('time_spent'):
        try:
            data['time_spent'] = int(data['time_spent'])
        except (ValueError, TypeError):
            data['errors'].append({
                'type': EventError.INVALID_DATA,
                'name': 'time_spent',
                'value': data['time_spent'],
            })
            del data['time_spent']
        else:
            if data['time_spent'] > BoundedIntegerField.MAX_VALUE:
                data['errors'].append({
                    'type': EventError.VALUE_TOO_LONG,
                    'name': 'time_spent',
                    'value': data['time_spent'],
                })
                del data['time_spent']

    return data
def validate_data(self, project, data):
    """Validate and coerce a raw event payload in place.

    Stamps ``data['project']``, fills/validates ``message``, ``culprit``
    and ``event_id``, normalizes ``timestamp``, drops malformed
    ``modules``/``extra``/``tags``, converts non-reserved top-level keys
    into interface payloads via ``get_interface``, and normalizes
    ``level``/``release``. Invalid values are logged and discarded; only
    wrong *types* for ``message``/``culprit``/``event_id`` raise
    ``APIForbidden``. Returns the mutated ``data`` dict.
    """
    # TODO(dcramer): move project out of the data packet
    data['project'] = project.id

    if not data.get('message'):
        data['message'] = '<no message value>'
    elif not isinstance(data['message'], six.string_types):
        raise APIForbidden('Invalid value for message')

    if data.get('culprit'):
        if not isinstance(data['culprit'], six.string_types):
            raise APIForbidden('Invalid value for culprit')

    if not data.get('event_id'):
        data['event_id'] = uuid.uuid4().hex
    elif not isinstance(data['event_id'], six.string_types):
        raise APIForbidden('Invalid value for event_id')
    if len(data['event_id']) > 32:
        # over-long ids are replaced, not rejected
        self.log.info(
            'Discarded value for event_id due to length (%d chars)',
            len(data['event_id']))
        data['event_id'] = uuid.uuid4().hex

    if 'timestamp' in data:
        try:
            self._process_data_timestamp(data)
        except InvalidTimestamp as e:
            self.log.info(
                'Discarded invalid value for timestamp: %r',
                data['timestamp'], exc_info=True)
            del data['timestamp']

    if data.get('modules') and type(data['modules']) != dict:
        self.log.info(
            'Discarded invalid type for modules: %s', type(data['modules']))
        del data['modules']

    if data.get('extra') is not None and type(data['extra']) != dict:
        self.log.info(
            'Discarded invalid type for extra: %s', type(data['extra']))
        del data['extra']

    if data.get('tags') is not None:
        if type(data['tags']) == dict:
            data['tags'] = data['tags'].items()
        elif not isinstance(data['tags'], (list, tuple)):
            self.log.info(
                'Discarded invalid type for tags: %s', type(data['tags']))
            del data['tags']

    if data.get('tags'):
        # remove any values which are over 32 characters
        tags = []
        for pair in data['tags']:
            try:
                k, v = pair
            except ValueError:
                self.log.info('Discarded invalid tag value: %r', pair)
                continue
            # coerce non-string keys/values to text where possible
            if not isinstance(k, six.string_types):
                try:
                    k = six.text_type(k)
                except Exception:
                    self.log.info('Discarded invalid tag key: %r', type(k))
                    continue
            if not isinstance(v, six.string_types):
                try:
                    v = six.text_type(v)
                except Exception:
                    self.log.info('Discarded invalid tag value: %s=%r', k, type(v))
                    continue
            if len(k) > MAX_TAG_KEY_LENGTH or len(v) > MAX_TAG_VALUE_LENGTH:
                self.log.info('Discarded invalid tag: %s=%s', k, v)
                continue
            tags.append((k, v))
        data['tags'] = tags

    # Convert non-reserved keys into interface payloads. NOTE: we pop keys
    # and insert new ones while walking, so iterate over a snapshot of the
    # keys (also required for Python 3 compatibility).
    for k in list(data.keys()):
        if k in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(k)

        if not value:
            self.log.info('Ignored empty interface value: %s', k)
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            self.log.info('Ignored unknown attribute: %s', k)
            continue

        if type(value) != dict:
            # HACK(dcramer): the exception interface supports a list as the
            # value. We should change this in a new protocol version.
            if type(value) in (list, tuple):
                value = {'values': value}
            else:
                self.log.info(
                    'Invalid parameter for value: %s (%r)', k, type(value))
                continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # AssertionError is treated as a client-data problem (info);
            # anything else is unexpected (error).
            if isinstance(e, AssertionError):
                log = self.log.info
            else:
                log = self.log.error
            log('Discarded invalid value for interface: %s (%r)', k, value,
                exc_info=True)

    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, six.string_types) and not level.isdigit():
        # assume it's something like 'warning'
        try:
            data['level'] = LOG_LEVEL_REVERSE_MAP[level]
        except KeyError as e:
            self.log.info(
                'Discarded invalid logger value: %s', level)
            data['level'] = LOG_LEVEL_REVERSE_MAP.get(
                DEFAULT_LOG_LEVEL, DEFAULT_LOG_LEVEL)

    if data.get('release'):
        # CONSISTENCY: use six.text_type (== unicode on py2) like the other
        # validate_data implementations in this file, instead of the
        # py2-only builtin `unicode`.
        data['release'] = six.text_type(data['release'])

    return data