def to_python(cls, data):
    """Build a device interface instance from raw client data.

    Raises:
        InterfaceValidationError: if 'name' or 'version' is missing.
    """
    data = data.copy()
    # Anything under 'data' (or, failing that, the remaining payload)
    # becomes extra context; non-dict values are discarded.
    extra_data = data.pop('data', data)
    if not isinstance(extra_data, dict):
        extra_data = {}
    try:
        name = trim(data.pop('name'), 64)
    except KeyError:
        raise InterfaceValidationError("Missing or invalid value for 'name'")
    try:
        version = trim(data.pop('version'), 64)
    except KeyError:
        raise InterfaceValidationError("Missing or invalid value for 'version'")
    build = trim(data.pop('build', None), 64)
    kwargs = {
        'name': name,
        'version': version,
        'build': build,
        # BUG FIX: the sanitized extra_data computed above was ignored;
        # the old code trimmed the leftover `data` dict (and assigned
        # kwargs['data'] twice).
        'data': trim_dict(extra_data),
    }
    return cls(**kwargs)
def to_python(cls, data):
    """Build a user interface instance from a raw client payload."""
    data = data.copy()

    # Whatever sits under 'data' (or the leftover payload itself) is
    # treated as untrusted extra context; non-dict values are dropped.
    extra = data.pop("data", data)
    if not isinstance(extra, dict):
        extra = {}

    user_id = trim(data.pop("id", None), 128)
    if user_id:
        user_id = unicode(user_id)

    try:
        email = trim(validate_email(data.pop("email", None), False), 128)
    except ValueError:
        raise InterfaceValidationError("Invalid value for 'email'")

    username = trim(data.pop("username", None), 128)
    if username:
        username = unicode(username)

    try:
        ip = validate_ip(data.pop("ip_address", None), False)
    except ValueError:
        raise InterfaceValidationError("Invalid value for 'ip_address'")

    # TODO(dcramer): patch in fix to deal w/ old data but not allow new
    # if not (ident or email or username or ip_address):
    #     raise ValueError('No identifying value')
    return cls(
        id=user_id,
        email=email,
        username=username,
        ip_address=ip,
        data=trim_dict(extra),
    )
def to_python(cls, data):
    """Build an Http interface instance from a raw request payload.

    Raises InterfaceValidationError when 'url' is missing or 'method'
    is not a recognized HTTP method.
    """
    if not data.get('url'):
        raise InterfaceValidationError("No value for 'url'")
    kwargs = {}
    if data.get('method'):
        method = data['method'].upper()
        if method not in HTTP_METHODS:
            raise InterfaceValidationError("Invalid value for 'method'")
        kwargs['method'] = method
    else:
        kwargs['method'] = None
    scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])
    # an explicitly supplied query_string wins over the one embedded in the URL
    query_string = data.get('query_string') or query_bit
    if query_string:
        # if querystring was a dict, convert it to a string
        if isinstance(query_string, dict):
            query_string = urlencode([(to_bytes(k), to_bytes(v)) for k, v in query_string.items()])
        else:
            query_string = query_string
            if query_string[0] == '?':
                # remove '?' prefix
                query_string = query_string[1:]
        kwargs['query_string'] = trim(query_string, 4096)
    else:
        kwargs['query_string'] = ''
    # an explicit fragment wins over the one embedded in the URL
    fragment = data.get('fragment') or fragment_bit
    cookies = data.get('cookies')
    # if cookies were [also] included in headers we
    # strip them out
    headers = data.get('headers')
    if headers:
        headers, cookie_header = format_headers(headers)
        if not cookies and cookie_header:
            cookies = cookie_header
    else:
        headers = ()
    # dict bodies are serialized; everything else is trimmed as-is
    body = data.get('data')
    if isinstance(body, dict):
        body = json.dumps(body)
    if body:
        body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)
    kwargs['cookies'] = trim_pairs(format_cookies(cookies))
    kwargs['env'] = trim_dict(data.get('env') or {})
    kwargs['headers'] = trim_pairs(headers)
    kwargs['data'] = body
    # the stored URL excludes query string and fragment; both are kept
    # in their own fields
    kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
    kwargs['fragment'] = trim(fragment, 1024)
    return cls(**kwargs)
def to_python(cls, data):
    """Build a stack-frame instance from a raw frame payload.

    At least one of filename/function/module must be present
    (enforced with ``assert``).
    """
    abs_path = data.get('abs_path')
    filename = data.get('filename')
    # abs_path and filename default to one another when only one is given
    if not abs_path:
        abs_path = filename
    if not filename:
        filename = abs_path
    # for URL frames, keep only the path portion as the filename
    if abs_path and is_url(abs_path):
        urlparts = urlparse(abs_path)
        if urlparts.path:
            filename = urlparts.path
    assert filename or data.get('function') or data.get('module')
    context_locals = data.get('vars') or {}
    # locals may arrive as a sequence; index them into a dict
    if isinstance(context_locals, (list, tuple)):
        context_locals = dict(enumerate(context_locals))
    elif not isinstance(context_locals, dict):
        context_locals = {}
    context_locals = trim_dict(context_locals)
    # extra data is used purely by internal systems,
    # so we dont trim it
    extra_data = data.get('data') or {}
    if isinstance(extra_data, (list, tuple)):
        extra_data = dict(enumerate(extra_data))
    kwargs = {
        'abs_path': trim(abs_path, 256),
        'filename': trim(filename, 256),
        'module': trim(data.get('module'), 256),
        'function': trim(data.get('function'), 256),
        'in_app': validate_bool(data.get('in_app'), False),
        'context_line': trim(data.get('context_line'), 256),
        # TODO(dcramer): trim pre/post_context
        'pre_context': data.get('pre_context'),
        'post_context': data.get('post_context'),
        'vars': context_locals,
        'data': extra_data,
        'errors': data.get('errors'),
    }
    if data.get('lineno') is not None:
        lineno = int(data['lineno'])
        # negative line numbers are meaningless; treat them as absent
        if lineno < 0:
            lineno = None
        kwargs['lineno'] = lineno
    else:
        kwargs['lineno'] = None
    if data.get('colno') is not None:
        kwargs['colno'] = int(data['colno'])
    else:
        kwargs['colno'] = None
    return cls(**kwargs)
def to_python(cls, data):
    """Build a stack-frame instance from a raw frame payload.

    At least one of filename/function/module must be present
    (enforced with ``assert``).
    """
    abs_path = data.get("abs_path")
    filename = data.get("filename")
    # abs_path and filename default to one another when only one is given
    if not abs_path:
        abs_path = filename
    if not filename:
        filename = abs_path
    # for URL frames, keep only the path portion as the filename
    if abs_path and is_url(abs_path):
        urlparts = urlparse(abs_path)
        if urlparts.path:
            filename = urlparts.path
    assert filename or data.get("function") or data.get("module")
    context_locals = data.get("vars") or {}
    # locals may arrive as a sequence; index them into a dict
    if isinstance(context_locals, (list, tuple)):
        context_locals = dict(enumerate(context_locals))
    elif not isinstance(context_locals, dict):
        context_locals = {}
    context_locals = trim_dict(context_locals)
    # extra data is used purely by internal systems,
    # so we dont trim it
    extra_data = data.get("data") or {}
    if isinstance(extra_data, (list, tuple)):
        extra_data = dict(enumerate(extra_data))
    kwargs = {
        "abs_path": trim(abs_path, 256),
        "filename": trim(filename, 256),
        "module": trim(data.get("module"), 256),
        "function": trim(data.get("function"), 256),
        "in_app": validate_bool(data.get("in_app"), False),
        "context_line": trim(data.get("context_line"), 256),
        # TODO(dcramer): trim pre/post_context
        "pre_context": data.get("pre_context"),
        "post_context": data.get("post_context"),
        "vars": context_locals,
        "data": extra_data,
    }
    if data.get("lineno") is not None:
        lineno = int(data["lineno"])
        # FIX: clamp negative line numbers to None, consistent with the
        # sibling frame parser in this file (they are meaningless)
        if lineno < 0:
            lineno = None
        kwargs["lineno"] = lineno
    else:
        kwargs["lineno"] = None
    if data.get("colno") is not None:
        kwargs["colno"] = int(data["colno"])
    else:
        kwargs["colno"] = None
    return cls(**kwargs)
def to_python(cls, data):
    """Coerce a raw user payload into an interface instance."""
    data = data.copy()
    ident = trim(data.pop('id', None), 128)
    email = trim(data.pop('email', None), 128)
    username = trim(data.pop('username', None), 128)
    ip_address = validate_ip(data.pop('ip_address', None), False)
    # whatever sits under 'data' -- or the leftover payload itself --
    # is kept as trimmed extra context
    extra = data.pop('data', data)
    return cls(
        id=ident,
        email=email,
        username=username,
        ip_address=ip_address,
        data=trim_dict(extra),
    )
def to_python(cls, data):
    """Normalize a raw user payload into an interface instance."""
    data = data.copy()

    def _as_text(value, maxlen=128):
        # identifying fields become trimmed text; None passes through
        if value is None:
            return None
        return trim(six.text_type(value), maxlen)

    ident = _as_text(data.pop('id', None))

    try:
        email = trim(validate_email(data.pop('email', None), False),
                     MAX_EMAIL_FIELD_LENGTH)
    except ValueError:
        raise InterfaceValidationError("Invalid value for 'email'")

    username = _as_text(data.pop('username', None))
    name = _as_text(data.pop('name', None))

    try:
        ip_address = validate_ip(data.pop('ip_address', None), False)
    except ValueError:
        raise InterfaceValidationError("Invalid value for 'ip_address'")

    # derive geo from the IP when none was provided explicitly
    geo = data.pop('geo', None)
    if not geo and ip_address:
        geo = Geo.from_ip_address(ip_address)
    elif geo:
        geo = Geo.to_python(geo)

    extra = data.pop('data', None)
    if not isinstance(extra, dict):
        extra = {}
    # fold any remaining unrecognized keys into the extra context
    extra.update(data)

    # TODO(dcramer): patch in fix to deal w/ old data but not allow new
    # if not (ident or email or username or ip_address):
    #     raise ValueError('No identifying value')
    return cls(
        id=ident,
        email=email,
        username=username,
        ip_address=ip_address,
        name=name,
        geo=geo,
        data=trim_dict(extra),
    )
def to_python(cls, data):
    """Build a user interface instance, trimming identity fields."""
    data = data.copy()
    # the 'data' key wins; otherwise the leftover payload is the extra
    # context. Non-dict values are discarded.
    extra = data.pop('data', data)
    if not isinstance(extra, dict):
        extra = {}
    fields = {
        'id': trim(data.pop('id', None), 128),
        'email': trim(data.pop('email', None), 128),
        'username': trim(data.pop('username', None), 128),
        'ip_address': validate_ip(data.pop('ip_address', None), False),
        'data': trim_dict(extra),
    }
    return cls(**fields)
def to_python(cls, data):
    """Construct a user interface instance from an untrusted payload."""
    data = data.copy()
    extra = data.pop("data", data)
    if not isinstance(extra, dict):
        extra = {}
    ident = trim(data.pop("id", None), 128)
    email = trim(data.pop("email", None), 128)
    username = trim(data.pop("username", None), 128)
    ip_address = validate_ip(data.pop("ip_address", None), False)
    return cls(
        id=ident,
        email=email,
        username=username,
        ip_address=ip_address,
        data=trim_dict(extra),
    )
def to_python(cls, data):
    """Build a user instance; requires at least one identifying value."""
    data = data.copy()
    extra = data.pop("data", data)
    if not isinstance(extra, dict):
        extra = {}
    ident = trim(data.pop("id", None), 128)
    email = trim(validate_email(data.pop("email", None), False), 128)
    username = trim(data.pop("username", None), 128)
    ip_address = validate_ip(data.pop("ip_address", None), False)
    # at least one identifying attribute must be present
    if not any((ident, email, username, ip_address)):
        raise ValueError("No identifying value")
    return cls(
        id=ident,
        email=email,
        username=username,
        ip_address=ip_address,
        data=trim_dict(extra),
    )
def to_python(cls, data):
    """Validate a raw device payload and build an interface instance."""
    is_valid, errors = validate_and_default_interface(data, cls.path)
    if not is_valid:
        raise InterfaceValidationError("Invalid device")

    data = data.copy()
    # 'data' key wins; otherwise the leftover payload is the extra context
    extra = data.pop('data', data)
    return cls(
        name=trim(data.pop('name'), 64),
        version=trim(data.pop('version'), 64),
        build=trim(data.pop('build', None), 64),
        data=trim_dict(extra),
    )
def to_python(cls, data):
    """Build a user interface instance from a raw client payload."""
    data = data.copy()
    extra = data.pop('data', data)
    if not isinstance(extra, dict):
        extra = {}

    def _stringify(value):
        # truthy values are forced to text; falsy ones pass through
        return six.text_type(value) if value else value

    ident = _stringify(trim(data.pop('id', None), 128))

    try:
        email = trim(validate_email(data.pop('email', None), False),
                     MAX_EMAIL_FIELD_LENGTH)
    except ValueError:
        raise InterfaceValidationError("Invalid value for 'email'")

    username = _stringify(trim(data.pop('username', None), 128))
    name = _stringify(trim(data.pop('name', None), 128))

    try:
        ip_address = validate_ip(data.pop('ip_address', None), False)
    except ValueError:
        raise InterfaceValidationError("Invalid value for 'ip_address'")

    # TODO(dcramer): patch in fix to deal w/ old data but not allow new
    # if not (ident or email or username or ip_address):
    #     raise ValueError('No identifying value')
    return cls(
        id=ident,
        email=email,
        username=username,
        ip_address=ip_address,
        name=name,
        data=trim_dict(extra),
    )
def to_python(cls, data):
    """Build a user instance; asserts at least one identifier exists."""
    data = data.copy()
    extra = data.pop('data', data)
    if not isinstance(extra, dict):
        extra = {}
    ident = trim(data.pop('id', None), 128)
    email = trim(validate_email(data.pop('email', None), False), 128)
    username = trim(data.pop('username', None), 128)
    ip_address = validate_ip(data.pop('ip_address', None), False)
    # at least one identifying value is required
    assert ident or email or username or ip_address
    return cls(
        id=ident,
        email=email,
        username=username,
        ip_address=ip_address,
        data=trim_dict(extra),
    )
def test_large_dict(self):
    # trim_dict mutates in place, capping the mapping at 50 entries
    oversized = dict((n, n) for n in range(500))
    trim_dict(oversized)
    assert len(oversized) == 50
def normalize(self):
    """Normalize raw event data in place and return it.

    Fills defaults, coerces well-known attributes, round-trips interface
    payloads via ``to_python``/``to_json``, and infers the event type.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data
    # level must be a known log level; otherwise fall back to ERROR
    if not isinstance(data.get("level"), (six.string_types, int)):
        data["level"] = logging.ERROR
    elif data["level"] not in LOG_LEVELS:
        data["level"] = logging.ERROR
    # logger names are trimmed and must pass tag-key validation
    if not data.get("logger"):
        data["logger"] = DEFAULT_LOGGER_NAME
    else:
        logger = trim(data["logger"].strip(), 64)
        if TagKey.is_valid_key(logger):
            data["logger"] = logger
        else:
            data["logger"] = DEFAULT_LOGGER_NAME
    if data.get("platform"):
        data["platform"] = trim(data["platform"], 64)
    current_timestamp = timezone.now()
    timestamp = data.get("timestamp")
    if not timestamp:
        timestamp = current_timestamp
    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        timestamp = float(timestamp.strftime("%s"))
    data["timestamp"] = timestamp
    data["received"] = float(timezone.now().strftime("%s"))
    if not data.get("event_id"):
        data["event_id"] = uuid4().hex
    data.setdefault("culprit", None)
    data.setdefault("server_name", None)
    data.setdefault("site", None)
    data.setdefault("checksum", None)
    data.setdefault("fingerprint", None)
    data.setdefault("platform", None)
    data.setdefault("environment", None)
    data.setdefault("extra", {})
    data.setdefault("errors", [])
    tags = data.get("tags")
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)
    # rebuild tags as stripped (key, value) text pairs, skipping blanks
    data["tags"] = []
    for key, value in tags:
        key = six.text_type(key).strip()
        value = six.text_type(value).strip()
        if not (key and value):
            continue
        data["tags"].append((key, value))
        # XXX(dcramer): many legacy apps are using the environment tag
        # rather than the key itself
        if key == "environment" and not data.get("environment"):
            data["environment"] = value
    if not isinstance(data["extra"], dict):
        # throw it away
        data["extra"] = {}
    trim_dict(data["extra"], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)
    # TODO(dcramer): more of validate data needs stuffed into the manager
    # Every non-reserved key is treated as an interface payload and
    # round-tripped through its interface class.
    for key in data.keys():
        if key in CLIENT_RESERVED_ATTRS:
            continue
        value = data.pop(key)
        try:
            interface = get_interface(key)()
        except ValueError:
            continue
        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception:
            # XXX: we should consider logging this.
            pass
    # TODO(dcramer): this logic is duplicated in ``validate_data`` from
    # coreapi
    # message is coerced to an interface, as its used for pure
    # index of searchable strings
    # See GH-3248
    message = data.pop("message", None)
    if message:
        if "sentry.interfaces.Message" not in data:
            interface = get_interface("sentry.interfaces.Message")
            try:
                inst = interface.to_python({"message": message})
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass
        elif not data["sentry.interfaces.Message"].get("formatted"):
            interface = get_interface("sentry.interfaces.Message")
            try:
                inst = interface.to_python(dict(data["sentry.interfaces.Message"], formatted=message))
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass
    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data["type"] = eventtypes.infer(data).key
    data["version"] = self.version
    # TODO(dcramer): find a better place for this logic
    # a lone stacktrace next to a single-value exception is folded in
    exception = data.get("sentry.interfaces.Exception")
    stacktrace = data.get("sentry.interfaces.Stacktrace")
    if exception and len(exception["values"]) == 1 and stacktrace:
        exception["values"][0]["stacktrace"] = stacktrace
        del data["sentry.interfaces.Stacktrace"]
    # backfill user.ip_address from the HTTP env when available
    if "sentry.interfaces.Http" in data:
        try:
            ip_address = validate_ip(
                data["sentry.interfaces.Http"].get("env", {}).get("REMOTE_ADDR"), required=False
            )
        except ValueError:
            ip_address = None
        if ip_address:
            data.setdefault("sentry.interfaces.User", {})
            data["sentry.interfaces.User"].setdefault("ip_address", ip_address)
    if data["culprit"]:
        data["culprit"] = trim(data["culprit"], MAX_CULPRIT_LENGTH)
    return data
def test_large_dict(self):
    # trim_dict should prune the mapping to 50 entries in place
    payload = {i: i for i in range(500)}
    trim_dict(payload)
    assert len(payload) == 50
def normalize(self):
    """Normalize raw event data in place and return it.

    Fills defaults, coerces well-known attributes, and round-trips
    interface payloads via ``to_python``/``to_json``.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data
    # level must be a known log level; otherwise fall back to ERROR
    if not isinstance(data.get('level'), (six.string_types, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR
    # logger names are trimmed and must pass tag-key validation
    if not data.get('logger'):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        logger = trim(data['logger'].strip(), 64)
        if TagKey.is_valid_key(logger):
            data['logger'] = logger
        else:
            data['logger'] = DEFAULT_LOGGER_NAME
    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)
    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()
    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        timestamp = float(timestamp.strftime('%s'))
    data['timestamp'] = timestamp
    if not data.get('event_id'):
        data['event_id'] = uuid4().hex
    data.setdefault('message', '')
    data.setdefault('culprit', None)
    data.setdefault('time_spent', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('fingerprint', None)
    data.setdefault('platform', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('errors', [])
    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)
    # rebuild tags as stripped (key, value) text pairs, skipping blanks
    data['tags'] = []
    for key, value in tags:
        key = six.text_type(key).strip()
        value = six.text_type(value).strip()
        if not (key and value):
            continue
        data['tags'].append((key, value))
    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}
    trim_dict(
        data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)
    # TODO(dcramer): more of validate data needs stuffed into the manager
    # Every non-reserved key is treated as an interface payload and
    # round-tripped through its interface class.
    for key in data.keys():
        if key in CLIENT_RESERVED_ATTRS:
            continue
        value = data.pop(key)
        try:
            interface = get_interface(key)()
        except ValueError:
            continue
        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception:
            pass
    data['version'] = self.version
    # TODO(dcramer): find a better place for this logic
    # a lone stacktrace next to a single-value exception is folded in
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']
    # backfill user.ip_address from the HTTP env when available
    if 'sentry.interfaces.Http' in data:
        try:
            ip_address = validate_ip(
                data['sentry.interfaces.Http'].get(
                    'env', {}).get('REMOTE_ADDR'),
                required=False,
            )
        except ValueError:
            ip_address = None
        if ip_address:
            data.setdefault('sentry.interfaces.User', {})
            data['sentry.interfaces.User'].setdefault(
                'ip_address', ip_address)
    if data['time_spent']:
        data['time_spent'] = int(data['time_spent'])
    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)
    if data['message']:
        data['message'] = trim(
            data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH)
    return data
def normalize_event_data(self, data):
    """Normalize a raw event payload in place and return it.

    Fills defaults for top-level attributes and trims/expands the
    well-known ``sentry.interfaces.*`` payloads.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    # level must be a known log level; otherwise fall back to ERROR
    if not isinstance(data.get('level'), (basestring, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR
    if not data.get('logger'):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        data['logger'] = trim(data['logger'], 64)
    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)
    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()
    # We must convert date to local time so Django doesn't mess it up
    # based on TIME_ZONE
    # NOTE(review): no isinstance check here -- this assumes timestamp is
    # always a datetime at this point; confirm callers never send floats
    if settings.TIME_ZONE:
        if not timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=timezone.utc)
    elif timezone.is_aware(timestamp):
        timestamp = timestamp.replace(tzinfo=None)
    data['timestamp'] = timestamp
    if not data.get('event_id'):
        data['event_id'] = uuid.uuid4().hex
    data.setdefault('message', None)
    data.setdefault('culprit', None)
    data.setdefault('time_spent', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('platform', None)
    data.setdefault('extra', {})
    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)
    data['tags'] = tags
    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}
    trim_dict(
        data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)
    # TODO: each interface should describe its own normalization logic
    if 'sentry.interfaces.Exception' in data:
        # normalize the legacy single-exception shape into 'values'
        if 'values' not in data['sentry.interfaces.Exception']:
            data['sentry.interfaces.Exception'] = {
                'values': [data['sentry.interfaces.Exception']]
            }
        # convert stacktrace + exception into expanded exception
        if 'sentry.interfaces.Stacktrace' in data:
            data['sentry.interfaces.Exception']['values'][0]['stacktrace'] = data.pop('sentry.interfaces.Stacktrace')
        for exc_data in data['sentry.interfaces.Exception']['values']:
            for key in ('type', 'module', 'value'):
                value = exc_data.get(key)
                if value:
                    exc_data[key] = trim(value)
            if exc_data.get('stacktrace'):
                trim_frames(exc_data['stacktrace'])
                for frame in exc_data['stacktrace']['frames']:
                    stack_vars = frame.get('vars', {})
                    trim_dict(stack_vars)
                    for key, value in frame.iteritems():
                        if key not in ('vars', 'data'):
                            frame[key] = trim(value)
    if 'sentry.interfaces.Stacktrace' in data:
        trim_frames(data['sentry.interfaces.Stacktrace'])
        for frame in data['sentry.interfaces.Stacktrace']['frames']:
            stack_vars = frame.get('vars', {})
            trim_dict(stack_vars)
            for key, value in frame.iteritems():
                if key not in ('vars', 'data'):
                    frame[key] = trim(value)
    if 'sentry.interfaces.Message' in data:
        msg_data = data['sentry.interfaces.Message']
        # NOTE(review): trim()'s return value is discarded here -- looks
        # like this was meant to be msg_data['message'] = trim(...);
        # confirm before relying on the message being truncated
        trim(msg_data['message'], 1024)
        if msg_data.get('params'):
            msg_data['params'] = trim(msg_data['params'])
    if 'sentry.interfaces.Http' in data:
        http_data = data['sentry.interfaces.Http']
        for key in ('cookies', 'querystring', 'headers', 'env', 'url'):
            value = http_data.get(key)
            if not value:
                continue
            if type(value) == dict:
                trim_dict(value)
            else:
                http_data[key] = trim(value)
        value = http_data.get('data')
        if value:
            http_data['data'] = trim(value, 2048)
        # default the culprit to the url
        if not data['culprit']:
            data['culprit'] = strip(http_data.get('url'))
    if data['culprit']:
        data['culprit'] = trim(strip(data['culprit']), MAX_CULPRIT_LENGTH)
    if data['message']:
        data['message'] = strip(data['message'])
    return data
def to_python(cls, data, raw=False):
    """Build a stack-frame instance from raw payload data.

    When ``raw`` is true, the abs_path/filename fixups are skipped and
    the frame is kept as submitted.

    Raises InterfaceValidationError if the payload fails schema
    validation.
    """
    is_valid, errors = validate_and_default_interface(data, cls.path)
    if not is_valid:
        raise InterfaceValidationError("Invalid stack frame data.")
    abs_path = data.get('abs_path')
    filename = data.get('filename')
    symbol = data.get('symbol')
    function = data.get('function')
    module = data.get('module')
    package = data.get('package')
    # For legacy reasons
    if function in ('?', ''):
        function = None
    # For consistency reasons
    if symbol in ('?', ''):
        symbol = None
    # Some of this processing should only be done for non raw frames
    if not raw:
        # absolute path takes priority over filename
        # (in the end both will get set)
        if not abs_path:
            abs_path = filename
            filename = None
        if not filename and abs_path:
            if is_url(abs_path):
                urlparts = urlparse(abs_path)
                if urlparts.path:
                    filename = urlparts.path
                else:
                    filename = abs_path
            else:
                filename = abs_path
    platform = data.get('platform')
    context_locals = data.get('vars') or {}
    # locals may arrive as a sequence; index them into a dict
    if isinstance(context_locals, (list, tuple)):
        context_locals = dict(enumerate(context_locals))
    elif not isinstance(context_locals, dict):
        context_locals = {}
    context_locals = trim_dict(context_locals, object_hook=handle_nan)
    # extra data is used purely by internal systems,
    # so we dont trim it
    extra_data = data.get('data') or {}
    if isinstance(extra_data, (list, tuple)):
        extra_data = dict(enumerate(extra_data))
    # XXX: handle lines which were sent as 'null'
    context_line = trim(data.get('context_line'), 256)
    if context_line is not None:
        pre_context = data.get('pre_context', None)
        if pre_context:
            pre_context = [c or '' for c in pre_context]
        post_context = data.get('post_context', None)
        if post_context:
            post_context = [c or '' for c in post_context]
    else:
        # without a context line, surrounding context is meaningless
        pre_context, post_context = None, None
    in_app = validate_bool(data.get('in_app'), False)
    kwargs = {
        'abs_path': trim(abs_path, 2048),
        'filename': trim(filename, 256),
        'platform': platform,
        'module': trim(module, 256),
        'function': trim(function, 256),
        'package': package,
        'image_addr': to_hex_addr(data.get('image_addr')),
        'symbol': trim(symbol, 256),
        'symbol_addr': to_hex_addr(data.get('symbol_addr')),
        'instruction_addr': to_hex_addr(data.get('instruction_addr')),
        'trust': trim(data.get('trust'), 16),
        'in_app': in_app,
        'context_line': context_line,
        # TODO(dcramer): trim pre/post_context
        'pre_context': pre_context,
        'post_context': post_context,
        'vars': context_locals or None,
        'data': extra_data or None,
        'errors': data.get('errors'),
    }
    if data.get('lineno') is not None:
        lineno = int(data['lineno'])
        # negative line numbers are meaningless; treat them as absent
        if lineno < 0:
            lineno = None
        kwargs['lineno'] = lineno
    else:
        kwargs['lineno'] = None
    if data.get('colno') is not None:
        kwargs['colno'] = int(data['colno'])
    else:
        kwargs['colno'] = None
    return cls(**kwargs)
def to_python(cls, data):
    """Build an Http interface instance from a raw request payload.

    Asserts that 'url' is present and any supplied 'method' is valid.
    """
    assert data.get('url')
    kwargs = {}
    if data.get('method'):
        method = data['method'].upper()
        assert method in HTTP_METHODS
        kwargs['method'] = method
    else:
        kwargs['method'] = None
    scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])
    # an explicit query_string wins over the one embedded in the URL
    query_string = data.get('query_string') or query_bit
    if query_string:
        # if querystring was a dict, convert it to a string
        if isinstance(query_string, dict):
            query_string = urlencode(query_string.items())
        else:
            query_string = query_string
            if query_string[0] == '?':
                # remove '?' prefix
                query_string = query_string[1:]
        kwargs['query_string'] = trim(query_string, 1024)
    else:
        kwargs['query_string'] = ''
    fragment = data.get('fragment') or fragment_bit
    cookies = data.get('cookies')
    # if cookies were [also] included in headers we
    # strip them out
    headers = data.get('headers')
    if headers:
        headers = format_headers(headers)
        if 'Cookie' in headers:
            if not cookies:
                cookies = headers.pop('Cookie')
            else:
                del headers['Cookie']
        headers = trim_dict(headers)
    else:
        headers = {}
    body = data.get('data')
    # sequences are indexed into dicts; strings are trimmed and, for
    # form submissions, parsed into key/value pairs
    if isinstance(body, (list, tuple)):
        body = trim_dict(dict(enumerate(body)))
    elif isinstance(body, dict):
        body = trim_dict(body)
    elif body:
        body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)
        if headers.get('Content-Type') == cls.FORM_TYPE and '=' in body:
            body = dict(parse_qsl(body))
    # if cookies were a string, convert to a dict
    # parse_qsl will parse both acceptable formats:
    #   a=b&c=d
    # and
    #   a=b;c=d
    if isinstance(cookies, basestring):
        cookies = dict(parse_qsl(cookies, keep_blank_values=True))
    elif not cookies:
        cookies = {}
    kwargs['cookies'] = format_cookies(trim_dict(cookies))
    kwargs['env'] = trim_dict(data.get('env') or {})
    kwargs['headers'] = headers
    kwargs['data'] = body
    # the stored URL excludes query string and fragment
    kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
    kwargs['fragment'] = trim(fragment, 256)
    return cls(**kwargs)
def to_python(cls, data):
    """Build a stack-frame instance from raw frame data.

    Asserts that identifying fields are text (or absent) and that at
    least one of filename/function/module is present.
    """
    abs_path = data.get('abs_path')
    filename = data.get('filename')
    function = data.get('function')
    module = data.get('module')
    # all identifying fields must be strings or absent
    for v in (abs_path, filename, function, module):
        assert isinstance(v, (string_types, NoneType))
    # absolute path takes priority over filename
    # (in the end both will get set)
    if not abs_path:
        abs_path = filename
        filename = None
    if not filename and abs_path:
        if is_url(abs_path):
            urlparts = urlparse(abs_path)
            if urlparts.path:
                filename = urlparts.path
            else:
                filename = abs_path
        else:
            filename = abs_path
    assert filename or function or module
    context_locals = data.get('vars') or {}
    # locals may arrive as a sequence; index them into a dict
    if isinstance(context_locals, (list, tuple)):
        context_locals = dict(enumerate(context_locals))
    elif not isinstance(context_locals, dict):
        context_locals = {}
    context_locals = trim_dict(context_locals, object_hook=handle_nan)
    # extra data is used purely by internal systems,
    # so we dont trim it
    extra_data = data.get('data') or {}
    if isinstance(extra_data, (list, tuple)):
        extra_data = dict(enumerate(extra_data))
    # XXX: handle lines which were sent as 'null'
    context_line = trim(data.get('context_line'), 256)
    if context_line is not None:
        pre_context = data.get('pre_context', None)
        if pre_context:
            pre_context = [c or '' for c in pre_context]
        post_context = data.get('post_context', None)
        if post_context:
            post_context = [c or '' for c in post_context]
    else:
        # without a context line, surrounding context is meaningless
        pre_context, post_context = None, None
    kwargs = {
        'abs_path': trim(abs_path, 256),
        'filename': trim(filename, 256),
        'module': trim(module, 256),
        'function': trim(function, 256),
        'in_app': validate_bool(data.get('in_app'), False),
        'context_line': context_line,
        # TODO(dcramer): trim pre/post_context
        'pre_context': pre_context,
        'post_context': post_context,
        'vars': context_locals,
        'data': extra_data,
        'errors': data.get('errors'),
    }
    if data.get('lineno') is not None:
        lineno = int(data['lineno'])
        # negative line numbers are meaningless; treat them as absent
        if lineno < 0:
            lineno = None
        kwargs['lineno'] = lineno
    else:
        kwargs['lineno'] = None
    if data.get('colno') is not None:
        kwargs['colno'] = int(data['colno'])
    else:
        kwargs['colno'] = None
    return cls(**kwargs)
def to_python(cls, data):
    """Build a stack-frame instance from raw frame data.

    Raises InterfaceValidationError when identifying fields are
    non-string, when no filename/function/module is present, or when
    'in_app' is not boolean-coercible.
    """
    abs_path = data.get('abs_path')
    filename = data.get('filename')
    function = data.get('function')
    module = data.get('module')
    # all identifying fields must be strings or absent
    for name in ('abs_path', 'filename', 'function', 'module'):
        if not isinstance(data.get(name), (string_types, NoneType)):
            raise InterfaceValidationError("Invalid value for '%s'" % name)
    # absolute path takes priority over filename
    # (in the end both will get set)
    if not abs_path:
        abs_path = filename
        filename = None
    if not filename and abs_path:
        if is_url(abs_path):
            urlparts = urlparse(abs_path)
            if urlparts.path:
                filename = urlparts.path
            else:
                filename = abs_path
        else:
            filename = abs_path
    if not (filename or function or module):
        raise InterfaceValidationError(
            "No 'filename' or 'function' or 'module'")
    # legacy clients send '?' for unknown functions
    if function == '?':
        function = None
    context_locals = data.get('vars') or {}
    # locals may arrive as a sequence; index them into a dict
    if isinstance(context_locals, (list, tuple)):
        context_locals = dict(enumerate(context_locals))
    elif not isinstance(context_locals, dict):
        context_locals = {}
    context_locals = trim_dict(context_locals, object_hook=handle_nan)
    # extra data is used purely by internal systems,
    # so we dont trim it
    extra_data = data.get('data') or {}
    if isinstance(extra_data, (list, tuple)):
        extra_data = dict(enumerate(extra_data))
    # XXX: handle lines which were sent as 'null'
    context_line = trim(data.get('context_line'), 256)
    if context_line is not None:
        pre_context = data.get('pre_context', None)
        if pre_context:
            pre_context = [c or '' for c in pre_context]
        post_context = data.get('post_context', None)
        if post_context:
            post_context = [c or '' for c in post_context]
    else:
        # without a context line, surrounding context is meaningless
        pre_context, post_context = None, None
    try:
        in_app = validate_bool(data.get('in_app'), False)
    except AssertionError:
        raise InterfaceValidationError("Invalid value for 'in_app'")
    kwargs = {
        'abs_path': trim(abs_path, 256),
        'filename': trim(filename, 256),
        'module': trim(module, 256),
        'function': trim(function, 256),
        'in_app': in_app,
        'context_line': context_line,
        # TODO(dcramer): trim pre/post_context
        'pre_context': pre_context,
        'post_context': post_context,
        'vars': context_locals,
        'data': extra_data,
        'errors': data.get('errors'),
    }
    if data.get('lineno') is not None:
        lineno = int(data['lineno'])
        # negative line numbers are meaningless; treat them as absent
        if lineno < 0:
            lineno = None
        kwargs['lineno'] = lineno
    else:
        kwargs['lineno'] = None
    if data.get('colno') is not None:
        kwargs['colno'] = int(data['colno'])
    else:
        kwargs['colno'] = None
    return cls(**kwargs)
def to_python(cls, data):
    """Build a stacktrace Frame interface instance from raw SDK data.

    Extended variant that also handles per-frame platform, native symbol
    addresses (package/image_addr/symbol_addr/instruction_addr) and an
    integer instruction_offset.

    Raises InterfaceValidationError on invalid field types or when none of
    filename/function/module is present.
    """
    abs_path = data.get("abs_path")
    filename = data.get("filename")
    function = data.get("function")
    module = data.get("module")
    # These four must each be a string or absent; anything else is rejected.
    for name in ("abs_path", "filename", "function", "module"):
        if not isinstance(data.get(name), (string_types, NoneType)):
            raise InterfaceValidationError("Invalid value for '%s'" % name)
    # absolute path takes priority over filename
    # (in the end both will get set)
    if not abs_path:
        abs_path = filename
        filename = None
    if not filename and abs_path:
        if is_url(abs_path):
            # For URLs, prefer just the path component as the filename.
            urlparts = urlparse(abs_path)
            if urlparts.path:
                filename = urlparts.path
            else:
                filename = abs_path
        else:
            filename = abs_path
    if not (filename or function or module):
        raise InterfaceValidationError("No 'filename' or 'function' or 'module'")
    # '?' is a sentinel some SDKs send for an unknown function name.
    if function == "?":
        function = None
    # Unknown per-frame platforms are discarded rather than rejected.
    platform = data.get("platform")
    if platform not in VALID_PLATFORMS:
        platform = None
    context_locals = data.get("vars") or {}
    # Accept list/tuple vars by keying them on their index.
    if isinstance(context_locals, (list, tuple)):
        context_locals = dict(enumerate(context_locals))
    elif not isinstance(context_locals, dict):
        context_locals = {}
    context_locals = trim_dict(context_locals, object_hook=handle_nan)
    # extra data is used purely by internal systems,
    # so we dont trim it
    extra_data = data.get("data") or {}
    if isinstance(extra_data, (list, tuple)):
        extra_data = dict(enumerate(extra_data))
    # XXX: handle lines which were sent as 'null'
    context_line = trim(data.get("context_line"), 256)
    if context_line is not None:
        # Replace null entries in the surrounding source context with "".
        pre_context = data.get("pre_context", None)
        if pre_context:
            pre_context = [c or "" for c in pre_context]
        post_context = data.get("post_context", None)
        if post_context:
            post_context = [c or "" for c in post_context]
    else:
        # Without a context line the surrounding context is meaningless.
        pre_context, post_context = None, None
    try:
        in_app = validate_bool(data.get("in_app"), False)
    except AssertionError:
        raise InterfaceValidationError("Invalid value for 'in_app'")
    # Must be an integer when present (py2: int or long).
    instruction_offset = data.get("instruction_offset")
    if instruction_offset is not None and not isinstance(instruction_offset, (int, long)):
        raise InterfaceValidationError("Invalid value for 'instruction_offset'")
    kwargs = {
        "abs_path": trim(abs_path, 256),
        "filename": trim(filename, 256),
        "platform": platform,
        "module": trim(module, 256),
        "function": trim(function, 256),
        "package": trim(data.get("package"), 256),
        # Addresses are trimmed to 16 chars, then normalized to hex form.
        "image_addr": to_hex_addr(trim(data.get("image_addr"), 16)),
        "symbol_addr": to_hex_addr(trim(data.get("symbol_addr"), 16)),
        "instruction_addr": to_hex_addr(trim(data.get("instruction_addr"), 16)),
        "instruction_offset": instruction_offset,
        "in_app": in_app,
        "context_line": context_line,
        # TODO(dcramer): trim pre/post_context
        "pre_context": pre_context,
        "post_context": post_context,
        "vars": context_locals,
        "data": extra_data,
        "errors": data.get("errors"),
    }
    if data.get("lineno") is not None:
        lineno = int(data["lineno"])
        # Negative line numbers are nonsense; drop them.
        if lineno < 0:
            lineno = None
        kwargs["lineno"] = lineno
    else:
        kwargs["lineno"] = None
    if data.get("colno") is not None:
        kwargs["colno"] = int(data["colno"])
    else:
        kwargs["colno"] = None
    return cls(**kwargs)
def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
    """Build an Http request interface instance from raw SDK data.

    When ``rust_renormalized`` is true the payload is assumed to have been
    normalized already and only missing keys are defaulted. Otherwise the
    payload is schema-validated and each field is coerced: the URL is split,
    the query string is converted to a list of (key, value) pairs, cookies
    are pulled out of headers when not sent separately, and the body is
    heuristically decoded and trimmed.

    Raises InterfaceValidationError on schema failure or an invalid method.
    """
    if rust_renormalized:
        # Fast path: trust upstream normalization, just default the keys.
        data.setdefault('query_string', [])
        for key in (
            "method",
            "url",
            "fragment",
            "cookies",
            "headers",
            "data",
            "env",
            "inferred_content_type",
        ):
            data.setdefault(key, None)
        return cls(**data)
    # NOTE(review): ``errors`` returned here is unused — presumably the
    # schema defaulting side effect is the point; confirm before removing.
    is_valid, errors = validate_and_default_interface(data, cls.path)
    if not is_valid:
        raise InterfaceValidationError("Invalid interface data")
    kwargs = {}
    if data.get('method'):
        method = data['method'].upper()
        # Optimize for the common path here, where it's a GET/POST, falling
        # back to a regular expresion test
        if method not in ('GET', 'POST') and not http_method_re.match(method):
            raise InterfaceValidationError("Invalid value for 'method'")
        kwargs['method'] = method
    else:
        kwargs['method'] = None
    if data.get('url', None):
        url = to_unicode(data['url'])
        # The JavaScript SDK used to send an ellipsis character for
        # truncated URLs. Canonical URLs do not contain UTF-8 characters in
        # either the path, query string or fragment, so we replace it with
        # three dots (which is the behavior of other SDKs). This effectively
        # makes the string two characters longer, but it will be trimmed
        # again down below.
        if url.endswith(u"\u2026"):
            url = url[:-1] + "..."
        scheme, netloc, path, query_bit, fragment_bit = urlsplit(url)
    else:
        scheme = netloc = path = query_bit = fragment_bit = None
    # Explicit query_string wins; otherwise fall back to the URL's query.
    query_string = data.get('query_string') or query_bit
    if query_string:
        if isinstance(query_string, six.string_types):
            if query_string[0] == '?':
                # Drop the leading '?' some SDKs include.
                query_string = query_string[1:]
            if query_string.endswith(u"\u2026"):
                # Same ellipsis-truncation fixup as for the URL above.
                query_string = query_string[:-1] + "..."
            query_string = [
                (to_unicode(k), jsonify(v))
                for k, v in parse_qsl(query_string, keep_blank_values=True)
            ]
        elif isinstance(query_string, dict):
            query_string = [(to_unicode(k), jsonify(v)) for k, v in six.iteritems(query_string)]
        elif isinstance(query_string, list):
            # Keep only well-formed 2-element pairs.
            query_string = [
                tuple(tup) for tup in query_string
                if isinstance(tup, (tuple, list)) and len(tup) == 2
            ]
        else:
            query_string = []
        kwargs['query_string'] = trim(query_string, 4096)
    else:
        kwargs['query_string'] = []
    fragment = data.get('fragment') or fragment_bit
    cookies = data.get('cookies')
    # if cookies were [also] included in headers we
    # strip them out
    if data.get("headers"):
        headers, cookie_header = format_headers(get_path(data, "headers", filter=True))
        if not cookies and cookie_header:
            cookies = cookie_header
    else:
        headers = ()
    # We prefer the body to be a string, since we can then attempt to parse it
    # as JSON OR decode it as a URL encoded query string, without relying on
    # the correct content type header being passed.
    body = data.get('data')
    content_type = next((v for k, v in headers if k == 'Content-Type'), None)
    # Remove content type parameters
    if content_type is not None:
        content_type = content_type.partition(';')[0].rstrip()
    # We process request data once during ingestion and again when
    # requesting the http interface over the API. Avoid overwriting
    # decoding the body again.
    inferred_content_type = data.get('inferred_content_type', content_type)
    if 'inferred_content_type' not in data and not isinstance(body, dict):
        body, inferred_content_type = heuristic_decode(body, content_type)
    if body:
        body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)
    env = data.get('env', {})
    # TODO (alex) This could also be accomplished with schema (with formats)
    if 'REMOTE_ADDR' in env:
        try:
            # Validation only — an invalid address is silently dropped.
            validate_ip(env['REMOTE_ADDR'], required=False)
        except ValueError:
            del env['REMOTE_ADDR']
    kwargs['inferred_content_type'] = inferred_content_type
    kwargs['cookies'] = trim_pairs(format_cookies(cookies))
    kwargs['env'] = trim_dict(env)
    kwargs['headers'] = trim_pairs(headers)
    kwargs['data'] = fix_broken_encoding(body)
    # Store the URL stripped of its query string and fragment.
    kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
    kwargs['fragment'] = trim(fragment, 1024)
    return cls(**kwargs)
def normalize_event_data(self, data):
    """Normalize a raw event payload dict in place and return it.

    Defaults top-level attributes (level, logger, timestamp, event_id,
    tags, ...), then walks the known interfaces — Exception, Stacktrace,
    Message, Http — trimming oversized values so the event fits storage
    limits. Mutates ``data`` and returns it.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    if not data.get("level") or data["level"] not in LOG_LEVELS:
        data["level"] = logging.ERROR
    if not data.get("logger"):
        data["logger"] = DEFAULT_LOGGER_NAME
    timestamp = data.get("timestamp")
    if not timestamp:
        timestamp = timezone.now()
    # We must convert date to local time so Django doesn't mess it up
    # based on TIME_ZONE
    if settings.TIME_ZONE:
        if not timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=timezone.utc)
    elif timezone.is_aware(timestamp):
        timestamp = timestamp.replace(tzinfo=None)
    data["timestamp"] = timestamp
    if not data.get("event_id"):
        data["event_id"] = uuid.uuid4().hex
    data.setdefault("message", None)
    data.setdefault("culprit", None)
    data.setdefault("time_spent", None)
    data.setdefault("server_name", None)
    data.setdefault("site", None)
    data.setdefault("checksum", None)
    data.setdefault("platform", None)
    data.setdefault("extra", {})
    tags = data.get("tags")
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)
    data["tags"] = tags
    data["message"] = strip(data["message"])
    data["culprit"] = strip(data["culprit"])
    if not isinstance(data["extra"], dict):
        # throw it away
        data["extra"] = {}
    trim_dict(data["extra"], max_size=MAX_EXTRA_VARIABLE_SIZE)
    if "sentry.interfaces.Exception" in data:
        # Single bare exception becomes the canonical {'values': [...]} shape.
        if "values" not in data["sentry.interfaces.Exception"]:
            data["sentry.interfaces.Exception"] = {"values": [data["sentry.interfaces.Exception"]]}
        # convert stacktrace + exception into expanded exception
        if "sentry.interfaces.Stacktrace" in data:
            data["sentry.interfaces.Exception"]["values"][0]["stacktrace"] = data.pop(
                "sentry.interfaces.Stacktrace"
            )
        for exc_data in data["sentry.interfaces.Exception"]["values"]:
            for key in ("type", "module", "value"):
                value = exc_data.get(key)
                if value:
                    exc_data[key] = trim(value)
            if exc_data.get("stacktrace"):
                for frame in exc_data["stacktrace"]["frames"]:
                    stack_vars = frame.get("vars", {})
                    trim_dict(stack_vars)
    if "sentry.interfaces.Stacktrace" in data:
        for frame in data["sentry.interfaces.Stacktrace"]["frames"]:
            stack_vars = frame.get("vars", {})
            trim_dict(stack_vars)
    if "sentry.interfaces.Message" in data:
        msg_data = data["sentry.interfaces.Message"]
        # FIX: trim() returns the trimmed value; the original call discarded
        # the result, so the message was never actually truncated.
        msg_data["message"] = trim(msg_data["message"], 1024)
        if msg_data.get("params"):
            msg_data["params"] = trim(msg_data["params"])
    if "sentry.interfaces.Http" in data:
        http_data = data["sentry.interfaces.Http"]
        for key in ("cookies", "querystring", "headers", "env", "url"):
            value = http_data.get(key)
            if not value:
                continue
            # dicts are trimmed in place; everything else is replaced.
            # (isinstance instead of type-equality so dict subclasses also
            # take the in-place path.)
            if isinstance(value, dict):
                trim_dict(value)
            else:
                http_data[key] = trim(value)
        value = http_data.get("data")
        if value:
            http_data["data"] = trim(value, 2048)
        # default the culprit to the url
        if not data["culprit"]:
            data["culprit"] = trim(strip(http_data.get("url")), MAX_CULPRIT_LENGTH)
    return data
def normalize(self):
    """Normalize ``self.data`` in place and return it.

    Defaults and coerces top-level event attributes (level, logger,
    platform, timestamp, event_id, tags, extra), converts raw interface
    payloads to their canonical JSON form via ``get_interface``, folds a
    lone Stacktrace into a single-valued Exception, backfills the user IP
    from the Http env, and trims oversized values.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data
    if not isinstance(data.get('level'), (six.string_types, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR
    if not data.get('logger'):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        # Reject loggers that don't form a valid tag key.
        logger = trim(data['logger'].strip(), 64)
        if TagKey.is_valid_key(logger):
            data['logger'] = logger
        else:
            data['logger'] = DEFAULT_LOGGER_NAME
    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)
    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()
    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        # Store as a unix epoch float.
        timestamp = float(timestamp.strftime('%s'))
    data['timestamp'] = timestamp
    if not data.get('event_id'):
        data['event_id'] = uuid4().hex
    data.setdefault('message', '')
    data.setdefault('culprit', None)
    data.setdefault('time_spent', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('fingerprint', None)
    data.setdefault('platform', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('errors', [])
    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)
    # Rebuild tags, keeping only pairs where both sides are non-empty
    # after coercion to stripped text.
    data['tags'] = []
    for key, value in tags:
        key = six.text_type(key).strip()
        value = six.text_type(value).strip()
        if not (key and value):
            continue
        data['tags'].append((key, value))
    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}
    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)
    # TODO(dcramer): more of validate data needs stuffed into the manager
    # NOTE(review): this pops keys while iterating data.keys(); safe on
    # Python 2 where keys() is a list snapshot — confirm before porting.
    for key in data.keys():
        if key in CLIENT_RESERVED_ATTRS:
            continue
        value = data.pop(key)
        try:
            interface = get_interface(key)()
        except ValueError:
            # Unknown attribute: the value stays popped (dropped).
            continue
        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception:
            # Invalid interface payloads are silently discarded here.
            pass
    data['version'] = self.version
    # TODO(dcramer): find a better place for this logic
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        # A lone stacktrace belongs to the single exception value.
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']
    if 'sentry.interfaces.Http' in data:
        try:
            ip_address = validate_ip(
                data['sentry.interfaces.Http'].get('env', {}).get('REMOTE_ADDR'),
                required=False,
            )
        except ValueError:
            ip_address = None
        if ip_address:
            # Backfill the user's IP without clobbering an explicit one.
            data.setdefault('sentry.interfaces.User', {})
            data['sentry.interfaces.User'].setdefault(
                'ip_address', ip_address)
    if data['time_spent']:
        data['time_spent'] = int(data['time_spent'])
    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)
    if data['message']:
        data['message'] = trim(data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH)
    return data
def normalize(self):
    """Normalize ``self.data`` in place and return it.

    Older variant: defaults and coerces top-level event attributes,
    folds a lone Stacktrace into a single-valued Exception, defaults the
    culprit to the Http URL, and trims culprit/message. Does not convert
    interface payloads here.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data
    data['version'] = self.version
    if not isinstance(data.get('level'), (six.string_types, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR
    if not data.get('logger'):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        data['logger'] = trim(data['logger'], 64)
    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)
    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()
    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        # Store as a unix epoch float.
        timestamp = float(timestamp.strftime('%s'))
    data['timestamp'] = timestamp
    if not data.get('event_id'):
        data['event_id'] = uuid4().hex
    data.setdefault('message', None)
    data.setdefault('culprit', None)
    data.setdefault('time_spent', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('platform', None)
    data.setdefault('extra', {})
    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)
    data['tags'] = tags
    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}
    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)
    # TODO(dcramer): find a better place for this logic
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        # A lone stacktrace belongs to the single exception value.
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']
    if 'sentry.interfaces.Http' in data:
        # default the culprit to the url
        if not data['culprit']:
            data['culprit'] = data['sentry.interfaces.Http']['url']
    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)
    if data['message']:
        data['message'] = trim(data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH)
    return data
def normalize(self, request_env=None):
    """Normalize ``self.data`` in place and return it.

    Pipeline: (1) cast top-level values to their expected types, recording
    failures in ``data['errors']``; (2) fold a raw 'message' into the
    Message interface; (3) resolve '{{auto}}' IP placeholders from the
    request environment; (4) schema-validate the event and tags; (5)
    convert interface payloads to canonical JSON; (6) coerce level and
    timestamp, default remaining attributes, infer the event type, and
    trim oversized values.

    ``request_env`` may carry 'client_ip' and 'auth' from the ingest
    request.
    """
    request_env = request_env or {}
    data = self.data
    # Collected validation problems; stored on the event itself.
    errors = data['errors'] = []
    # Before validating with a schema, attempt to cast values to their desired types
    # so that the schema doesn't have to take every type variation into account.
    text = six.text_type
    fp_types = six.string_types + six.integer_types + (float, )

    def to_values(v):
        # Lists/tuples sent bare become the canonical {'values': [...]} shape.
        return {'values': v} if v and isinstance(v, (tuple, list)) else v

    casts = {
        'environment': lambda v: text(v) if v is not None else v,
        'fingerprint': lambda v: list(map(text, v)) if isinstance(v, list) and all(isinstance(f, fp_types) for f in v) else v,
        'release': lambda v: text(v) if v is not None else v,
        'dist': lambda v: text(v).strip() if v is not None else v,
        'time_spent': lambda v: int(v) if v is not None else v,
        'tags': lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip()) for (v_k, v_v) in dict(v).items()],
        'timestamp': lambda v: process_timestamp(v),
        'platform': lambda v: v if v in VALID_PLATFORMS else 'other',
        'sentry.interfaces.Message': lambda v: v if isinstance(v, dict) else {
            'message': v
        },
        # These can be sent as lists and need to be converted to {'values': [...]}
        'exception': to_values,
        'sentry.interfaces.Exception': to_values,
        'breadcrumbs': to_values,
        'sentry.interfaces.Breadcrumbs': to_values,
        'threads': to_values,
        'sentry.interfaces.Threads': to_values,
    }
    for c in casts:
        if c in data:
            try:
                data[c] = casts[c](data[c])
            except Exception as e:
                # Cast failed: record the error and drop the value.
                errors.append({
                    'type': EventError.INVALID_DATA,
                    'name': c,
                    'value': data[c]
                })
                del data[c]
    # raw 'message' is coerced to the Message interface, as its used for pure index of
    # searchable strings. If both a raw 'message' and a Message interface exist, try and
    # add the former as the 'formatted' attribute of the latter.
    # See GH-3248
    msg_str = data.pop('message', None)
    if msg_str:
        msg_if = data.setdefault('sentry.interfaces.Message', {'message': msg_str})
        if msg_if.get('message') != msg_str:
            msg_if.setdefault('formatted', msg_str)
    # Fill in ip addresses marked as {{auto}}
    client_ip = request_env.get('client_ip')
    if client_ip:
        if get_path(data, ['sentry.interfaces.Http', 'env', 'REMOTE_ADDR']) == '{{auto}}':
            data['sentry.interfaces.Http']['env']['REMOTE_ADDR'] = client_ip
        if get_path(data, ['sentry.interfaces.User', 'ip_address']) == '{{auto}}':
            data['sentry.interfaces.User']['ip_address'] = client_ip
    # Validate main event body and tags against schema
    is_valid, event_errors = validate_and_default_interface(data, 'event')
    errors.extend(event_errors)
    if 'tags' in data:
        is_valid, tag_errors = validate_and_default_interface(data['tags'], 'tags', name='tags')
        errors.extend(tag_errors)
    # Validate interfaces
    # (iterate over a key snapshot since entries are popped and re-added)
    for k in list(iter(data)):
        if k in CLIENT_RESERVED_ATTRS:
            continue
        value = data.pop(k)
        if not value:
            self.logger.debug('Ignored empty interface value: %s', k)
            continue
        try:
            interface = get_interface(k)
        except ValueError:
            self.logger.debug('Ignored unknown attribute: %s', k)
            errors.append({
                'type': EventError.INVALID_ATTRIBUTE,
                'name': k
            })
            continue
        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # Expected validation failures log at debug; anything else is
            # an error worth surfacing.
            log = self.logger.debug if isinstance(
                e, InterfaceValidationError) else self.logger.error
            log('Discarded invalid value for interface: %s (%r)', k, value, exc_info=True)
            errors.append({
                'type': EventError.INVALID_DATA,
                'name': k,
                'value': value
            })
    # Additional data coercion and defaulting
    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
        # Numeric levels map through LOG_LEVELS to their name first.
        level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
    data['level'] = LOG_LEVELS_MAP.get(level, LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])
    # A dist without a release is meaningless.
    if data.get('dist') and not data.get('release'):
        data['dist'] = None
    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()
    # TODO (alex) can this all be replaced by utcnow?
    # it looks like the only time that this would even be hit is when timestamp
    # is not defined, as the earlier process_timestamp already converts existing
    # timestamps to floats.
    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        timestamp = float(timestamp.strftime('%s'))
    data['timestamp'] = timestamp
    data['received'] = float(timezone.now().strftime('%s'))
    data.setdefault('checksum', None)
    data.setdefault('culprit', None)
    data.setdefault('dist', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('fingerprint', None)
    data.setdefault('logger', DEFAULT_LOGGER_NAME)
    data.setdefault('platform', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('tags', [])
    data.setdefault('transaction', None)
    # Fix case where legacy apps pass 'environment' as a tag
    # instead of a top level key.
    # TODO (alex) save() just reinserts the environment into the tags
    if not data.get('environment'):
        tagsdict = dict(data['tags'])
        if 'environment' in tagsdict:
            data['environment'] = tagsdict['environment']
            del tagsdict['environment']
            data['tags'] = tagsdict.items()
    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data['type'] = eventtypes.infer(data).key
    data['version'] = self.version
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        # A lone stacktrace belongs to the single exception value.
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']
    # If there is no User ip_addres, update it either from the Http interface
    # or the client_ip of the request.
    auth = request_env.get('auth')
    is_public = auth and auth.is_public
    add_ip_platforms = ('javascript', 'cocoa', 'objc')
    http_ip = data.get('sentry.interfaces.Http', {}).get('env', {}).get('REMOTE_ADDR')
    if http_ip:
        data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', http_ip)
    elif client_ip and (is_public or data.get('platform') in add_ip_platforms):
        data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', client_ip)
    if client_ip and data.get('sdk'):
        data['sdk']['client_ip'] = client_ip
    # Trim values
    data['logger'] = trim(data['logger'].strip(), 64)
    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)
    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)
    if data['transaction']:
        data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)
    return data
def to_python(cls, data):
    """Build a stacktrace Frame interface instance from raw SDK data.

    Early variant: normalizes filename/abs_path, trims values, and coerces
    line/column numbers. Identity requirements are enforced with a bare
    ``assert`` (see note below).
    """
    abs_path = data.get('abs_path')
    filename = data.get('filename')
    # absolute path takes priority; ensure both end up populated
    if not abs_path:
        abs_path = filename
    if not filename:
        filename = abs_path
    if abs_path and is_url(abs_path):
        # For URLs, prefer just the path component as the filename.
        urlparts = urlparse(abs_path)
        if urlparts.path:
            filename = urlparts.path
    # NOTE(review): assert is stripped under `python -O`, so this
    # validation silently disappears in optimized runs; later versions of
    # this method raise InterfaceValidationError instead — consider the
    # same here.
    assert filename or data.get('function') or data.get('module')
    context_locals = data.get('vars') or {}
    # Accept list/tuple vars by keying them on their index.
    if isinstance(context_locals, (list, tuple)):
        context_locals = dict(enumerate(context_locals))
    elif not isinstance(context_locals, dict):
        context_locals = {}
    context_locals = trim_dict(context_locals)
    # extra data is used purely by internal systems,
    # so we dont trim it
    extra_data = data.get('data') or {}
    if isinstance(extra_data, (list, tuple)):
        extra_data = dict(enumerate(extra_data))
    # XXX: handle lines which were sent as 'null'
    context_line = trim(data.get('context_line'), 256)
    if context_line is not None:
        # Replace null entries in the surrounding source context with ''.
        pre_context = data.get('pre_context', None)
        if pre_context:
            pre_context = [c or '' for c in pre_context]
        post_context = data.get('post_context', None)
        if post_context:
            post_context = [c or '' for c in post_context]
    else:
        # Without a context line the surrounding context is meaningless.
        pre_context, post_context = None, None
    kwargs = {
        'abs_path': trim(abs_path, 256),
        'filename': trim(filename, 256),
        'module': trim(data.get('module'), 256),
        'function': trim(data.get('function'), 256),
        # NOTE(review): validate_bool raises AssertionError on bad input
        # here (uncaught) — presumably intentional for this version.
        'in_app': validate_bool(data.get('in_app'), False),
        'context_line': context_line,
        # TODO(dcramer): trim pre/post_context
        'pre_context': pre_context,
        'post_context': post_context,
        'vars': context_locals,
        'data': extra_data,
        'errors': data.get('errors'),
    }
    if data.get('lineno') is not None:
        lineno = int(data['lineno'])
        # Negative line numbers are nonsense; drop them.
        if lineno < 0:
            lineno = None
        kwargs['lineno'] = lineno
    else:
        kwargs['lineno'] = None
    if data.get('colno') is not None:
        kwargs['colno'] = int(data['colno'])
    else:
        kwargs['colno'] = None
    return cls(**kwargs)
def to_python(cls, data, rust_renormalized=RUST_RENORMALIZED_DEFAULT):
    """Build a User interface instance from raw SDK data.

    With ``rust_renormalized`` the payload is trusted and only missing
    keys are defaulted (geo still gets wrapped). Otherwise each identity
    field is coerced to trimmed text, the IP address is validated, geo is
    resolved (falling back to an IP lookup), and any leftover keys are
    folded into the trimmed ``data`` blob.
    """
    if rust_renormalized:
        # Already normalized upstream; just default the expected keys.
        data = data.copy()
        for key in ('id', 'email', 'username', 'ip_address', 'name', 'geo', 'data'):
            data.setdefault(key, None)
        if data['geo'] is not None:
            data['geo'] = Geo.to_python(data['geo'])
        return cls(**data)

    data = data.copy()

    def _as_trimmed_text(value):
        # Coerce to text and clamp to 128 chars; None passes through.
        return None if value is None else trim(six.text_type(value), 128)

    ident = _as_trimmed_text(data.pop('id', None))
    username = _as_trimmed_text(data.pop('username', None))
    name = _as_trimmed_text(data.pop('name', None))

    # Email must already be a string; anything else is discarded.
    email = data.pop('email', None)
    if not isinstance(email, six.string_types):
        email = None
    email = trim(email, MAX_EMAIL_FIELD_LENGTH)

    # Invalid addresses are dropped rather than rejected.
    try:
        ip_address = validate_ip(data.pop('ip_address', None), False)
    except ValueError:
        ip_address = None

    geo = data.pop('geo', None)
    if geo:
        geo = Geo.to_python(geo)
    elif ip_address:
        # No explicit geo: derive one from the IP address.
        geo = Geo.from_ip_address(ip_address)

    # Whatever keys remain are merged into the free-form data blob.
    leftovers = data.pop('data', None)
    if not isinstance(leftovers, dict):
        leftovers = {}
    leftovers.update(data)

    # TODO(dcramer): patch in fix to deal w/ old data but not allow new
    # if not (ident or email or username or ip_address):
    #     raise ValueError('No identifying value')
    return cls(
        id=ident,
        email=email,
        username=username,
        ip_address=ip_address,
        name=name,
        geo=geo,
        data=trim_dict(leftovers),
    )
def normalize(self):
    """Normalize ``self.data`` in place and return it.

    Defaults and coerces top-level attributes, promotes a legacy
    'environment' tag to the top-level key, converts interface payloads to
    canonical JSON, coerces a raw 'message' into the Message interface,
    infers the event type, folds a lone Stacktrace into a single-valued
    Exception, backfills the user IP from the Http env, and trims
    oversized values.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data
    if not isinstance(data.get('level'), (six.string_types, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR
    if not data.get('logger'):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        # Reject loggers that don't form a valid tag key.
        logger = trim(data['logger'].strip(), 64)
        if TagKey.is_valid_key(logger):
            data['logger'] = logger
        else:
            data['logger'] = DEFAULT_LOGGER_NAME
    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)
    current_timestamp = timezone.now()
    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = current_timestamp
    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        # Store as a unix epoch float.
        timestamp = float(timestamp.strftime('%s'))
    data['timestamp'] = timestamp
    data['received'] = float(timezone.now().strftime('%s'))
    if not data.get('event_id'):
        data['event_id'] = uuid4().hex
    data.setdefault('culprit', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('fingerprint', None)
    data.setdefault('platform', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('errors', [])
    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = list(tags.items())
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)
    # Rebuild tags, keeping only pairs where both sides are non-empty
    # after coercion to stripped text.
    data['tags'] = []
    for key, value in tags:
        key = six.text_type(key).strip()
        value = six.text_type(value).strip()
        if not (key and value):
            continue
        # XXX(dcramer): many legacy apps are using the environment tag
        # rather than the key itself
        if key == 'environment' and not data.get('environment'):
            data['environment'] = value
        else:
            data['tags'].append((key, value))
    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}
    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)
    # TODO(dcramer): more of validate data needs stuffed into the manager
    # (iterate over a key snapshot since entries are popped and re-added)
    for key in list(iter(data)):
        if key in CLIENT_RESERVED_ATTRS:
            continue
        value = data.pop(key)
        try:
            interface = get_interface(key)()
        except ValueError:
            # Unknown attribute: the value stays popped (dropped).
            continue
        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception:
            # XXX: we should consider logging this.
            pass
    # TODO(dcramer): this logic is duplicated in ``validate_data`` from
    # coreapi
    # message is coerced to an interface, as its used for pure
    # index of searchable strings
    # See GH-3248
    message = data.pop('message', None)
    if message:
        if 'sentry.interfaces.Message' not in data:
            # No Message interface yet: build one from the raw string.
            interface = get_interface('sentry.interfaces.Message')
            try:
                inst = interface.to_python({
                    'message': message,
                })
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass
        elif not data['sentry.interfaces.Message'].get('formatted'):
            # Message interface exists: attach the raw string as 'formatted'.
            interface = get_interface('sentry.interfaces.Message')
            try:
                inst = interface.to_python(
                    dict(
                        data['sentry.interfaces.Message'],
                        formatted=message,
                    ))
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass
    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data['type'] = eventtypes.infer(data).key
    data['version'] = self.version
    # TODO(dcramer): find a better place for this logic
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        # A lone stacktrace belongs to the single exception value.
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']
    if 'sentry.interfaces.Http' in data:
        try:
            ip_address = validate_ip(
                data['sentry.interfaces.Http'].get('env', {}).get('REMOTE_ADDR'),
                required=False,
            )
        except ValueError:
            ip_address = None
        if ip_address:
            # Backfill the user's IP without clobbering an explicit one.
            data.setdefault('sentry.interfaces.User', {})
            data['sentry.interfaces.User'].setdefault(
                'ip_address', ip_address)
    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)
    return data
def to_python(cls, data):
    """Build an Http request interface instance from raw SDK data.

    Requires 'url'. Uppercases and validates the method, splits the URL,
    normalizes the query string (dict payloads are urlencoded, a leading
    '?' is stripped), extracts cookies from headers when not supplied
    directly, serializes dict bodies to JSON, and trims oversized values.

    Raises InterfaceValidationError when 'url' is missing or the method
    is invalid.
    """
    if not data.get('url'):
        raise InterfaceValidationError("No value for 'url'")

    raw_method = data.get('method')
    if raw_method:
        method = raw_method.upper()
        # Optimize for the common path here, where it's a GET/POST, falling
        # back to a regular expresion test
        if method not in ('GET', 'POST') and not http_method_re.match(method):
            raise InterfaceValidationError("Invalid value for 'method'")
    else:
        method = None

    scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])

    # Explicit query_string wins; otherwise fall back to the URL's query.
    query_string = data.get('query_string') or query_bit
    if not query_string:
        query_string = ''
    elif isinstance(query_string, dict):
        # dict payloads are serialized into a urlencoded string
        pairs = [(to_bytes(k), to_bytes(v)) for k, v in query_string.items()]
        query_string = trim(urlencode(pairs), 4096)
    else:
        if query_string[0] == '?':
            # remove '?' prefix
            query_string = query_string[1:]
        query_string = trim(query_string, 4096)

    cookies = data.get('cookies')
    # if cookies were [also] included in headers we
    # strip them out
    headers = data.get('headers')
    if headers:
        headers, cookie_header = format_headers(headers)
        if cookie_header and not cookies:
            cookies = cookie_header
    else:
        headers = ()

    body = data.get('data')
    if isinstance(body, dict):
        body = json.dumps(body)
    if body:
        body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

    return cls(
        method=method,
        query_string=query_string,
        cookies=trim_pairs(format_cookies(cookies)),
        env=trim_dict(data.get('env') or {}),
        headers=trim_pairs(headers),
        data=fix_broken_encoding(body),
        # Store the URL stripped of its query string and fragment.
        url=urlunsplit((scheme, netloc, path, '', '')),
        fragment=trim(data.get('fragment') or fragment_bit, 1024),
    )
def normalize(self):
    """Normalize ``self.data`` in place and return it.

    Applies defaults and size limits to the top-level event attributes
    (level, logger, platform, timestamp, event_id, tags, extra, culprit,
    message) and folds a lone Stacktrace interface into a single-value
    Exception interface when both are present.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data

    data['version'] = self.version

    # 'level' must be a known log level; anything else degrades to ERROR
    if not isinstance(data.get('level'), (six.string_types, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR

    if not data.get('logger'):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        data['logger'] = trim(data['logger'], 64)

    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)

    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()

    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        # NOTE(review): '%s' is a platform-dependent strftime extension
        # (epoch seconds); stored as float
        timestamp = float(timestamp.strftime('%s'))

    data['timestamp'] = timestamp

    if not data.get('event_id'):
        data['event_id'] = uuid4().hex

    data.setdefault('message', None)
    data.setdefault('culprit', None)
    data.setdefault('time_spent', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('platform', None)
    data.setdefault('extra', {})

    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)

    data['tags'] = tags

    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}

    trim_dict(
        data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    # TODO(dcramer): find a better place for this logic
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        # fold the lone stacktrace into the single exception value
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']

    if 'sentry.interfaces.Http' in data:
        # default the culprit to the url
        if not data['culprit']:
            data['culprit'] = data['sentry.interfaces.Http']['url']

    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

    if data['message']:
        data['message'] = trim(
            data['message'], settings.SENTRY_MAX_MESSAGE_LENGTH)

    return data
def normalize_event_data(self, data):
    """Normalize a raw event ``data`` dict in place and return it.

    Fills in defaults (level, logger, timestamp, culprit, event_id and
    the standard nullable attributes), coerces tags into a list of
    2-tuples, and trims oversized values both at the top level and
    inside the known ``sentry.interfaces.*`` payloads.
    """
    # First we pull out our top-level (non-data attr) kwargs
    if not data.get('level') or data['level'] not in LOG_LEVELS_DICT:
        data['level'] = logging.ERROR
    if not data.get('logger'):
        data['logger'] = settings.DEFAULT_LOGGER_NAME

    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()

    if not data.get('culprit'):
        data['culprit'] = ''

    # We must convert date to local time so Django doesn't mess it up
    # based on TIME_ZONE
    if dj_settings.TIME_ZONE:
        if not timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=timezone.utc)
    elif timezone.is_aware(timestamp):
        timestamp = timestamp.replace(tzinfo=None)
    data['timestamp'] = timestamp

    if not data.get('event_id'):
        data['event_id'] = uuid.uuid4().hex

    data.setdefault('message', None)
    data.setdefault('time_spent', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('platform', None)
    data.setdefault('extra', {})

    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)
    data['tags'] = tags

    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}

    trim_dict(data['extra'], max_size=MAX_EXTRA_VARIABLE_SIZE)

    # HACK: move this to interfaces code
    if 'sentry.interfaces.Stacktrace' in data:
        for frame in data['sentry.interfaces.Stacktrace']['frames']:
            stack_vars = frame.get('vars', {})
            trim_dict(stack_vars)

    if 'sentry.interfaces.Exception' in data:
        exc_data = data['sentry.interfaces.Exception']
        for key in ('type', 'module', 'value'):
            value = exc_data.get(key)
            if value:
                exc_data[key] = trim(value)

    if 'sentry.interfaces.Message' in data:
        msg_data = data['sentry.interfaces.Message']
        # BUG FIX: the trimmed result was previously discarded — every
        # other call site assigns trim()'s return value back; do the
        # same here so the message is actually truncated to 1024.
        msg_data['message'] = trim(msg_data['message'], 1024)
        if msg_data.get('params'):
            msg_data['params'] = trim(msg_data['params'])

    if 'sentry.interfaces.Http' in data:
        http_data = data['sentry.interfaces.Http']
        for key in ('cookies', 'querystring', 'headers', 'env', 'url'):
            value = http_data.get(key)
            if not value:
                continue
            # exact ``type(value) == dict`` comparison kept on purpose:
            # switching to isinstance would change handling of dict
            # subclasses
            if type(value) == dict:
                trim_dict(value)
            else:
                http_data[key] = trim(value)

        value = http_data.get('data')
        if value:
            http_data['data'] = trim(value, 1024)

    return data
def normalize(self, request_env=None):
    """Normalize ``self.data`` in place and return it.

    The pipeline, in order: cast top-level values to their expected
    types, coerce a raw 'message' into the logentry interface, resolve
    '{{auto}}' IP placeholders from ``request_env``, validate against
    the event/tags JSON schemas, convert remaining keys through their
    interface classes, apply defaults, and trim oversized values.
    Validation problems are accumulated in ``data['errors']`` rather
    than raised.
    """
    request_env = request_env or {}
    data = self.data
    errors = data['errors'] = []

    # Ignore event meta data for now.
    data.pop('_meta', None)

    # Before validating with a schema, attempt to cast values to their desired types
    # so that the schema doesn't have to take every type variation into account.
    text = six.text_type
    fp_types = six.string_types + six.integer_types + (float, )

    def to_values(v):
        # lists/tuples are normalized to the {'values': [...]} envelope
        return {'values': v} if v and isinstance(v, (tuple, list)) else v

    def stringify(f):
        # floats beyond 2**53 lose integer precision; drop them
        if isinstance(f, float):
            return text(int(f)) if abs(f) < (1 << 53) else None
        return text(f)

    casts = {
        'environment': lambda v: text(v) if v is not None else v,
        'fingerprint': lambda v: list(x for x in map(stringify, v) if x is not None)
        if isinstance(v, list) and all(isinstance(f, fp_types) for f in v) else v,
        'release': lambda v: text(v) if v is not None else v,
        'dist': lambda v: text(v).strip() if v is not None else v,
        'time_spent': lambda v: int(v) if v is not None else v,
        'tags': lambda v: [(text(v_k).replace(' ', '-').strip(), text(v_v).strip())
                           for (v_k, v_v) in dict(v).items()],
        'timestamp': lambda v: process_timestamp(v),
        'platform': lambda v: v if v in VALID_PLATFORMS else 'other',
        'logentry': lambda v: v if isinstance(v, dict) else {'message': v},

        # These can be sent as lists and need to be converted to {'values': [...]}
        'exception': to_values,
        'breadcrumbs': to_values,
        'threads': to_values,
    }

    for c in casts:
        if c in data:
            try:
                data[c] = casts[c](data[c])
            except InvalidTimestamp as it:
                errors.append({'type': it.args[0], 'name': c, 'value': data[c]})
                del data[c]
            except Exception as e:
                errors.append({'type': EventError.INVALID_DATA, 'name': c, 'value': data[c]})
                del data[c]

    # raw 'message' is coerced to the Message interface, as its used for pure index of
    # searchable strings. If both a raw 'message' and a Message interface exist, try and
    # add the former as the 'formatted' attribute of the latter.
    # See GH-3248
    msg_str = data.pop('message', None)
    if msg_str:
        msg_if = data.get('logentry')
        msg_meta = data.get('_meta', {}).get('message')

        if not msg_if:
            msg_if = data['logentry'] = {'message': msg_str}
            if msg_meta:
                data.setdefault('_meta', {}).setdefault('logentry', {})['message'] = msg_meta

        if msg_if.get('message') != msg_str:
            if not msg_if.get('formatted'):
                msg_if['formatted'] = msg_str
                if msg_meta:
                    data.setdefault('_meta', {}).setdefault(
                        'logentry', {})['formatted'] = msg_meta

    # Fill in ip addresses marked as {{auto}}
    client_ip = request_env.get('client_ip')
    if client_ip:
        if get_path(data, ['sentry.interfaces.Http', 'env', 'REMOTE_ADDR']) == '{{auto}}':
            data['sentry.interfaces.Http']['env']['REMOTE_ADDR'] = client_ip

        if get_path(data, ['request', 'env', 'REMOTE_ADDR']) == '{{auto}}':
            data['request']['env']['REMOTE_ADDR'] = client_ip

        if get_path(data, ['sentry.interfaces.User', 'ip_address']) == '{{auto}}':
            data['sentry.interfaces.User']['ip_address'] = client_ip

        if get_path(data, ['user', 'ip_address']) == '{{auto}}':
            data['user']['ip_address'] = client_ip

    # Validate main event body and tags against schema.
    # XXX(ja): jsonschema does not like CanonicalKeyDict, so we need to pass
    # in the inner data dict.
    is_valid, event_errors = validate_and_default_interface(data.data, 'event')
    errors.extend(event_errors)
    if 'tags' in data:
        is_valid, tag_errors = validate_and_default_interface(data['tags'], 'tags', name='tags')
        errors.extend(tag_errors)

    # Validate interfaces
    for k in list(iter(data)):
        if k in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(k)

        if not value:
            self.logger.debug('Ignored empty interface value: %s', k)
            continue

        try:
            interface = get_interface(k)
        except ValueError:
            self.logger.debug('Ignored unknown attribute: %s', k)
            errors.append({'type': EventError.INVALID_ATTRIBUTE, 'name': k})
            continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception as e:
            # client-supplied garbage is only worth debug logging;
            # anything else is an actual server-side problem
            log = self.logger.debug if isinstance(
                e, InterfaceValidationError) else self.logger.error
            log('Discarded invalid value for interface: %s (%r)', k, value, exc_info=True)
            errors.append({'type': EventError.INVALID_DATA, 'name': k, 'value': value})

    # Additional data coercion and defaulting
    level = data.get('level') or DEFAULT_LOG_LEVEL
    if isinstance(level, int) or (isinstance(level, six.string_types) and level.isdigit()):
        level = LOG_LEVELS.get(int(level), DEFAULT_LOG_LEVEL)
    data['level'] = LOG_LEVELS_MAP.get(level, LOG_LEVELS_MAP[DEFAULT_LOG_LEVEL])

    # a dist without a release is meaningless
    if data.get('dist') and not data.get('release'):
        data['dist'] = None

    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()

    # TODO (alex) can this all be replaced by utcnow?
    # it looks like the only time that this would even be hit is when timestamp
    # is not defined, as the earlier process_timestamp already converts existing
    # timestamps to floats.
    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        timestamp = float(timestamp.strftime('%s'))

    data['timestamp'] = timestamp
    data['received'] = float(timezone.now().strftime('%s'))

    data.setdefault('checksum', None)
    data.setdefault('culprit', None)
    data.setdefault('dist', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('fingerprint', None)
    data.setdefault('logger', DEFAULT_LOGGER_NAME)
    data.setdefault('platform', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('tags', [])
    data.setdefault('transaction', None)

    # Fix case where legacy apps pass 'environment' as a tag
    # instead of a top level key.
    # TODO (alex) save() just reinserts the environment into the tags
    if not data.get('environment'):
        tagsdict = dict(data['tags'])
        if 'environment' in tagsdict:
            data['environment'] = tagsdict['environment']
            del tagsdict['environment']
            data['tags'] = tagsdict.items()

    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data['type'] = eventtypes.infer(data).key
    data['version'] = self.version

    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        # fold a lone stacktrace into the single exception value
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']

    # Exception mechanism needs SDK information to resolve proper names in
    # exception meta (such as signal names). "SDK Information" really means
    # the operating system version the event was generated on. Some
    # normalization still works without sdk_info, such as mach_exception
    # names (they can only occur on macOS).
    if exception:
        sdk_info = get_sdk_from_event(data)
        for ex in exception['values']:
            if 'mechanism' in ex:
                normalize_mechanism_meta(ex['mechanism'], sdk_info)

    # If there is no User ip_addres, update it either from the Http interface
    # or the client_ip of the request.
    auth = request_env.get('auth')
    is_public = auth and auth.is_public
    add_ip_platforms = ('javascript', 'cocoa', 'objc')

    http_ip = data.get('sentry.interfaces.Http', {}).get('env', {}).get('REMOTE_ADDR')
    if http_ip:
        data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', http_ip)
    elif client_ip and (is_public or data.get('platform') in add_ip_platforms):
        data.setdefault('sentry.interfaces.User', {}).setdefault('ip_address', client_ip)

    # Trim values
    data['logger'] = trim(data['logger'].strip(), 64)
    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

    if data['transaction']:
        data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

    return data
def to_python(cls, data, raw=False):
    """Build a stack frame instance from a raw ``data`` dict.

    Validates the payload against the frame schema, reconciles
    abs_path/filename, normalizes locals and context lines, converts
    addresses to hex strings, and returns ``cls(**kwargs)``.

    When ``raw`` is true the abs_path/filename reconciliation is
    skipped (the frame is stored as the client sent it).

    Raises InterfaceValidationError on schema failure or when none of
    filename/function/module/package is present.
    """
    is_valid, errors = validate_and_default_interface(data, cls.path)
    if not is_valid:
        raise InterfaceValidationError("Invalid stack frame data.")

    abs_path = data.get('abs_path')
    filename = data.get('filename')
    symbol = data.get('symbol')
    function = data.get('function')
    module = data.get('module')
    package = data.get('package')

    # For legacy reasons
    if function == '?':
        function = None

    # For consistency reasons
    if symbol == '?':
        symbol = None

    # Some of this processing should only be done for non raw frames
    if not raw:
        # absolute path takes priority over filename
        # (in the end both will get set)
        if not abs_path:
            abs_path = filename
            filename = None

        if not filename and abs_path:
            if is_url(abs_path):
                urlparts = urlparse(abs_path)
                if urlparts.path:
                    filename = urlparts.path
                else:
                    filename = abs_path
            else:
                filename = abs_path

    if not (filename or function or module or package):
        raise InterfaceValidationError(
            "No 'filename' or 'function' or 'module' or 'package'")

    platform = data.get('platform')

    context_locals = data.get('vars') or {}
    if isinstance(context_locals, (list, tuple)):
        # positional locals become index-keyed entries
        context_locals = dict(enumerate(context_locals))
    elif not isinstance(context_locals, dict):
        context_locals = {}
    context_locals = trim_dict(context_locals, object_hook=handle_nan)

    # extra data is used purely by internal systems,
    # so we dont trim it
    extra_data = data.get('data') or {}
    if isinstance(extra_data, (list, tuple)):
        extra_data = dict(enumerate(extra_data))

    # XXX: handle lines which were sent as 'null'
    context_line = trim(data.get('context_line'), 256)
    if context_line is not None:
        pre_context = data.get('pre_context', None)
        if pre_context:
            pre_context = [c or '' for c in pre_context]
        post_context = data.get('post_context', None)
        if post_context:
            post_context = [c or '' for c in post_context]
    else:
        # context lines are meaningless without a context_line
        pre_context, post_context = None, None

    in_app = validate_bool(data.get('in_app'), False)

    kwargs = {
        'abs_path': trim(abs_path, 2048),
        'filename': trim(filename, 256),
        'platform': platform,
        'module': trim(module, 256),
        'function': trim(function, 256),
        'package': package,
        'image_addr': to_hex_addr(data.get('image_addr')),
        'symbol': trim(symbol, 256),
        'symbol_addr': to_hex_addr(data.get('symbol_addr')),
        'instruction_addr': to_hex_addr(data.get('instruction_addr')),
        'in_app': in_app,
        'context_line': context_line,
        # TODO(dcramer): trim pre/post_context
        'pre_context': pre_context,
        'post_context': post_context,
        'vars': context_locals,
        'data': extra_data,
        'errors': data.get('errors'),
    }

    if data.get('lineno') is not None:
        lineno = int(data['lineno'])
        # negative line numbers are nonsense; drop them
        if lineno < 0:
            lineno = None
        kwargs['lineno'] = lineno
    else:
        kwargs['lineno'] = None

    if data.get('colno') is not None:
        kwargs['colno'] = int(data['colno'])
    else:
        kwargs['colno'] = None

    return cls(**kwargs)
def normalize(self):
    """Normalize ``self.data`` in place and return it.

    Applies defaults and limits to the top-level attributes, validates
    tag pairs (promoting an 'environment' tag to the top-level key),
    converts non-reserved keys through their interface classes, coerces
    a raw 'message' into the Message interface, infers the event type,
    folds a lone Stacktrace into the Exception interface, and derives
    the user IP from the Http env when present.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    data = self.data

    # 'level' must be a known log level; anything else degrades to ERROR
    if not isinstance(data.get('level'), (six.string_types, int)):
        data['level'] = logging.ERROR
    elif data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR

    if not data.get('logger') or not isinstance(data.get('logger'), six.string_types):
        data['logger'] = DEFAULT_LOGGER_NAME
    else:
        # logger names must also be valid tag keys
        logger = trim(data['logger'].strip(), 64)
        if tagstore.is_valid_key(logger):
            data['logger'] = logger
        else:
            data['logger'] = DEFAULT_LOGGER_NAME

    if data.get('platform'):
        data['platform'] = trim(data['platform'], 64)

    current_timestamp = timezone.now()
    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = current_timestamp

    if isinstance(timestamp, datetime):
        # We must convert date to local time so Django doesn't mess it up
        # based on TIME_ZONE
        if settings.TIME_ZONE:
            if not timezone.is_aware(timestamp):
                timestamp = timestamp.replace(tzinfo=timezone.utc)
        elif timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=None)
        # NOTE(review): '%s' is a platform-dependent strftime extension
        timestamp = float(timestamp.strftime('%s'))

    data['timestamp'] = timestamp
    data['received'] = float(timezone.now().strftime('%s'))

    if not data.get('event_id'):
        data['event_id'] = uuid4().hex

    data.setdefault('culprit', None)
    data.setdefault('transaction', None)
    data.setdefault('server_name', None)
    data.setdefault('site', None)
    data.setdefault('checksum', None)
    data.setdefault('fingerprint', None)
    data.setdefault('platform', None)
    data.setdefault('dist', None)
    data.setdefault('environment', None)
    data.setdefault('extra', {})
    data.setdefault('errors', [])

    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = list(tags.items())
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)

    data['tags'] = []
    for key, value in tags:
        key = six.text_type(key).strip()
        value = six.text_type(value).strip()
        if not (key and value):
            continue

        # XXX(dcramer): many legacy apps are using the environment tag
        # rather than the key itself
        if key == 'environment' and not data.get('environment'):
            data['environment'] = value
        else:
            data['tags'].append((key, value))

    if not isinstance(data['extra'], dict):
        # throw it away
        data['extra'] = {}

    trim_dict(data['extra'], max_size=settings.SENTRY_MAX_EXTRA_VARIABLE_SIZE)

    # TODO(dcramer): more of validate data needs stuffed into the manager
    for key in list(iter(data)):
        if key in CLIENT_RESERVED_ATTRS:
            continue

        value = data.pop(key)

        try:
            interface = get_interface(key)()
        except ValueError:
            continue

        try:
            inst = interface.to_python(value)
            data[inst.get_path()] = inst.to_json()
        except Exception:
            # XXX: we should consider logging this.
            pass

    # TODO(dcramer): this logic is duplicated in ``validate_data`` from
    # coreapi

    # message is coerced to an interface, as its used for pure
    # index of searchable strings
    # See GH-3248
    message = data.pop('message', None)
    if message:
        if 'sentry.interfaces.Message' not in data:
            interface = get_interface('sentry.interfaces.Message')
            try:
                inst = interface.to_python({
                    'message': message,
                })
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass
        elif not data['sentry.interfaces.Message'].get('formatted'):
            # keep the raw message as the 'formatted' variant
            interface = get_interface('sentry.interfaces.Message')
            try:
                inst = interface.to_python(
                    dict(
                        data['sentry.interfaces.Message'],
                        formatted=message,
                    )
                )
                data[inst.get_path()] = inst.to_json()
            except Exception:
                pass

    # the SDKs currently do not describe event types, and we must infer
    # them from available attributes
    data['type'] = eventtypes.infer(data).key
    data['version'] = self.version

    # TODO(dcramer): find a better place for this logic
    exception = data.get('sentry.interfaces.Exception')
    stacktrace = data.get('sentry.interfaces.Stacktrace')
    if exception and len(exception['values']) == 1 and stacktrace:
        # fold the lone stacktrace into the single exception value
        exception['values'][0]['stacktrace'] = stacktrace
        del data['sentry.interfaces.Stacktrace']

    if 'sentry.interfaces.Http' in data:
        try:
            ip_address = validate_ip(
                data['sentry.interfaces.Http'].get('env', {}).get('REMOTE_ADDR'),
                required=False,
            )
        except ValueError:
            ip_address = None
        if ip_address:
            data.setdefault('sentry.interfaces.User', {})
            data['sentry.interfaces.User'].setdefault('ip_address', ip_address)

    if data['culprit']:
        data['culprit'] = trim(data['culprit'], MAX_CULPRIT_LENGTH)

    if data['transaction']:
        data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH)

    return data
def to_python(cls, data):
    """Construct a stack frame instance from a raw frame mapping.

    Validates the string-ish fields, reconciles abs_path/filename,
    normalizes locals, context lines and numeric fields, trims all
    values, and returns ``cls(**kwargs)``.

    Raises InterfaceValidationError when a field has the wrong type or
    when none of filename/function/module is present.
    """
    # reject non-string values for the identifying fields up front
    for field in ('abs_path', 'filename', 'function', 'module'):
        if not isinstance(data.get(field), (string_types, NoneType)):
            raise InterfaceValidationError("Invalid value for '%s'" % field)

    abs_path = data.get('abs_path')
    filename = data.get('filename')
    function = data.get('function')
    module = data.get('module')

    # absolute path takes priority over filename
    # (in the end both will get set)
    if not abs_path:
        abs_path, filename = filename, None

    if abs_path and not filename:
        # default the filename from the path; for URLs prefer the
        # path component when one exists
        filename = abs_path
        if is_url(abs_path):
            url_path = urlparse(abs_path).path
            if url_path:
                filename = url_path

    if not (filename or function or module):
        raise InterfaceValidationError("No 'filename' or 'function' or 'module'")

    # legacy clients send '?' for unknown functions
    if function == '?':
        function = None

    frame_locals = data.get('vars') or {}
    if isinstance(frame_locals, (list, tuple)):
        # positional locals become index-keyed entries
        frame_locals = dict(enumerate(frame_locals))
    elif not isinstance(frame_locals, dict):
        frame_locals = {}
    frame_locals = trim_dict(frame_locals, object_hook=handle_nan)

    # extra data is used purely by internal systems,
    # so we dont trim it
    internal_data = data.get('data') or {}
    if isinstance(internal_data, (list, tuple)):
        internal_data = dict(enumerate(internal_data))

    # XXX: handle lines which were sent as 'null'
    context_line = trim(data.get('context_line'), 256)
    if context_line is not None:
        pre_context = data.get('pre_context', None)
        if pre_context:
            pre_context = [line or '' for line in pre_context]
        post_context = data.get('post_context', None)
        if post_context:
            post_context = [line or '' for line in post_context]
    else:
        # context lines are meaningless without a context_line
        pre_context, post_context = None, None

    try:
        in_app = validate_bool(data.get('in_app'), False)
    except AssertionError:
        raise InterfaceValidationError("Invalid value for 'in_app'")

    instruction_offset = data.get('instruction_offset')
    if instruction_offset is not None \
            and not isinstance(instruction_offset, (int, long)):
        raise InterfaceValidationError("Invalid value for 'instruction_offset'")

    def _opt_int(key):
        # None when the key is absent or null, int(...) otherwise
        raw = data.get(key)
        return None if raw is None else int(raw)

    lineno = _opt_int('lineno')
    if lineno is not None and lineno < 0:
        # negative line numbers are nonsense; drop them
        lineno = None

    kwargs = {
        'abs_path': trim(abs_path, 256),
        'filename': trim(filename, 256),
        'module': trim(module, 256),
        'function': trim(function, 256),
        'package': trim(data.get('package'), 256),
        'symbol_addr': trim(data.get('symbol_addr'), 16),
        'instruction_addr': trim(data.get('instruction_addr'), 16),
        'instruction_offset': instruction_offset,
        'in_app': in_app,
        'context_line': context_line,
        # TODO(dcramer): trim pre/post_context
        'pre_context': pre_context,
        'post_context': post_context,
        'vars': frame_locals,
        'data': internal_data,
        'errors': data.get('errors'),
        'lineno': lineno,
        'colno': _opt_int('colno'),
    }

    return cls(**kwargs)
def to_python(cls, data):
    """Build an Http interface instance from raw ``data``.

    Legacy variant: required fields are checked with ``assert`` (note
    that asserts are stripped under ``python -O``; later versions of
    this method raise InterfaceValidationError instead).
    """
    assert data.get('url')

    kwargs = {}

    if data.get('method'):
        method = data['method'].upper()
        assert method in HTTP_METHODS
        kwargs['method'] = method
    else:
        kwargs['method'] = None

    scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])

    query_string = data.get('query_string') or query_bit
    if query_string:
        # if querystring was a dict, convert it to a string
        if isinstance(query_string, dict):
            query_string = urlencode(query_string.items())
        else:
            query_string = query_string
            if query_string[0] == '?':
                # remove '?' prefix
                query_string = query_string[1:]
        kwargs['query_string'] = trim(query_string, 1024)
    else:
        kwargs['query_string'] = ''

    fragment = data.get('fragment') or fragment_bit

    cookies = data.get('cookies')
    # if cookies were [also] included in headers we
    # strip them out
    headers = data.get('headers')
    if headers:
        if 'Cookie' in headers and not cookies:
            cookies = headers.pop('Cookie')
        headers = trim_dict(headers)
    else:
        headers = {}

    body = data.get('data')
    if isinstance(body, (list, tuple)):
        body = trim_dict(dict(enumerate(body)))
    elif isinstance(body, dict):
        body = trim_dict(body)
    elif body:
        body = trim(body, 2048)

    # BUG FIX: only attempt form decoding on string bodies. Previously
    # a missing body crashed here ("'=' in None" -> TypeError) and a
    # dict body could be mis-routed into parse_qsl when the
    # Content-Type matched the form type.
    if isinstance(body, basestring) and \
            headers.get('Content-Type') == cls.FORM_TYPE and '=' in body:
        body = dict(parse_qsl(body))

    # if cookies were a string, convert to a dict
    # parse_qsl will parse both acceptable formats:
    #  a=b&c=d
    # and
    #  a=b;c=d
    if isinstance(cookies, basestring):
        cookies = dict(parse_qsl(cookies, keep_blank_values=True))
    elif not cookies:
        cookies = {}

    kwargs['cookies'] = trim_dict(cookies)
    kwargs['env'] = trim_dict(data.get('env') or {})
    kwargs['headers'] = headers
    kwargs['data'] = body
    kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
    kwargs['fragment'] = trim(fragment, 256)
    return cls(**kwargs)
def to_python(cls, data):
    """Build an Http interface instance from a raw ``data`` dict.

    In addition to the usual URL/method/header/cookie normalization,
    this variant decodes the body via ``heuristic_decode`` (JSON or URL
    encoded form) without trusting the Content-Type header, and records
    the result as ``inferred_content_type``.

    Raises InterfaceValidationError when 'url' is missing or 'method'
    is not a valid HTTP method token.
    """
    if not data.get('url'):
        raise InterfaceValidationError("No value for 'url'")

    kwargs = {}

    if data.get('method'):
        method = data['method'].upper()
        # Optimize for the common path here, where it's a GET/POST, falling
        # back to a regular expression test
        if method not in ('GET', 'POST') and not http_method_re.match(method):
            raise InterfaceValidationError("Invalid value for 'method'")
        kwargs['method'] = method
    else:
        kwargs['method'] = None

    scheme, netloc, path, query_bit, fragment_bit = urlsplit(data['url'])

    query_string = data.get('query_string') or query_bit
    if query_string:
        # if querystring was a dict, convert it to a string
        if isinstance(query_string, dict):
            query_string = urlencode(
                [(to_bytes(k), to_bytes(v)) for k, v in query_string.items()]
            )
        else:
            query_string = query_string
            if query_string[0] == '?':
                # remove '?' prefix
                query_string = query_string[1:]
        kwargs['query_string'] = trim(query_string, 4096)
    else:
        kwargs['query_string'] = ''

    fragment = data.get('fragment') or fragment_bit

    cookies = data.get('cookies')
    # if cookies were [also] included in headers we
    # strip them out
    headers = data.get('headers')
    if headers:
        headers, cookie_header = format_headers(headers)
        if not cookies and cookie_header:
            cookies = cookie_header
    else:
        headers = ()

    # We prefer the body to be a string, since we can then attempt to parse it
    # as JSON OR decode it as a URL encoded query string, without relying on
    # the correct content type header being passed.
    body = data.get('data')

    # headers is a sequence of (key, value) pairs at this point
    content_type = next((v for k, v in headers if k == 'Content-Type'), None)

    # Remove content type parameters
    if content_type is not None:
        content_type = content_type.partition(';')[0].rstrip()

    # We process request data once during ingestion and again when
    # requesting the http interface over the API. Avoid overwriting
    # decoding the body again.
    inferred_content_type = data.get('inferred_content_type', content_type)

    if 'inferred_content_type' not in data and not isinstance(body, dict):
        body, inferred_content_type = heuristic_decode(body, content_type)

    if body:
        body = trim(body, settings.SENTRY_MAX_HTTP_BODY_SIZE)

    kwargs['inferred_content_type'] = inferred_content_type
    kwargs['cookies'] = trim_pairs(format_cookies(cookies))
    kwargs['env'] = trim_dict(data.get('env') or {})
    kwargs['headers'] = trim_pairs(headers)
    kwargs['data'] = fix_broken_encoding(body)
    kwargs['url'] = urlunsplit((scheme, netloc, path, '', ''))
    kwargs['fragment'] = trim(fragment, 1024)
    return cls(**kwargs)
def to_python(cls, data, raw=False):
    """Build a stack frame instance from a raw ``data`` dict.

    Validates the string fields, reconciles abs_path/filename (skipped
    when ``raw`` is true), filters the platform against
    VALID_PLATFORMS, normalizes locals, context lines and addresses,
    and returns ``cls(**kwargs)``.

    Raises InterfaceValidationError on bad field types or when none of
    filename/function/module/package is present.
    """
    abs_path = data.get('abs_path')
    filename = data.get('filename')
    symbol = data.get('symbol')
    function = data.get('function')
    module = data.get('module')
    package = data.get('package')

    # For legacy reasons
    if function == '?':
        function = None

    # For consistency reasons
    if symbol == '?':
        symbol = None

    for name in ('abs_path', 'filename', 'symbol', 'function', 'module', 'package'):
        v = data.get(name)
        if v is not None and not isinstance(v, six.string_types):
            raise InterfaceValidationError("Invalid value for '%s'" % name)

    # Some of this processing should only be done for non raw frames
    if not raw:
        # absolute path takes priority over filename
        # (in the end both will get set)
        if not abs_path:
            abs_path = filename
            filename = None

        if not filename and abs_path:
            if is_url(abs_path):
                urlparts = urlparse(abs_path)
                if urlparts.path:
                    filename = urlparts.path
                else:
                    filename = abs_path
            else:
                filename = abs_path

    if not (filename or function or module or package):
        raise InterfaceValidationError("No 'filename' or 'function' or "
                                       "'module' or 'package'")

    # unknown platforms are dropped rather than rejected
    platform = data.get('platform')
    if platform not in VALID_PLATFORMS:
        platform = None

    context_locals = data.get('vars') or {}
    if isinstance(context_locals, (list, tuple)):
        # positional locals become index-keyed entries
        context_locals = dict(enumerate(context_locals))
    elif not isinstance(context_locals, dict):
        context_locals = {}
    context_locals = trim_dict(context_locals, object_hook=handle_nan)

    # extra data is used purely by internal systems,
    # so we dont trim it
    extra_data = data.get('data') or {}
    if isinstance(extra_data, (list, tuple)):
        extra_data = dict(enumerate(extra_data))

    # XXX: handle lines which were sent as 'null'
    context_line = trim(data.get('context_line'), 256)
    if context_line is not None:
        pre_context = data.get('pre_context', None)
        if pre_context:
            pre_context = [c or '' for c in pre_context]
        post_context = data.get('post_context', None)
        if post_context:
            post_context = [c or '' for c in post_context]
    else:
        # context lines are meaningless without a context_line
        pre_context, post_context = None, None

    try:
        in_app = validate_bool(data.get('in_app'), False)
    except AssertionError:
        raise InterfaceValidationError("Invalid value for 'in_app'")

    instruction_offset = data.get('instruction_offset')
    if instruction_offset is not None and \
            not isinstance(instruction_offset, six.integer_types):
        raise InterfaceValidationError("Invalid value for 'instruction_offset'")

    kwargs = {
        'abs_path': trim(abs_path, 2048),
        'filename': trim(filename, 256),
        'platform': platform,
        'module': trim(module, 256),
        'function': trim(function, 256),
        'package': package,
        'image_addr': to_hex_addr(data.get('image_addr')),
        'symbol': trim(symbol, 256),
        'symbol_addr': to_hex_addr(data.get('symbol_addr')),
        'instruction_addr': to_hex_addr(data.get('instruction_addr')),
        'instruction_offset': instruction_offset,
        'in_app': in_app,
        'context_line': context_line,
        # TODO(dcramer): trim pre/post_context
        'pre_context': pre_context,
        'post_context': post_context,
        'vars': context_locals,
        'data': extra_data,
        'errors': data.get('errors'),
    }

    if data.get('lineno') is not None:
        lineno = int(data['lineno'])
        # negative line numbers are nonsense; drop them
        if lineno < 0:
            lineno = None
        kwargs['lineno'] = lineno
    else:
        kwargs['lineno'] = None

    if data.get('colno') is not None:
        kwargs['colno'] = int(data['colno'])
    else:
        kwargs['colno'] = None

    return cls(**kwargs)
def normalize_event_data(self, data):
    """Normalize a raw event payload in place and return it.

    Fills in defaults for the top-level attributes (level, logger,
    timestamp, event_id, message, culprit, ...), coerces ``tags`` into
    a list of 2-tuples, and trims oversized values inside the known
    ``sentry.interfaces.*`` payloads.
    """
    # TODO(dcramer): store http.env.REMOTE_ADDR as user.ip
    # First we pull out our top-level (non-data attr) kwargs
    if not data.get('level') or data['level'] not in LOG_LEVELS:
        data['level'] = logging.ERROR
    if not data.get('logger'):
        data['logger'] = DEFAULT_LOGGER_NAME

    timestamp = data.get('timestamp')
    if not timestamp:
        timestamp = timezone.now()

    # We must convert date to local time so Django doesn't mess it up
    # based on TIME_ZONE
    if settings.TIME_ZONE:
        if not timezone.is_aware(timestamp):
            timestamp = timestamp.replace(tzinfo=timezone.utc)
    elif timezone.is_aware(timestamp):
        timestamp = timestamp.replace(tzinfo=None)
    data['timestamp'] = timestamp

    if not data.get('event_id'):
        data['event_id'] = uuid.uuid4().hex

    for key in ('message', 'culprit', 'time_spent', 'server_name',
                'site', 'checksum', 'platform'):
        data.setdefault(key, None)
    data.setdefault('extra', {})

    tags = data.get('tags')
    if not tags:
        tags = []
    # full support for dict syntax
    elif isinstance(tags, dict):
        tags = tags.items()
    # prevent [tag, tag, tag] (invalid) syntax
    elif not all(len(t) == 2 for t in tags):
        tags = []
    else:
        tags = list(tags)
    data['tags'] = tags

    data['message'] = strip(data['message'])
    data['culprit'] = strip(data['culprit'])

    if not isinstance(data['extra'], dict):
        # not a mapping -- throw it away
        data['extra'] = {}

    # trim_dict mutates in place (unlike trim, which returns a value)
    trim_dict(data['extra'], max_size=MAX_EXTRA_VARIABLE_SIZE)

    if 'sentry.interfaces.Exception' in data:
        # promote a bare exception payload to the multi-value form
        if 'values' not in data['sentry.interfaces.Exception']:
            data['sentry.interfaces.Exception'] = {
                'values': [data['sentry.interfaces.Exception']]
            }

        # convert stacktrace + exception into expanded exception
        if 'sentry.interfaces.Stacktrace' in data:
            data['sentry.interfaces.Exception']['values'][0]['stacktrace'] = \
                data.pop('sentry.interfaces.Stacktrace')

        for exc_data in data['sentry.interfaces.Exception']['values']:
            for key in ('type', 'module', 'value'):
                value = exc_data.get(key)
                if value:
                    exc_data[key] = trim(value)

            if exc_data.get('stacktrace'):
                for frame in exc_data['stacktrace']['frames']:
                    trim_dict(frame.get('vars', {}))

    if 'sentry.interfaces.Stacktrace' in data:
        for frame in data['sentry.interfaces.Stacktrace']['frames']:
            trim_dict(frame.get('vars', {}))

    if 'sentry.interfaces.Message' in data:
        msg_data = data['sentry.interfaces.Message']
        # FIX: trim() returns the trimmed value; the result was
        # previously discarded, so the message was never trimmed.
        msg_data['message'] = trim(msg_data['message'], 1024)
        if msg_data.get('params'):
            msg_data['params'] = trim(msg_data['params'])

    if 'sentry.interfaces.Http' in data:
        http_data = data['sentry.interfaces.Http']
        for key in ('cookies', 'querystring', 'headers', 'env', 'url'):
            value = http_data.get(key)
            if not value:
                continue
            # isinstance instead of the old type(value) == dict check
            if isinstance(value, dict):
                trim_dict(value)
            else:
                http_data[key] = trim(value)

        value = http_data.get('data')
        if value:
            http_data['data'] = trim(value, 2048)

        # default the culprit to the url
        if not data['culprit']:
            data['culprit'] = trim(strip(http_data.get('url')),
                                   MAX_CULPRIT_LENGTH)

    return data