def _get_project_from_id(self, project_id):
    if not project_id:
        return
    if not project_id.isdigit():
        track_outcome(0, 0, None, Outcome.INVALID, "project_id")
        raise APIError('Invalid project_id: %r' % project_id)
    try:
        return Project.objects.get_from_cache(id=project_id)
    except Project.DoesNotExist:
        track_outcome(0, 0, None, Outcome.INVALID, "project_id")
        raise APIError('Invalid project_id: %r' % project_id)
def process(self, request, project, key, auth, helper, data, attachments=None, **kwargs):
    metrics.incr('events.total', skip_internal=False)

    if not data:
        track_outcome(project.organization_id, project.id, key.id, Outcome.INVALID, "no_data")
        raise APIError('No JSON data was found')

    remote_addr = request.META['REMOTE_ADDR']

    event_manager = EventManager(
        data,
        project=project,
        key=key,
        auth=auth,
        client_ip=remote_addr,
        user_agent=helper.context.agent,
        version=auth.version,
        content_encoding=request.META.get('HTTP_CONTENT_ENCODING', ''),
    )
    del data

    self.pre_normalize(event_manager, helper)
    event_manager.normalize()

    data = event_manager.get_data()
    dict_data = dict(data)
    data_size = len(json.dumps(dict_data))

    if data_size > 10000000:
        metrics.timing('events.size.rejected', data_size)
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.INVALID,
            'too_large',
            event_id=dict_data.get('event_id')
        )
        raise APIForbidden("Event size exceeded 10MB after normalization.")

    metrics.timing(
        'events.size.data.post_storeendpoint',
        data_size,
        tags={'project_id': project.id}
    )

    return process_event(event_manager, project, key, remote_addr, helper, attachments)
def _parse_header(self, request, helper, project):
    auth = self.auth_helper_cls.auth_from_request(request)

    if auth.version not in PROTOCOL_VERSIONS:
        track_outcome(
            project.organization_id,
            project.id,
            None,
            Outcome.INVALID,
            "auth_version")
        raise APIError(
            'Client using unsupported server protocol version (%r)' %
            six.text_type(auth.version or '')
        )

    if not auth.client:
        track_outcome(project.organization_id, project.id, None, Outcome.INVALID, "auth_client")
        raise APIError("Client did not send 'client' identifier")

    return auth
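# A minimal sketch (not part of the code above) of the client-side auth that
# _parse_header expects to be able to extract from the request. The header
# layout follows the documented X-Sentry-Auth format and is an assumption
# here; the same fields may alternatively arrive in the querystring.
example_headers = {
    "X-Sentry-Auth": (
        "Sentry sentry_version=7, "
        "sentry_client=raven-python/6.9.0, "
        "sentry_key=<public_key>"
    ),
    "Content-Type": "application/json",
}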
def _dispatch(self, request, helper, sentry_key, project_id=None, origin=None, *args, **kwargs):
    if request.method != 'POST':
        track_outcome(0, 0, None, Outcome.INVALID, "disallowed_method")
        return HttpResponseNotAllowed(['POST'])

    content_type = request.META.get('CONTENT_TYPE')
    if content_type is None or not content_type.startswith(self.content_types):
        track_outcome(0, 0, None, Outcome.INVALID, "content_type")
        raise APIError('Invalid Content-Type')

    request.user = AnonymousUser()

    project = self._get_project_from_id(project_id)
    helper.context.bind_project(project)

    auth = Auth({'sentry_key': sentry_key}, is_public=False)
    auth.client = 'sentry.unreal_engine'

    key = helper.project_key_from_auth(auth)
    if key.project_id != project.id:
        track_outcome(
            project.organization_id,
            project.id,
            None,
            Outcome.INVALID,
            "multi_project_id")
        raise APIError('Two different projects were specified')

    helper.context.bind_auth(auth)
    return super(APIView, self).dispatch(
        request=request, project=project, auth=auth, helper=helper, key=key, **kwargs
    )
def post(self, request, project, helper, key, **kwargs):
    json_body = safely_load_json_string(request.body)
    report_type = self.security_report_type(json_body)
    if report_type is None:
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.INVALID,
            "security_report_type")
        raise APIError('Unrecognized security report type')
    interface = get_interface(report_type)

    try:
        instance = interface.from_raw(json_body)
    except jsonschema.ValidationError as e:
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.INVALID,
            "security_report")
        raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

    # Do origin check based on the `document-uri` key as explained in `_dispatch`.
    origin = instance.get_origin()
    if not is_valid_origin(origin, project):
        if project:
            track_outcome(
                project.organization_id,
                project.id,
                key.id,
                Outcome.INVALID,
                FilterStatKeys.CORS)
        raise APIForbidden('Invalid origin')

    data = {
        'interface': interface.path,
        'report': instance,
        'release': request.GET.get('sentry_release'),
        'environment': request.GET.get('sentry_environment'),
    }

    self.process(request, project=project, helper=helper, data=data, key=key, **kwargs)

    return HttpResponse(content_type='application/javascript', status=201)
def _dispatch(self, request, helper, project_id=None, origin=None, *args, **kwargs):
    # TODO(ja): Refactor shared code with CspReportView. Especially, look at
    # the sentry_key override and test it.

    # A minidump submission as implemented by Breakpad and Crashpad or any
    # other library following the Mozilla Socorro protocol is a POST request
    # without Origin or Referer headers. Therefore, we cannot validate the
    # origin of the request, but we *can* validate the "prod" key in future.
    if request.method != 'POST':
        track_outcome(0, 0, None, Outcome.INVALID, "disallowed_method")
        return HttpResponseNotAllowed(['POST'])

    content_type = request.META.get('CONTENT_TYPE')
    # In case of multipart/form-data, the Content-Type header also includes
    # a boundary. Therefore, we cannot check for an exact match.
    if content_type is None or not content_type.startswith(self.content_types):
        track_outcome(0, 0, None, Outcome.INVALID, "content_type")
        raise APIError('Invalid Content-Type')

    request.user = AnonymousUser()

    project = self._get_project_from_id(project_id)
    helper.context.bind_project(project)

    # This is yanking the auth from the querystring since it's not
    # in the POST body. This means we expect a `sentry_key` and
    # `sentry_version` to be set in querystring
    auth = self.auth_helper_cls.auth_from_request(request)

    key = helper.project_key_from_auth(auth)
    if key.project_id != project.id:
        track_outcome(
            project.organization_id,
            project.id,
            None,
            Outcome.INVALID,
            "multi_project_id")
        raise APIError('Two different projects were specified')

    helper.context.bind_auth(auth)
    return super(APIView, self).dispatch(
        request=request, project=project, auth=auth, helper=helper, key=key, **kwargs
    )
def _dispatch(self, request, helper, project_id=None, origin=None, *args, **kwargs):
    # A CSP report is sent as a POST request with no Origin or Referer
    # header. What we're left with is a 'document-uri' key which is
    # inside of the JSON body of the request. This 'document-uri' value
    # should be treated as an origin check since it refers to the page
    # that triggered the report. The Content-Type is supposed to be
    # `application/csp-report`, but Firefox sends it as `application/json`.
    if request.method != 'POST':
        track_outcome(0, 0, None, Outcome.INVALID, "disallowed_method")
        return HttpResponseNotAllowed(['POST'])

    if request.META.get('CONTENT_TYPE') not in self.content_types:
        track_outcome(0, 0, None, Outcome.INVALID, "content_type")
        raise APIError('Invalid Content-Type')

    request.user = AnonymousUser()

    project = self._get_project_from_id(project_id)
    helper.context.bind_project(project)

    # This is yanking the auth from the querystring since it's not
    # in the POST body. This means we expect a `sentry_key` and
    # `sentry_version` to be set in querystring
    auth = self.auth_helper_cls.auth_from_request(request)

    key = helper.project_key_from_auth(auth)
    if key.project_id != project.id:
        track_outcome(
            project.organization_id,
            project.id,
            None,
            Outcome.INVALID,
            "multi_project_id")
        raise APIError('Two different projects were specified')

    helper.context.bind_auth(auth)
    return super(APIView, self).dispatch(
        request=request, project=project, auth=auth, helper=helper, key=key, **kwargs
    )
def _dispatch(self, request, helper, project_id=None, origin=None, *args, **kwargs):
    request.user = AnonymousUser()

    project = self._get_project_from_id(project_id)
    if project:
        helper.context.bind_project(project)

    if origin is not None:
        # This check is specific for clients who need CORS support
        if not project:
            raise APIError('Client must be upgraded for CORS support')
        if not is_valid_origin(origin, project):
            track_outcome(
                project.organization_id,
                project.id,
                None,
                Outcome.INVALID,
                FilterStatKeys.CORS)
            raise APIForbidden('Invalid origin: %s' % (origin, ))

    # XXX: It seems that the OPTIONS call does not always include custom headers
    if request.method == 'OPTIONS':
        response = self.options(request, project)
    else:
        auth = self._parse_header(request, helper, project)

        key = helper.project_key_from_auth(auth)

        # Legacy API was /api/store/ and the project ID was only available elsewhere
        if not project:
            project = Project.objects.get_from_cache(id=key.project_id)
            helper.context.bind_project(project)
        elif key.project_id != project.id:
            raise APIError('Two different projects were specified')

        helper.context.bind_auth(auth)

        # Explicitly bind Organization so we don't implicitly query it later
        # this just allows us to comfortably assure that `project.organization` is safe.
        # This also allows us to pull the object from cache, instead of being
        # implicitly fetched from database.
        project.organization = Organization.objects.get_from_cache(
            id=project.organization_id)

        response = super(APIView, self).dispatch(
            request=request, project=project, auth=auth, helper=helper, key=key, **kwargs
        )

    if origin:
        if origin == 'null':
            # If an Origin is `null`, but we got this far, that means
            # we've gotten past our CORS check for some reason. But the
            # problem is that we can't return "null" as a valid response
            # to `Access-Control-Allow-Origin` and we don't have another
            # value to work with, so just allow '*' since they've gotten
            # this far.
            response['Access-Control-Allow-Origin'] = '*'
        else:
            response['Access-Control-Allow-Origin'] = origin

    return response
def save_attachment(
    cache_key, attachment, project, event_id, key_id=None, group_id=None, start_time=None
):
    """
    Persists a cached event attachment into the file store.

    Emits one outcome, either ACCEPTED on success or INVALID(missing_chunks)
    if retrieving the attachment data fails.

    :param cache_key:  The cache key at which the attachment is stored for
                       debugging purposes.
    :param attachment: The ``CachedAttachment`` instance to store.
    :param project:    The project model that this attachment belongs to.
    :param event_id:   Identifier of the event that this attachment belongs
                       to. The event does not have to be stored yet.
    :param key_id:     Optional identifier of the DSN that was used to ingest
                       the attachment.
    :param group_id:   Optional group identifier for the event. May be empty
                       if the event has not been stored yet, or if it is not
                       grouped.
    :param start_time: UNIX timestamp (float) when the attachment was
                       ingested. If missing, the current time is used.
    """
    if start_time is not None:
        timestamp = to_datetime(start_time)
    else:
        timestamp = datetime.utcnow().replace(tzinfo=UTC)

    try:
        data = attachment.data
    except MissingAttachmentChunks:
        track_outcome(
            org_id=project.organization_id,
            project_id=project.id,
            key_id=key_id,
            outcome=Outcome.INVALID,
            reason="missing_chunks",
            timestamp=timestamp,
            event_id=event_id,
            category=DataCategory.ATTACHMENT,
        )

        logger.exception("Missing chunks for cache_key=%s", cache_key)
        return

    file = File.objects.create(
        name=attachment.name,
        type=attachment.type,
        headers={"Content-Type": attachment.content_type},
    )
    file.putfile(six.BytesIO(data))

    EventAttachment.objects.create(
        event_id=event_id,
        project_id=project.id,
        group_id=group_id,
        name=attachment.name,
        file=file,
    )

    track_outcome(
        org_id=project.organization_id,
        project_id=project.id,
        key_id=key_id,
        outcome=Outcome.ACCEPTED,
        reason=None,
        timestamp=timestamp,
        event_id=event_id,
        category=DataCategory.ATTACHMENT,
        quantity=attachment.size or 1,
    )
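# A minimal usage sketch, not part of the original module: store one cached
# attachment for an event that already has an id. The CachedAttachment import
# path, the attachment type string, and the cache key are assumptions; the
# ``project`` and ``key`` objects are whatever models the caller already has.
from time import time

from sentry.attachments import CachedAttachment  # assumed import path

attachment = CachedAttachment(
    name="crash.dmp",
    data=b"MDMP...",            # raw attachment payload
    type="event.minidump",      # assumed attachment type string
)
save_attachment(
    cache_key="attachments:example",                # hypothetical, used only for logging
    attachment=attachment,
    project=project,
    event_id="9f1a2b3c4d5e6f708192a3b4c5d6e7f8",    # hypothetical 32-char hex id
    key_id=key.id,
    start_time=time(),
)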
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config):
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data["event_id"]

    if should_filter:
        # Mark that the event_filtered signal is sent. Do this before emitting
        # the outcome to avoid a potential race between OutcomesConsumer and
        # `event_filtered.send_robust` below.
        mark_signal_sent(project_config.project_id, event_id)

        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
        )
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)
        event_filtered.send_robust(ip=remote_addr, project=project, sender=process_event)
        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug("Dropped event due to error with rate limiter")

        # Mark that the event_dropped signal is sent. Do this before emitting
        # the outcome to avoid a potential race between OutcomesConsumer and
        # `event_dropped.send_robust` below.
        mark_signal_sent(project_config.project_id, event_id)

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id,
        )
        metrics.incr("events.dropped", tags={"reason": reason or "unknown"}, skip_internal=False)
        event_dropped.send_robust(
            ip=remote_addr, project=project, reason_code=reason, sender=process_event
        )
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = "ev:%s:%s" % (project_config.project_id, event_id)

    if cache.get(cache_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id, ))

    config = project_config.config
    datascrubbing_settings = config.get("datascrubbingSettings") or {}

    scrub_ip_address = datascrubbing_settings.get("scrubIpAddresses")
    scrub_data = datascrubbing_settings.get("scrubData")

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields = datascrubbing_settings.get("sensitiveFields")
        exclude_fields = datascrubbing_settings.get("excludeFields")
        scrub_defaults = datascrubbing_settings.get("scrubDefaults")

        SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
            exclude_fields=exclude_fields,
        ).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, "", 60 * 60)  # Cache for 1 hour

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)

    return event_id
def filter_attachments_for_group(attachments, job):
    """
    Removes crash reports exceeding the group-limit.

    If the project or organization is configured to limit the amount of crash
    reports per group, the number of stored crashes is limited. This requires
    `event.group` to be set.

    Emits one outcome per removed attachment.

    :param attachments: The full list of attachments to filter.
    :param job:         The job context container.
    """
    if not attachments:
        return attachments

    event = job["event"]
    project = event.project

    # The setting is both an organization and project setting. The project
    # setting strictly overrides the organization setting, unless set to the
    # default.
    max_crashreports = get_max_crashreports(project)
    if not max_crashreports:
        max_crashreports = get_max_crashreports(project.organization)

    # The number of crash reports is cached per group
    crashreports_key = get_crashreport_key(event.group_id)

    # Only fetch the number of stored crash reports if there is a crash report
    # in the list of attachments. Otherwise, we won't require this number.
    if any(attachment.type in CRASH_REPORT_TYPES for attachment in attachments):
        cached_reports = get_stored_crashreports(crashreports_key, event, max_crashreports)
    else:
        cached_reports = 0
    stored_reports = cached_reports

    filtered = []
    refund_quantity = 0
    for attachment in attachments:
        # If the attachment is a crash report (e.g. minidump), we need to honor
        # the store_crash_reports setting. Otherwise, we assume that the client
        # has already verified PII and just store the attachment.
        if attachment.type in CRASH_REPORT_TYPES:
            if crashreports_exceeded(stored_reports, max_crashreports):
                track_outcome(
                    org_id=event.project.organization_id,
                    project_id=job["project_id"],
                    key_id=job["key_id"],
                    outcome=Outcome.FILTERED,
                    reason=FilterStatKeys.CRASH_REPORT_LIMIT,
                    timestamp=to_datetime(job["start_time"]),
                    event_id=event.event_id,
                    category=DataCategory.ATTACHMENT,
                    quantity=attachment.size,
                )

                # Quotas are counted with at least ``1`` for attachments.
                refund_quantity += attachment.size or 1
                continue
            stored_reports += 1

        filtered.append(attachment)

    # Check if we have exceeded the stored crash reports count. If so, we
    # persist the current maximum (not the actual number!) into the cache. Next
    # time when loading from the cache, we will validate that this number has
    # not changed, or otherwise re-fetch from the database.
    if crashreports_exceeded(stored_reports, max_crashreports) and stored_reports > cached_reports:
        cache.set(crashreports_key, max_crashreports, CRASH_REPORT_TIMEOUT)

    if refund_quantity:
        quotas.refund(
            project,
            key=job["project_key"],
            timestamp=job["start_time"],
            category=DataCategory.ATTACHMENT,
            quantity=refund_quantity,
        )

    return filtered
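# Illustrative sketch, not from the original source: the minimal ``job``
# context container read by filter_attachments_for_group (and by discard_event
# further below). Every key shown is one these functions index; the values are
# hypothetical placeholders taken from objects the caller would already hold.
from time import time

job = {
    "event": event,                    # saved event with .project, .group_id, .event_id
    "project_id": event.project_id,
    "key_id": key.id,                  # id of the ingesting DSN, may be None
    "project_key": key,                # ProjectKey used for quota refunds
    "start_time": time(),              # UNIX timestamp when ingestion started
    "category": DataCategory.ERROR,    # assumed event data category
    "platform": event.platform,        # used only for metrics tagging
}

filtered = filter_attachments_for_group(attachments, job)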
def process_event(event_manager, project, key, remote_addr, helper, attachments):
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data['event_id']

    if should_filter:
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id
        )
        metrics.incr(
            'events.blacklisted', tags={'reason': filter_reason}, skip_internal=False
        )
        event_filtered.send_robust(
            ip=remote_addr,
            project=project,
            sender=process_event,
        )
        raise APIForbidden('Event dropped due to filter: %s' % (filter_reason,))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug('Dropped event due to error with rate limiter')

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id
        )
        metrics.incr(
            'events.dropped',
            tags={
                'reason': reason or 'unknown',
            },
            skip_internal=False,
        )
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            reason_code=reason,
            sender=process_event,
        )
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    org_options = OrganizationOption.objects.get_all_values(project.organization_id)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = 'ev:%s:%s' % (project.id, event_id, )

    if cache.get(cache_key) is not None:
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.INVALID,
            'duplicate',
            event_id=event_id
        )
        raise APIForbidden(
            'An event with the same ID already exists (%s)' % (event_id, ))

    scrub_ip_address = (
        org_options.get('sentry:require_scrub_ip_address', False) or
        project.get_option('sentry:scrub_ip_address', False)
    )
    scrub_data = (
        org_options.get('sentry:require_scrub_data', False) or
        project.get_option('sentry:scrub_data', True)
    )

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (
            org_options.get(sensitive_fields_key, []) +
            project.get_option(sensitive_fields_key, [])
        )

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (
            org_options.get(exclude_fields_key, []) +
            project.get_option(exclude_fields_key, [])
        )

        scrub_defaults = (
            org_options.get('sentry:require_scrub_defaults', False) or
            project.get_option('sentry:scrub_defaults', True)
        )

        SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
            exclude_fields=exclude_fields,
        ).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, '', 60 * 5)

    api_logger.debug('New event received (%s)', event_id)

    event_accepted.send_robust(
        ip=remote_addr,
        data=data,
        project=project,
        sender=process_event,
    )

    return event_id
def post(self, request, project, **kwargs):
    # Minidump request payloads do not have the same structure as
    # usual events from other SDKs. Most notably, the event needs
    # to be transferred in the `sentry` form field. All other form
    # fields are assumed "extra" information. The only exception
    # to this is `upload_file_minidump`, which contains the minidump.

    if any(key.startswith('sentry[') for key in request.POST):
        # First, try to parse the nested form syntax `sentry[key][key]`
        # This is required for the Breakpad client library, which only
        # supports string values of up to 64 characters.
        extra = parser.parse(request.POST.urlencode())
        data = extra.pop('sentry', {})
    else:
        # Custom clients can submit longer payloads and should JSON
        # encode event data into the optional `sentry` field.
        extra = request.POST
        json_data = extra.pop('sentry', None)
        data = json.loads(json_data[0]) if json_data else {}

    # Merge additional form fields from the request with `extra`
    # data from the event payload and set defaults for processing.
    extra.update(data.get('extra', {}))
    data['extra'] = extra

    # Assign our own UUID so we can track this minidump. We cannot trust the
    # uploaded filename, and if reading the minidump fails there is no way
    # we can ever retrieve the original UUID from the minidump.
    event_id = data.get('event_id') or uuid.uuid4().hex
    data['event_id'] = event_id

    # At this point, we only extract the bare minimum information
    # needed to continue processing. This requires to process the
    # minidump without symbols and CFI to obtain an initial stack
    # trace (most likely via stack scanning). If all validations
    # pass, the event will be inserted into the database.
    try:
        minidump = request.FILES['upload_file_minidump']
    except KeyError:
        track_outcome(project.organization_id, project.id, None,
                      Outcome.INVALID, "missing_minidump_upload")
        raise APIError('Missing minidump upload')

    # Breakpad on Linux sometimes stores the entire HTTP request body as
    # dump file instead of just the minidump. The Electron SDK then for
    # example uploads a multipart formdata body inside the minidump file.
    # It needs to be re-parsed, to extract the actual minidump before
    # continuing.
    minidump.seek(0)
    if minidump.read(2) == b'--':
        # The remaining bytes of the first line are the form boundary. We
        # have already read two bytes, the remainder is the form boundary
        # (excluding the initial '--').
        boundary = minidump.readline().rstrip()
        minidump.seek(0)

        # Next, we have to fake a HTTP request by specifying the form
        # boundary and the content length, or otherwise Django will not try
        # to parse our form body. Also, we need to supply new upload
        # handlers since they cannot be reused from the current request.
        meta = {
            'CONTENT_TYPE': b'multipart/form-data; boundary=%s' % boundary,
            'CONTENT_LENGTH': minidump.size,
        }
        handlers = [
            uploadhandler.load_handler(handler, request)
            for handler in settings.FILE_UPLOAD_HANDLERS
        ]

        _, files = MultiPartParser(meta, minidump, handlers).parse()
        try:
            minidump = files['upload_file_minidump']
        except KeyError:
            track_outcome(project.organization_id, project.id, None,
                          Outcome.INVALID, "missing_minidump_upload")
            raise APIError('Missing minidump upload')

    if minidump.size == 0:
        track_outcome(project.organization_id, project.id, None,
                      Outcome.INVALID, "empty_minidump")
        raise APIError('Empty minidump upload received')

    if settings.SENTRY_MINIDUMP_CACHE:
        if not os.path.exists(settings.SENTRY_MINIDUMP_PATH):
            os.mkdir(settings.SENTRY_MINIDUMP_PATH, 0o744)

        with open('%s/%s.dmp' % (settings.SENTRY_MINIDUMP_PATH, event_id), 'wb') as out:
            for chunk in minidump.chunks():
                out.write(chunk)

    # Always store the minidump in attachments so we can access it during
    # processing, regardless of the event-attachments feature. This will
    # allow us to stack walk again with CFI once symbols are loaded.
    attachments = []
    minidump.seek(0)
    attachments.append(
        CachedAttachment.from_upload(minidump, type=MINIDUMP_ATTACHMENT_TYPE))
    has_event_attachments = features.has(
        'organizations:event-attachments', project.organization, actor=request.user)

    # Append all other files as generic attachments. We can skip this if the
    # feature is disabled since they won't be saved.
    for name, file in six.iteritems(request.FILES):
        if name == 'upload_file_minidump':
            continue

        # Known attachment: msgpack event
        if name == "__sentry-event":
            merge_attached_event(file, data)
            continue
        if name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
            merge_attached_breadcrumbs(file, data)
            continue

        # Add any other file as attachment
        if has_event_attachments:
            attachments.append(CachedAttachment.from_upload(file))

    write_minidump_placeholder(data)

    event_id = self.process(
        request, attachments=attachments, data=data, project=project, **kwargs)

    # Return the formatted UUID of the generated event. This is
    # expected by the Electron http uploader on Linux and doesn't
    # break the default Breakpad client library.
    return HttpResponse(six.text_type(uuid.UUID(event_id)), content_type='text/plain')
def post(self, request, project, project_config, **kwargs):
    attachments_enabled = features.has(
        'organizations:event-attachments', project.organization, actor=request.user)

    is_apple_crash_report = False
    attachments = []
    event = {'event_id': uuid.uuid4().hex}

    try:
        unreal = process_unreal_crash(
            request.body, request.GET.get('UserID'), request.GET.get('AppEnvironment'), event)
        apple_crash_report = unreal.get_apple_crash_report()
        if apple_crash_report:
            merge_apple_crash_report(apple_crash_report, event)
            is_apple_crash_report = True
    except (ProcessMinidumpError, Unreal4Error) as e:
        minidumps_logger.exception(e)
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            None,
            Outcome.INVALID,
            "process_minidump_unreal")
        raise APIError(e.message.split('\n', 1)[0])

    try:
        unreal_context = unreal.get_context()
        if unreal_context is not None:
            merge_unreal_context_event(unreal_context, event, project)
    except Unreal4Error as e:
        # we'll continue without the context data
        minidumps_logger.exception(e)

    try:
        unreal_logs = unreal.get_logs()
        if unreal_logs is not None:
            merge_unreal_logs_event(unreal_logs, event)
    except Unreal4Error as e:
        # we'll continue without the breadcrumbs
        minidumps_logger.exception(e)

    is_minidump = False

    for file in unreal.files():
        # Known attachment: msgpack event
        if file.name == "__sentry-event":
            merge_attached_event(file.open_stream(), event)
            continue
        if file.name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
            merge_attached_breadcrumbs(file.open_stream(), event)
            continue

        if file.type == "minidump" and not is_apple_crash_report:
            is_minidump = True

        # Always store the minidump in attachments so we can access it during
        # processing, regardless of the event-attachments feature. This will
        # allow us to stack walk again with CFI once symbols are loaded.
        if file.type == "minidump" or attachments_enabled:
            attachments.append(CachedAttachment(
                name=file.name,
                data=file.open_stream().read(),
                type=unreal_attachment_type(file),
            ))

    if is_minidump:
        write_minidump_placeholder(event)

    event_id = self.process(
        request,
        attachments=attachments,
        data=event,
        project=project,
        project_config=project_config,
        **kwargs)

    # The return here is only useful for consistency
    # because the UE4 crash reporter doesn't care about it.
    return HttpResponse(six.text_type(uuid.UUID(event_id)), content_type='text/plain')
def _do_save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome
    from sentry.ingest.outcomes_consumer import mark_signal_sent

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data["event_id"]

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop("project")

    key_id = None if data is None else data.get("key_id")
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    # We only need to delete raw events for events that support
    # reprocessing. If the data cannot be found we want to assume
    # that we need to delete the raw event.
    if not data or reprocessing.event_supports_reprocessing(data):
        delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache. The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it. This causes the node store to delete the data and we end up
    # fetching an empty dict. We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI. So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False
        )
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

        event = None
        try:
            manager = EventManager(data)
            # event.project.organization is populated after this statement.
            event = manager.save(project_id, assume_normalized=True)

            # Always load attachments from the cache so we can later prune them.
            # Only save them if the event-attachments feature is active, though.
            if features.has(
                "organizations:event-attachments", event.project.organization, actor=None
            ):
                attachments = attachment_cache.get(cache_key) or []
                for attachment in attachments:
                    save_attachment(event, attachment)

            # This is where we can finally say that we have accepted the event.
            track_outcome(
                event.project.organization_id,
                event.project.id,
                key_id,
                Outcome.ACCEPTED,
                None,
                timestamp,
                event_id,
            )

        except HashDiscarded:
            project = Project.objects.get_from_cache(id=project_id)
            reason = FilterStatKeys.DISCARDED_HASH
            project_key = None
            try:
                if key_id is not None:
                    project_key = ProjectKey.objects.get_from_cache(id=key_id)
            except ProjectKey.DoesNotExist:
                pass

            quotas.refund(project, key=project_key, timestamp=start_time)

            # There is no signal supposed to be sent for this particular
            # outcome-reason combination. Prevent the outcome consumer from
            # emitting it for now.
            #
            # XXX(markus): Revisit decision about signals once outcomes consumer is stable.
            mark_signal_sent(project_id, event_id)
            track_outcome(
                project.organization_id,
                project_id,
                key_id,
                Outcome.FILTERED,
                reason,
                timestamp,
                event_id,
            )

        finally:
            if cache_key:
                default_cache.delete(cache_key)

                # For the unlikely case that we did not manage to persist the
                # event we also delete the key always.
                if event is None or features.has(
                    "organizations:event-attachments", event.project.organization, actor=None
                ):
                    attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing(
                    "events.time-to-process", time() - start_time, instance=data["platform"]
                )
def process(self, request, project, key, auth, helper, data, project_config,
            attachments=None, **kwargs):
    disable_transaction_events()
    metrics.incr("events.total", skip_internal=False)

    project_id = project_config.project_id
    organization_id = project_config.organization_id

    if not data:
        track_outcome(organization_id, project_id, key.id, Outcome.INVALID, "no_data")
        raise APIError("No JSON data was found")

    remote_addr = request.META["REMOTE_ADDR"]

    event_manager = EventManager(
        data,
        project=project,
        key=key,
        auth=auth,
        client_ip=remote_addr,
        user_agent=helper.context.agent,
        version=auth.version,
        content_encoding=request.META.get("HTTP_CONTENT_ENCODING", ""),
        project_config=project_config,
    )
    del data

    self.pre_normalize(event_manager, helper)

    try:
        event_manager.normalize()
    except ProcessingErrorInvalidTransaction as e:
        track_outcome(organization_id, project_id, key.id, Outcome.INVALID, "invalid_transaction")
        raise APIError(six.text_type(e).split("\n", 1)[0])

    data = event_manager.get_data()
    dict_data = dict(data)
    data_size = len(json.dumps(dict_data))

    if data_size > 10000000:
        metrics.timing("events.size.rejected", data_size)
        track_outcome(
            organization_id,
            project_id,
            key.id,
            Outcome.INVALID,
            "too_large",
            event_id=dict_data.get("event_id"),
        )
        raise APIForbidden("Event size exceeded 10MB after normalization.")

    metrics.timing("events.size.data.post_storeendpoint", data_size)

    return process_event(
        event_manager, project, key, remote_addr, helper, attachments, project_config)
def post(self, request, project, **kwargs):
    # Minidump request payloads do not have the same structure as
    # usual events from other SDKs. Most notably, the event needs
    # to be transferred in the `sentry` form field. All other form
    # fields are assumed "extra" information. The only exception
    # to this is `upload_file_minidump`, which contains the minidump.

    if any(key.startswith('sentry[') for key in request.POST):
        # First, try to parse the nested form syntax `sentry[key][key]`
        # This is required for the Breakpad client library, which only
        # supports string values of up to 64 characters.
        extra = parser.parse(request.POST.urlencode())
        data = extra.pop('sentry', {})
    else:
        # Custom clients can submit longer payloads and should JSON
        # encode event data into the optional `sentry` field.
        extra = request.POST
        json_data = extra.pop('sentry', None)
        data = json.loads(json_data[0]) if json_data else {}

    # Merge additional form fields from the request with `extra`
    # data from the event payload and set defaults for processing.
    extra.update(data.get('extra', {}))
    data['extra'] = extra

    # Assign our own UUID so we can track this minidump. We cannot trust the
    # uploaded filename, and if reading the minidump fails there is no way
    # we can ever retrieve the original UUID from the minidump.
    event_id = data.get('event_id') or uuid.uuid4().hex
    data['event_id'] = event_id

    # At this point, we only extract the bare minimum information
    # needed to continue processing. This requires to process the
    # minidump without symbols and CFI to obtain an initial stack
    # trace (most likely via stack scanning). If all validations
    # pass, the event will be inserted into the database.
    try:
        minidump = request.FILES['upload_file_minidump']
    except KeyError:
        track_outcome(
            project.organization_id,
            project.id,
            None,
            Outcome.INVALID,
            "missing_minidump_upload")
        raise APIError('Missing minidump upload')

    # Breakpad on Linux sometimes stores the entire HTTP request body as
    # dump file instead of just the minidump. The Electron SDK then for
    # example uploads a multipart formdata body inside the minidump file.
    # It needs to be re-parsed, to extract the actual minidump before
    # continuing.
    minidump.seek(0)
    if minidump.read(2) == b'--':
        # The remaining bytes of the first line are the form boundary. We
        # have already read two bytes, the remainder is the form boundary
        # (excluding the initial '--').
        boundary = minidump.readline().rstrip()
        minidump.seek(0)

        # Next, we have to fake a HTTP request by specifying the form
        # boundary and the content length, or otherwise Django will not try
        # to parse our form body. Also, we need to supply new upload
        # handlers since they cannot be reused from the current request.
        meta = {
            'CONTENT_TYPE': b'multipart/form-data; boundary=%s' % boundary,
            'CONTENT_LENGTH': minidump.size,
        }
        handlers = [
            uploadhandler.load_handler(handler, request)
            for handler in settings.FILE_UPLOAD_HANDLERS
        ]

        _, files = MultiPartParser(meta, minidump, handlers).parse()
        try:
            minidump = files['upload_file_minidump']
        except KeyError:
            track_outcome(
                project.organization_id,
                project.id,
                None,
                Outcome.INVALID,
                "missing_minidump_upload")
            raise APIError('Missing minidump upload')

    if minidump.size == 0:
        track_outcome(
            project.organization_id,
            project.id,
            None,
            Outcome.INVALID,
            "empty_minidump")
        raise APIError('Empty minidump upload received')

    if settings.SENTRY_MINIDUMP_CACHE:
        if not os.path.exists(settings.SENTRY_MINIDUMP_PATH):
            os.mkdir(settings.SENTRY_MINIDUMP_PATH, 0o744)

        with open('%s/%s.dmp' % (settings.SENTRY_MINIDUMP_PATH, event_id), 'wb') as out:
            for chunk in minidump.chunks():
                out.write(chunk)

    # Always store the minidump in attachments so we can access it during
    # processing, regardless of the event-attachments feature. This will
    # allow us to stack walk again with CFI once symbols are loaded.
    attachments = []
    minidump.seek(0)
    attachments.append(CachedAttachment.from_upload(minidump, type=MINIDUMP_ATTACHMENT_TYPE))
    has_event_attachments = features.has(
        'organizations:event-attachments', project.organization, actor=request.user)

    # Append all other files as generic attachments. We can skip this if the
    # feature is disabled since they won't be saved.
    for name, file in six.iteritems(request.FILES):
        if name == 'upload_file_minidump':
            continue

        # Known attachment: msgpack event
        if name == "__sentry-event":
            merge_attached_event(file, data)
            continue
        if name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
            merge_attached_breadcrumbs(file, data)
            continue

        # Add any other file as attachment
        if has_event_attachments:
            attachments.append(CachedAttachment.from_upload(file))

    try:
        state = process_minidump(minidump)
        merge_process_state_event(data, state)
    except ProcessMinidumpError as e:
        minidumps_logger.exception(e)
        track_outcome(
            project.organization_id,
            project.id,
            None,
            Outcome.INVALID,
            "process_minidump")
        raise APIError(e.message.split('\n', 1)[0])

    event_id = self.process(
        request, attachments=attachments, data=data, project=project, **kwargs)

    # Return the formatted UUID of the generated event. This is
    # expected by the Electron http uploader on Linux and doesn't
    # break the default Breakpad client library.
    return HttpResponse(
        six.text_type(uuid.UUID(event_id)), content_type='text/plain'
    )
def process_event(event_manager, project, key, remote_addr, helper, attachments):
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    event_id = event_manager.get_data().get('event_id')
    start_time = time()
    should_filter, filter_reason = event_manager.should_filter()
    if should_filter:
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id)
        metrics.incr('events.blacklisted', tags={'reason': filter_reason}, skip_internal=False)
        event_filtered.send_robust(
            ip=remote_addr,
            project=project,
            sender=process_event,
        )
        raise APIForbidden('Event dropped due to filter: %s' % (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug('Dropped event due to error with rate limiter')

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id)
        metrics.incr(
            'events.dropped',
            tags={
                'reason': reason or 'unknown',
            },
            skip_internal=False,
        )
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            reason_code=reason,
            sender=process_event,
        )
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    org_options = OrganizationOption.objects.get_all_values(project.organization_id)

    data = event_manager.get_data()
    del event_manager

    event_id = data['event_id']

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = 'ev:%s:%s' % (project.id, event_id, )

    if cache.get(cache_key) is not None:
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.INVALID,
            'duplicate',
            event_id=event_id)
        raise APIForbidden('An event with the same ID already exists (%s)' % (event_id, ))

    scrub_ip_address = (
        org_options.get('sentry:require_scrub_ip_address', False) or
        project.get_option('sentry:scrub_ip_address', False))
    scrub_data = (
        org_options.get('sentry:require_scrub_data', False) or
        project.get_option('sentry:scrub_data', True))

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (
            org_options.get(sensitive_fields_key, []) +
            project.get_option(sensitive_fields_key, []))

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (
            org_options.get(exclude_fields_key, []) +
            project.get_option(exclude_fields_key, []))

        scrub_defaults = (
            org_options.get('sentry:require_scrub_defaults', False) or
            project.get_option('sentry:scrub_defaults', True))

        SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
            exclude_fields=exclude_fields,
        ).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, '', 60 * 5)

    api_logger.debug('New event received (%s)', event_id)

    event_accepted.send_robust(
        ip=remote_addr,
        data=data,
        project=project,
        sender=process_event,
    )

    return event_id
def post(self, request, project, **kwargs):
    attachments_enabled = features.has(
        'organizations:event-attachments', project.organization, actor=request.user)

    attachments = []
    event = {'event_id': uuid.uuid4().hex}

    try:
        unreal = process_unreal_crash(
            request.body, request.GET.get('UserID'), request.GET.get('AppEnvironment'), event)
        process_state = unreal.process_minidump()
        if process_state:
            merge_process_state_event(event, process_state)
        else:
            apple_crash_report = unreal.get_apple_crash_report()
            if apple_crash_report:
                merge_apple_crash_report(apple_crash_report, event)
            else:
                track_outcome(project.organization_id, project.id, None,
                              Outcome.INVALID, "missing_minidump_unreal")
                raise APIError("missing minidump in unreal crash report")
    except (ProcessMinidumpError, Unreal4Error) as e:
        minidumps_logger.exception(e)
        track_outcome(
            project.organization_id,
            project.id,
            None,
            Outcome.INVALID,
            "process_minidump_unreal")
        raise APIError(e.message.split('\n', 1)[0])

    try:
        unreal_context = unreal.get_context()
        if unreal_context is not None:
            merge_unreal_context_event(unreal_context, event, project)
    except Unreal4Error as e:
        # we'll continue without the context data
        minidumps_logger.exception(e)

    try:
        unreal_logs = unreal.get_logs()
        if unreal_logs is not None:
            merge_unreal_logs_event(unreal_logs, event)
    except Unreal4Error as e:
        # we'll continue without the breadcrumbs
        minidumps_logger.exception(e)

    for file in unreal.files():
        # Known attachment: msgpack event
        if file.name == "__sentry-event":
            merge_attached_event(file.open_stream(), event)
            continue
        if file.name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
            merge_attached_breadcrumbs(file.open_stream(), event)
            continue

        # Always store the minidump in attachments so we can access it during
        # processing, regardless of the event-attachments feature. This will
        # allow us to stack walk again with CFI once symbols are loaded.
        if file.type == "minidump" or attachments_enabled:
            attachments.append(CachedAttachment(
                name=file.name,
                data=file.open_stream().read(),
                type=unreal_attachment_type(file),
            ))

    event_id = self.process(
        request, attachments=attachments, data=event, project=project, **kwargs)

    # The return here is only useful for consistency
    # because the UE4 crash reporter doesn't care about it.
    return HttpResponse(
        six.text_type(uuid.UUID(event_id)), content_type='text/plain'
    )
def process_event(event_manager, project, key, remote_addr, helper, attachments, project_config):
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data["event_id"]

    if should_filter:
        signals_in_consumer = decide_signals_in_consumer()

        if not signals_in_consumer:
            # Mark that the event_filtered signal is sent. Do this before emitting
            # the outcome to avoid a potential race between OutcomesConsumer and
            # `event_filtered.send_robust` below.
            mark_signal_sent(project_config.project_id, event_id)

        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
        )
        metrics.incr("events.blacklisted", tags={"reason": filter_reason}, skip_internal=False)

        if not signals_in_consumer:
            event_filtered.send_robust(ip=remote_addr, project=project, sender=process_event)

        # relay will no longer be able to provide information about filter
        # status so to see the impact we're adding a way to turn on relay
        # like behavior here.
        if options.get("store.lie-about-filter-status"):
            return event_id

        raise APIForbidden("Event dropped due to filter: %s" % (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug("Dropped event due to error with rate limiter")

        signals_in_consumer = decide_signals_in_consumer()

        if not signals_in_consumer:
            # Mark that the event_dropped signal is sent. Do this before emitting
            # the outcome to avoid a potential race between OutcomesConsumer and
            # `event_dropped.send_robust` below.
            mark_signal_sent(project_config.project_id, event_id)

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id,
        )
        metrics.incr("events.dropped", tags={"reason": reason or "unknown"}, skip_internal=False)

        if not signals_in_consumer:
            event_dropped.send_robust(
                ip=remote_addr, project=project, reason_code=reason, sender=process_event
            )

        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = "ev:%s:%s" % (project_config.project_id, event_id)

    if cache.get(cache_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" % (event_id, ))

    datascrubbing_settings = project_config.config.get("datascrubbingSettings") or {}

    data = semaphore.scrub_event(datascrubbing_settings, dict(data))

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, "", 60 * 60)  # Cache for 1 hour

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(ip=remote_addr, data=data, project=project, sender=process_event)

    return event_id
def post(self, request, project, project_config, **kwargs):
    # Minidump request payloads do not have the same structure as usual
    # events from other SDKs. The minidump can either be transmitted as
    # request body, or as `upload_file_minidump` in a multipart formdata
    # request. Optionally, an event payload can be sent in the `sentry` form
    # field, either as JSON or as nested form data.

    request_files = request.FILES or {}
    content_type = request.META.get("CONTENT_TYPE")

    if content_type in self.dump_types:
        minidump = io.BytesIO(request.body)
        minidump_name = "Minidump"
        data = {}
    else:
        minidump = request_files.get("upload_file_minidump")
        minidump_name = minidump and minidump.name or None

        if any(key.startswith("sentry[") for key in request.POST):
            # First, try to parse the nested form syntax `sentry[key][key]`
            # This is required for the Breakpad client library, which only
            # supports string values of up to 64 characters.
            extra = parser.parse(request.POST.urlencode())
            data = extra.pop("sentry", {})
        else:
            # Custom clients can submit longer payloads and should JSON
            # encode event data into the optional `sentry` field.
            extra = request.POST.dict()
            json_data = extra.pop("sentry", None)
            try:
                data = json.loads(json_data) if json_data else {}
            except ValueError:
                data = {}

        if not isinstance(data, dict):
            data = {}

        # Merge additional form fields from the request with `extra` data
        # from the event payload and set defaults for processing. This is
        # sent by clients like Breakpad or Crashpad.
        extra.update(data.get("extra", {}))
        data["extra"] = extra

    if not minidump:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            None,
            Outcome.INVALID,
            "missing_minidump_upload",
        )
        raise APIError("Missing minidump upload")

    # Breakpad on Linux sometimes stores the entire HTTP request body as
    # dump file instead of just the minidump. The Electron SDK then for
    # example uploads a multipart formdata body inside the minidump file.
    # It needs to be re-parsed, to extract the actual minidump before
    # continuing.
    minidump.seek(0)
    if minidump.read(2) == b"--":
        # The remaining bytes of the first line are the form boundary. We
        # have already read two bytes, the remainder is the form boundary
        # (excluding the initial '--').
        boundary = minidump.readline().rstrip()
        minidump.seek(0)

        # Next, we have to fake a HTTP request by specifying the form
        # boundary and the content length, or otherwise Django will not try
        # to parse our form body. Also, we need to supply new upload
        # handlers since they cannot be reused from the current request.
        meta = {
            "CONTENT_TYPE": b"multipart/form-data; boundary=%s" % boundary,
            "CONTENT_LENGTH": minidump.size,
        }
        handlers = [
            uploadhandler.load_handler(handler, request)
            for handler in settings.FILE_UPLOAD_HANDLERS
        ]

        _, inner_files = MultiPartParser(meta, minidump, handlers).parse()
        try:
            minidump = inner_files["upload_file_minidump"]
            minidump_name = minidump.name
        except KeyError:
            track_outcome(
                project_config.organization_id,
                project_config.project_id,
                None,
                Outcome.INVALID,
                "missing_minidump_upload",
            )
            raise APIError("Missing minidump upload")

    minidump.seek(0)
    if minidump.read(4) != b"MDMP":
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            None,
            Outcome.INVALID,
            "invalid_minidump",
        )
        raise APIError("Uploaded file was not a minidump")

    # Always store the minidump in attachments so we can access it during
    # processing, regardless of the event-attachments feature. This is
    # required to process the minidump with debug information.
    attachments = []

    # The minidump attachment is special. It has its own attachment type to
    # distinguish it from regular attachments for processing. Also, it might
    # not be part of `request_files` if it has been uploaded as raw request
    # body instead of a multipart formdata request.
    minidump.seek(0)
    attachments.append(
        CachedAttachment(
            name=minidump_name,
            content_type="application/octet-stream",
            data=minidump.read(),
            type=MINIDUMP_ATTACHMENT_TYPE,
        ))

    # Append all other files as generic attachments.
    # RaduW 4 Jun 2019 always sent attachments for minidump (does not use
    # event-attachments feature)
    for name, file in six.iteritems(request_files):
        if name == "upload_file_minidump":
            continue

        # Known attachment: msgpack event
        if name == "__sentry-event":
            merge_attached_event(file, data)
            continue
        if name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
            merge_attached_breadcrumbs(file, data)
            continue

        # Add any other file as attachment
        attachments.append(CachedAttachment.from_upload(file))

    # Assign our own UUID so we can track this minidump. We cannot trust
    # the uploaded filename, and if reading the minidump fails there is
    # no way we can ever retrieve the original UUID from the minidump.
    event_id = data.get("event_id") or uuid.uuid4().hex
    data["event_id"] = event_id

    # Write a minimal event payload that is required to kick off native
    # event processing. It is also used as fallback if processing of the
    # minidump fails.
    # NB: This occurs after merging attachments to overwrite potentially
    # contradicting payloads transmitted in __sentry_event.
    write_minidump_placeholder(data)

    event_id = self.process(
        request,
        attachments=attachments,
        data=data,
        project=project,
        project_config=project_config,
        **kwargs)

    # Return the formatted UUID of the generated event. This is
    # expected by the Electron http uploader on Linux and doesn't
    # break the default Breakpad client library.
    return HttpResponse(six.text_type(uuid.UUID(event_id)), content_type="text/plain")
def post(self, request, project, project_config, **kwargs):
    attachments_enabled = features.has(
        "organizations:event-attachments", project.organization, actor=request.user)

    attachments = []

    event = {
        "event_id": uuid.uuid4().hex,
        "environment": request.GET.get("AppEnvironment"),
    }

    user_id = request.GET.get("UserID")
    if user_id:
        merge_unreal_user(event, user_id)

    try:
        unreal = Unreal4Crash.from_bytes(request.body)
    except (ProcessMinidumpError, Unreal4Error) as e:
        minidumps_logger.exception(e)
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            None,
            Outcome.INVALID,
            "process_unreal",
        )
        raise APIError(e.message.split("\n", 1)[0])

    try:
        unreal_context = unreal.get_context()
    except Unreal4Error as e:
        # we'll continue without the context data
        unreal_context = None
        minidumps_logger.exception(e)
    else:
        if unreal_context is not None:
            merge_unreal_context_event(unreal_context, event, project)

    try:
        unreal_logs = unreal.get_logs()
    except Unreal4Error as e:
        # we'll continue without the breadcrumbs
        minidumps_logger.exception(e)
    else:
        if unreal_logs is not None:
            merge_unreal_logs_event(unreal_logs, event)

    is_minidump = False
    is_applecrashreport = False

    for file in unreal.files():
        # Known attachment: msgpack event
        if file.name == "__sentry-event":
            merge_attached_event(file.open_stream(), event)
            continue
        if file.name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
            merge_attached_breadcrumbs(file.open_stream(), event)
            continue

        if file.type == "minidump":
            is_minidump = True
        if file.type == "applecrashreport":
            is_applecrashreport = True

        # Always store attachments that can be processed, regardless of the
        # event-attachments feature.
        if file.type in self.required_attachments or attachments_enabled:
            attachments.append(
                CachedAttachment(
                    name=file.name,
                    data=file.open_stream().read(),
                    type=unreal_attachment_type(file),
                ))

    if is_minidump:
        write_minidump_placeholder(event)
    elif is_applecrashreport:
        write_applecrashreport_placeholder(event)

    event_id = self.process(
        request,
        attachments=attachments,
        data=event,
        project=project,
        project_config=project_config,
        **kwargs)

    # The return here is only useful for consistency
    # because the UE4 crash reporter doesn't care about it.
    return HttpResponse(six.text_type(uuid.UUID(event_id)), content_type="text/plain")
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None,
                   project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # only when we come from reprocessing we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache. The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it. This causes the node store to delete the data and we end up
    # fetching an empty dict. We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI. So
    # to future proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            'events.failed',
            tags={'reason': 'cache', 'stage': 'post'},
            skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

        event = None
        try:
            manager = EventManager(data)
            event = manager.save(project_id, assume_normalized=True)

            # Always load attachments from the cache so we can later prune them.
            # Only save them if the event-attachments feature is active, though.
            if features.has('organizations:event-attachments',
                            event.project.organization, actor=None):
                attachments = attachment_cache.get(cache_key) or []
                for attachment in attachments:
                    save_attachment(event, attachment)

            # This is where we can finally say that we have accepted the event.
            track_outcome(
                event.project.organization_id,
                event.project.id,
                key_id,
                Outcome.ACCEPTED,
                None,
                timestamp,
                event_id
            )

        except HashDiscarded:
            project = Project.objects.get_from_cache(id=project_id)
            reason = FilterStatKeys.DISCARDED_HASH
            project_key = None
            try:
                if key_id is not None:
                    project_key = ProjectKey.objects.get_from_cache(id=key_id)
            except ProjectKey.DoesNotExist:
                pass

            quotas.refund(project, key=project_key, timestamp=start_time)

            track_outcome(
                project.organization_id,
                project_id,
                key_id,
                Outcome.FILTERED,
                reason,
                timestamp,
                event_id
            )

        finally:
            if cache_key:
                default_cache.delete(cache_key)

                # For the unlikely case that we did not manage to persist the
                # event we also delete the key always.
                if event is None or \
                        features.has('organizations:event-attachments',
                                     event.project.organization, actor=None):
                    attachment_cache.delete(cache_key)

            if start_time:
                metrics.timing(
                    'events.time-to-process',
                    time() - start_time,
                    instance=data['platform'])
def discard_event(job, attachments):
    """
    Refunds consumed quotas for an event and its attachments.

    For the event and each dropped attachment, an outcome
    FILTERED(discarded-hash) is emitted.

    :param job:         The job context container.
    :param attachments: The full list of attachments to filter.
    """
    project = job["event"].project

    quotas.refund(
        project,
        key=job["project_key"],
        timestamp=job["start_time"],
        category=job["category"],
        quantity=1,
    )

    track_outcome(
        org_id=project.organization_id,
        project_id=job["project_id"],
        key_id=job["key_id"],
        outcome=Outcome.FILTERED,
        reason=FilterStatKeys.DISCARDED_HASH,
        timestamp=to_datetime(job["start_time"]),
        event_id=job["event"].event_id,
        category=job["category"],
    )

    attachment_quantity = 0
    for attachment in attachments:
        # Quotas are counted with at least ``1`` for attachments.
        attachment_quantity += attachment.size or 1

        track_outcome(
            org_id=project.organization_id,
            project_id=job["project_id"],
            key_id=job["key_id"],
            outcome=Outcome.FILTERED,
            reason=FilterStatKeys.DISCARDED_HASH,
            timestamp=to_datetime(job["start_time"]),
            event_id=job["event"].event_id,
            category=DataCategory.ATTACHMENT,
            quantity=attachment.size,
        )

    if attachment_quantity:
        quotas.refund(
            project,
            key=job["project_key"],
            timestamp=job["start_time"],
            category=DataCategory.ATTACHMENT,
            quantity=attachment_quantity,
        )

    metrics.incr(
        "events.discarded",
        skip_internal=True,
        tags={"organization_id": project.organization_id, "platform": job["platform"]},
    )
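# A quick arithmetic sketch of the refund accounting above (values assumed):
# attachment quotas are counted with at least ``1`` per attachment, so sizes
# of 1024 bytes, None (unknown) and 0 bytes are refunded as 1024 + 1 + 1.
sizes = [1024, None, 0]
attachment_quantity = sum(size or 1 for size in sizes)
assert attachment_quantity == 1026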