Example #1
File: api.py Project: yaoqi/sentry
 def _get_project_from_id(self, project_id):
     if not project_id:
         return
     if not project_id.isdigit():
         track_outcome(0, 0, None, Outcome.INVALID, "project_id")
         raise APIError('Invalid project_id: %r' % project_id)
     try:
         return Project.objects.get_from_cache(id=project_id)
     except Project.DoesNotExist:
         track_outcome(0, 0, None, Outcome.INVALID, "project_id")
         raise APIError('Invalid project_id: %r' % project_id)
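All of the examples on this page are call sites of `track_outcome`. Its implementation is not shown, but the calls imply a signature roughly like the sketch below. This is a minimal reconstruction for reference only: the `Outcome` values are assumptions, and the real function in `sentry.utils.outcomes` publishes to Sentry's outcomes pipeline rather than printing.

from datetime import datetime
from enum import IntEnum

class Outcome(IntEnum):
    # The names appear in the examples; the numeric values are assumed.
    ACCEPTED = 0
    FILTERED = 1
    RATE_LIMITED = 2
    INVALID = 3

def track_outcome(org_id, project_id, key_id, outcome, reason=None,
                  timestamp=None, event_id=None, category=None, quantity=1):
    """Sketch inferred from the call sites: record a single ingestion
    outcome for an (organization, project, DSN key) triple."""
    record = {
        "org_id": org_id,
        "project_id": project_id,
        "key_id": key_id,
        "outcome": int(outcome),
        "reason": reason,
        "timestamp": (timestamp or datetime.utcnow()).isoformat(),
        "event_id": event_id,
        "category": category,
        "quantity": quantity,
    }
    print(record)  # stand-in for publishing to the outcomes pipeline

Note that `key_id` may be None, and that org and project fall back to 0 when no project could be resolved, as in Example #1 above.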
Example #2
File: api.py Project: yaoqi/sentry
    def process(self, request, project, key, auth, helper, data, attachments=None, **kwargs):
        metrics.incr('events.total', skip_internal=False)

        if not data:
            track_outcome(project.organization_id, project.id, key.id, Outcome.INVALID, "no_data")
            raise APIError('No JSON data was found')

        remote_addr = request.META['REMOTE_ADDR']

        event_manager = EventManager(
            data,
            project=project,
            key=key,
            auth=auth,
            client_ip=remote_addr,
            user_agent=helper.context.agent,
            version=auth.version,
            content_encoding=request.META.get('HTTP_CONTENT_ENCODING', ''),
        )
        del data

        self.pre_normalize(event_manager, helper)
        event_manager.normalize()

        data = event_manager.get_data()
        dict_data = dict(data)
        data_size = len(json.dumps(dict_data))

        if data_size > 10000000:
            metrics.timing('events.size.rejected', data_size)
            track_outcome(
                project.organization_id,
                project.id,
                key.id,
                Outcome.INVALID,
                'too_large',
                event_id=dict_data.get('event_id')
            )
            raise APIForbidden("Event size exceeded 10MB after normalization.")

        metrics.timing(
            'events.size.data.post_storeendpoint',
            data_size,
            tags={'project_id': project.id}
        )

        return process_event(event_manager, project,
                             key, remote_addr, helper, attachments)
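Examples #1 and #2 already show the pattern that repeats throughout this page: emit an INVALID outcome first, then raise the API error, so the rejection is counted even though the request fails. A hypothetical helper making that pairing explicit (it builds on the `track_outcome` sketch above; `APIError` is stubbed here):

class APIError(Exception):
    # Stand-in for sentry.coreapi.APIError.
    pass

def reject_invalid(org_id, project_id, key_id, reason, message, event_id=None):
    # Record why the payload was rejected before surfacing the error.
    track_outcome(org_id, project_id, key_id, Outcome.INVALID, reason,
                  event_id=event_id)
    raise APIError(message)

With such a helper, each rejection branch in `_get_project_from_id` collapses to a single call, e.g. `reject_invalid(0, 0, None, "project_id", "Invalid project_id: %r" % project_id)`.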
Example #3
File: api.py Project: yaoqi/sentry
    def _parse_header(self, request, helper, project):
        auth = self.auth_helper_cls.auth_from_request(request)

        if auth.version not in PROTOCOL_VERSIONS:
            track_outcome(
                project.organization_id,
                project.id,
                None,
                Outcome.INVALID,
                "auth_version")
            raise APIError(
                'Client using unsupported server protocol version (%r)' %
                six.text_type(auth.version or '')
            )

        if not auth.client:
            track_outcome(project.organization_id, project.id, None, Outcome.INVALID, "auth_client")
            raise APIError("Client did not send 'client' identifier")

        return auth
Example #4
File: api.py Project: yaoqi/sentry
    def _dispatch(self, request, helper, sentry_key, project_id=None, origin=None, *args, **kwargs):
        if request.method != 'POST':
            track_outcome(0, 0, None, Outcome.INVALID, "disallowed_method")
            return HttpResponseNotAllowed(['POST'])

        content_type = request.META.get('CONTENT_TYPE')
        if content_type is None or not content_type.startswith(self.content_types):
            track_outcome(0, 0, None, Outcome.INVALID, "content_type")
            raise APIError('Invalid Content-Type')

        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        helper.context.bind_project(project)

        auth = Auth({'sentry_key': sentry_key}, is_public=False)
        auth.client = 'sentry.unreal_engine'

        key = helper.project_key_from_auth(auth)
        if key.project_id != project.id:
            track_outcome(
                project.organization_id,
                project.id,
                None,
                Outcome.INVALID,
                "multi_project_id")
            raise APIError('Two different projects were specified')

        helper.context.bind_auth(auth)
        return super(APIView, self).dispatch(
            request=request, project=project, auth=auth, helper=helper, key=key, **kwargs
        )
Example #5
File: api.py Project: yaoqi/sentry
    def post(self, request, project, helper, key, **kwargs):
        json_body = safely_load_json_string(request.body)
        report_type = self.security_report_type(json_body)
        if report_type is None:
            track_outcome(
                project.organization_id,
                project.id,
                key.id,
                Outcome.INVALID,
                "security_report_type")
            raise APIError('Unrecognized security report type')
        interface = get_interface(report_type)

        try:
            instance = interface.from_raw(json_body)
        except jsonschema.ValidationError as e:
            track_outcome(
                project.organization_id,
                project.id,
                key.id,
                Outcome.INVALID,
                "security_report")
            raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

        # Do an origin check based on the `document-uri` key, as explained in `_dispatch`.
        origin = instance.get_origin()
        if not is_valid_origin(origin, project):
            if project:
                track_outcome(
                    project.organization_id,
                    project.id,
                    key.id,
                    Outcome.INVALID,
                    FilterStatKeys.CORS)
            raise APIForbidden('Invalid origin')

        data = {
            'interface': interface.path,
            'report': instance,
            'release': request.GET.get('sentry_release'),
            'environment': request.GET.get('sentry_environment'),
        }

        self.process(request, project=project, helper=helper, data=data, key=key, **kwargs)
        return HttpResponse(content_type='application/javascript', status=201)
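Example #5 relies on `interface.from_raw` raising `jsonschema.ValidationError` for malformed reports. A self-contained illustration of that validation step; the schema here is invented, since each real report interface defines its own:

import jsonschema

# Invented minimal schema: a CSP report wraps its fields in a
# top-level "csp-report" object.
CSP_REPORT_SCHEMA = {
    "type": "object",
    "properties": {"csp-report": {"type": "object"}},
    "required": ["csp-report"],
}

def validate_csp_report(json_body):
    # Raises jsonschema.ValidationError, which the view above converts
    # into an INVALID("security_report") outcome plus an APIError.
    jsonschema.validate(json_body, CSP_REPORT_SCHEMA)

Note how the view surfaces only the first line of the validation error (`str(e).splitlines()[0]`), since jsonschema messages often span many lines.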
Example #6
File: api.py Project: yaoqi/sentry
    def _dispatch(self, request, helper, project_id=None, origin=None, *args, **kwargs):
        # TODO(ja): Refactor shared code with CspReportView. Especially, look at
        # the sentry_key override and test it.

        # A minidump submission as implemented by Breakpad and Crashpad or any
        # other library following the Mozilla Socorro protocol is a POST request
        # without Origin or Referer headers. Therefore, we cannot validate the
        # origin of the request, but we *can* validate the "prod" key in the future.
        if request.method != 'POST':
            track_outcome(0, 0, None, Outcome.INVALID, "disallowed_method")
            return HttpResponseNotAllowed(['POST'])

        content_type = request.META.get('CONTENT_TYPE')
        # In case of multipart/form-data, the Content-Type header also includes
        # a boundary. Therefore, we cannot check for an exact match.
        if content_type is None or not content_type.startswith(self.content_types):
            track_outcome(0, 0, None, Outcome.INVALID, "content_type")
            raise APIError('Invalid Content-Type')

        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        helper.context.bind_project(project)

        # This is yanking the auth from the querystring since it's not
        # in the POST body. This means we expect a `sentry_key` and
        # `sentry_version` to be set in the querystring
        auth = self.auth_helper_cls.auth_from_request(request)

        key = helper.project_key_from_auth(auth)
        if key.project_id != project.id:
            track_outcome(
                project.organization_id,
                project.id,
                None,
                Outcome.INVALID,
                "multi_project_id")
            raise APIError('Two different projects were specified')

        helper.context.bind_auth(auth)

        return super(APIView, self).dispatch(
            request=request, project=project, auth=auth, helper=helper, key=key, **kwargs
        )
Example #7
File: api.py Project: yaoqi/sentry
    def _dispatch(self, request, helper, project_id=None, origin=None, *args, **kwargs):
        # A CSP report is sent as a POST request with no Origin or Referer
        # header. What we're left with is a 'document-uri' key which is
        # inside of the JSON body of the request. This 'document-uri' value
        # should be treated as an origin check since it refers to the page
        # that triggered the report. The Content-Type is supposed to be
        # `application/csp-report`, but Firefox sends it as `application/json`.
        if request.method != 'POST':
            track_outcome(0, 0, None, Outcome.INVALID, "disallowed_method")
            return HttpResponseNotAllowed(['POST'])

        if request.META.get('CONTENT_TYPE') not in self.content_types:
            track_outcome(0, 0, None, Outcome.INVALID, "content_type")
            raise APIError('Invalid Content-Type')

        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        helper.context.bind_project(project)

        # This is yanking the auth from the querystring since it's not
        # in the POST body. This means we expect a `sentry_key` and
        # `sentry_version` to be set in the querystring
        auth = self.auth_helper_cls.auth_from_request(request)

        key = helper.project_key_from_auth(auth)
        if key.project_id != project.id:
            track_outcome(
                project.organization_id,
                project.id,
                None,
                Outcome.INVALID,
                "multi_project_id")
            raise APIError('Two different projects were specified')

        helper.context.bind_auth(auth)

        return super(APIView, self).dispatch(
            request=request, project=project, auth=auth, helper=helper, key=key, **kwargs
        )
Example #8
File: api.py Project: aleksaToljic/sentry
    def _dispatch(self,
                  request,
                  helper,
                  project_id=None,
                  origin=None,
                  *args,
                  **kwargs):
        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        if project:
            helper.context.bind_project(project)

        if origin is not None:
            # This check is specific for clients who need CORS support
            if not project:
                raise APIError('Client must be upgraded for CORS support')
            if not is_valid_origin(origin, project):
                track_outcome(project.organization_id, project.id, None,
                              Outcome.INVALID, FilterStatKeys.CORS)
                raise APIForbidden('Invalid origin: %s' % (origin, ))

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method == 'OPTIONS':
            response = self.options(request, project)
        else:
            auth = self._parse_header(request, helper, project)

            key = helper.project_key_from_auth(auth)

            # Legacy API was /api/store/ and the project ID was only available elsewhere
            if not project:
                project = Project.objects.get_from_cache(id=key.project_id)
                helper.context.bind_project(project)
            elif key.project_id != project.id:
                raise APIError('Two different projects were specified')

            helper.context.bind_auth(auth)

            # Explicitly bind Organization so we don't implicitly query it
            # later; this just allows us to comfortably assure that
            # `project.organization` is safe. This also allows us to pull the
            # object from cache instead of implicitly fetching it from the
            # database.
            project.organization = Organization.objects.get_from_cache(
                id=project.organization_id)

            response = super(APIView, self).dispatch(request=request,
                                                     project=project,
                                                     auth=auth,
                                                     helper=helper,
                                                     key=key,
                                                     **kwargs)

        if origin:
            if origin == 'null':
                # If an Origin is `null`, but we got this far, that means
                # we've gotten past our CORS check for some reason. But the
                # problem is that we can't return "null" as a valid response
                # to `Access-Control-Allow-Origin` and we don't have another
                # value to work with, so just allow '*' since they've gotten
                # this far.
                response['Access-Control-Allow-Origin'] = '*'
            else:
                response['Access-Control-Allow-Origin'] = origin

        return response
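The CORS branch above hinges on `is_valid_origin`, whose internals are not shown. A simplified stand-in under the assumption that the allowed origins arrive as a plain list (the real helper in `sentry.utils.http` reads project settings and supports wildcard subdomains):

from six.moves.urllib.parse import urlparse

def is_valid_origin_sketch(origin, allowed_origins):
    # '*' allows everything; otherwise compare hosts. Allowed entries
    # may be full URLs or bare hostnames.
    if "*" in allowed_origins:
        return True
    if not origin:
        return False
    host = urlparse(origin).netloc.lower()
    return any(host == urlparse(allowed).netloc.lower() or host == allowed.lower()
               for allowed in allowed_origins)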
Example #9
File: api.py Project: yaoqi/sentry
    def _dispatch(self, request, helper, project_id=None, origin=None, *args, **kwargs):
        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        if project:
            helper.context.bind_project(project)

        if origin is not None:
            # This check is specific for clients who need CORS support
            if not project:
                raise APIError('Client must be upgraded for CORS support')
            if not is_valid_origin(origin, project):
                track_outcome(
                    project.organization_id,
                    project.id,
                    None,
                    Outcome.INVALID,
                    FilterStatKeys.CORS)
                raise APIForbidden('Invalid origin: %s' % (origin, ))

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method == 'OPTIONS':
            response = self.options(request, project)
        else:
            auth = self._parse_header(request, helper, project)

            key = helper.project_key_from_auth(auth)

            # Legacy API was /api/store/ and the project ID was only available elsewhere
            if not project:
                project = Project.objects.get_from_cache(id=key.project_id)
                helper.context.bind_project(project)
            elif key.project_id != project.id:
                raise APIError('Two different projects were specified')

            helper.context.bind_auth(auth)

            # Explicitly bind Organization so we don't implicitly query it
            # later; this just allows us to comfortably assure that
            # `project.organization` is safe. This also allows us to pull the
            # object from cache instead of implicitly fetching it from the
            # database.
            project.organization = Organization.objects.get_from_cache(
                id=project.organization_id)

            response = super(APIView, self).dispatch(
                request=request, project=project, auth=auth, helper=helper, key=key, **kwargs
            )

        if origin:
            if origin == 'null':
                # If an Origin is `null`, but we got this far, that means
                # we've gotten past our CORS check for some reason. But the
                # problem is that we can't return "null" as a valid response
                # to `Access-Control-Allow-Origin` and we don't have another
                # value to work with, so just allow '*' since they've gotten
                # this far.
                response['Access-Control-Allow-Origin'] = '*'
            else:
                response['Access-Control-Allow-Origin'] = origin

        return response
Example #10
def save_attachment(
    cache_key, attachment, project, event_id, key_id=None, group_id=None, start_time=None
):
    """
    Persists a cached event attachment into the file store.

    Emits one outcome, either ACCEPTED on success or INVALID(missing_chunks) if
    retrieving the attachment data fails.

    :param cache_key:  The cache key at which the attachment is stored for
                       debugging purposes.
    :param attachment: The ``CachedAttachment`` instance to store.
    :param project:    The project model that this attachment belongs to.
    :param event_id:   Identifier of the event that this attachment belongs to.
                       The event does not have to be stored yet.
    :param key_id:     Optional identifier of the DSN that was used to ingest
                       the attachment.
    :param group_id:   Optional group identifier for the event. May be empty if
                       the event has not been stored yet, or if it is not
                       grouped.
    :param start_time: UNIX Timestamp (float) when the attachment was ingested.
                       If missing, the current time is used.
    """
    if start_time is not None:
        timestamp = to_datetime(start_time)
    else:
        timestamp = datetime.utcnow().replace(tzinfo=UTC)

    try:
        data = attachment.data
    except MissingAttachmentChunks:
        track_outcome(
            org_id=project.organization_id,
            project_id=project.id,
            key_id=key_id,
            outcome=Outcome.INVALID,
            reason="missing_chunks",
            timestamp=timestamp,
            event_id=event_id,
            category=DataCategory.ATTACHMENT,
        )

        logger.exception("Missing chunks for cache_key=%s", cache_key)
        return

    file = File.objects.create(
        name=attachment.name,
        type=attachment.type,
        headers={"Content-Type": attachment.content_type},
    )
    file.putfile(six.BytesIO(data))

    EventAttachment.objects.create(
        event_id=event_id,
        project_id=project.id,
        group_id=group_id,
        name=attachment.name,
        file=file,
    )

    track_outcome(
        org_id=project.organization_id,
        project_id=project.id,
        key_id=key_id,
        outcome=Outcome.ACCEPTED,
        reason=None,
        timestamp=timestamp,
        event_id=event_id,
        category=DataCategory.ATTACHMENT,
        quantity=attachment.size or 1,
    )
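This example introduces the keyword-argument form of `track_outcome`, including a `category` and a byte-based `quantity`. Two of its helpers, sketched under assumptions: `to_datetime` presumably converts a UNIX float timestamp to an aware UTC datetime (shown here in Python 3 style), and `DataCategory` is an enum whose numeric values below are illustrative only.

from datetime import datetime, timezone
from enum import IntEnum

def to_datetime(epoch_seconds):
    # Assumed behavior: UNIX seconds (float) to a timezone-aware UTC
    # datetime, matching the start_time handling above.
    return datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)

class DataCategory(IntEnum):
    # Illustrative subset; only ATTACHMENT appears in these examples,
    # and the values are assumptions.
    ERROR = 1
    ATTACHMENT = 4

Note the `quantity=attachment.size or 1` on the accepted outcome: attachments are counted by size in bytes, but never less than 1, which matches the refund arithmetic in Example #12 below.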
Example #11
def process_event(event_manager, project, key, remote_addr, helper,
                  attachments, project_config):
    event_received.send_robust(ip=remote_addr,
                               project=project,
                               sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data["event_id"]

    if should_filter:
        # Mark that the event_filtered signal is sent. Do this before emitting
        # the outcome to avoid a potential race between OutcomesConsumer and
        # `event_filtered.send_robust` below.
        mark_signal_sent(project_config.project_id, event_id)

        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
        )
        metrics.incr("events.blacklisted",
                     tags={"reason": filter_reason},
                     skip_internal=False)
        event_filtered.send_robust(ip=remote_addr,
                                   project=project,
                                   sender=process_event)
        raise APIForbidden("Event dropped due to filter: %s" %
                           (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(quotas.is_rate_limited,
                              project=project,
                              key=key,
                              _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug("Dropped event due to error with rate limiter")

        # Mark that the event_dropped signal is sent. Do this before emitting
        # the outcome to avoid a potential race between OutcomesConsumer and
        # `event_dropped.send_robust` below.
        mark_signal_sent(project_config.project_id, event_id)

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id,
        )
        metrics.incr("events.dropped",
                     tags={"reason": reason or "unknown"},
                     skip_internal=False)
        event_dropped.send_robust(ip=remote_addr,
                                  project=project,
                                  reason_code=reason,
                                  sender=process_event)
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = "ev:%s:%s" % (project_config.project_id, event_id)

    if cache.get(cache_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" %
                           (event_id, ))

    config = project_config.config
    datascrubbing_settings = config.get("datascrubbingSettings") or {}

    scrub_ip_address = datascrubbing_settings.get("scrubIpAddresses")

    scrub_data = datascrubbing_settings.get("scrubData")

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields = datascrubbing_settings.get("sensitiveFields")

        exclude_fields = datascrubbing_settings.get("excludeFields")

        scrub_defaults = datascrubbing_settings.get("scrubDefaults")

        SensitiveDataFilter(fields=sensitive_fields,
                            include_defaults=scrub_defaults,
                            exclude_fields=exclude_fields).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data,
                                   start_time=start_time,
                                   attachments=attachments)

    cache.set(cache_key, "", 60 * 60)  # Cache for 1 hour

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(ip=remote_addr,
                               data=data,
                               project=project,
                               sender=process_event)

    return event_id
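The rate-limit block here (and in Examples #13 and #19) tolerates three results from the quota backend: a `RateLimit` object, a bare bool, or None when the limiter itself failed. A sketch of that coercion, with `RateLimit` reduced to the two fields these examples use:

from collections import namedtuple

# Reduced stand-in for the RateLimit class in sentry.quotas.
RateLimit = namedtuple("RateLimit", ["is_limited", "retry_after"])

def normalize_rate_limit(result):
    # Bare bools get wrapped; None passes through so the caller can
    # drop the event, since a failing rate limiter must not cascade.
    if isinstance(result, bool):
        return RateLimit(is_limited=result, retry_after=None)
    return result

assert normalize_rate_limit(True) == RateLimit(True, None)
assert normalize_rate_limit(None) is None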
Example #12
def filter_attachments_for_group(attachments, job):
    """
    Removes crash reports exceeding the group-limit.

    If the project or organization is configured to limit the number of crash
    reports per group, any crash reports beyond that limit are dropped. This
    requires `event.group` to be set.

    Emits one outcome per removed attachment.

    :param attachments: The full list of attachments to filter.
    :param job:         The job context container.
    """
    if not attachments:
        return attachments

    event = job["event"]
    project = event.project

    # The setting is both an organization and project setting. The project
    # setting strictly overrides the organization setting, unless set to the
    # default.
    max_crashreports = get_max_crashreports(project)
    if not max_crashreports:
        max_crashreports = get_max_crashreports(project.organization)

    # The number of crash reports is cached per group
    crashreports_key = get_crashreport_key(event.group_id)

    # Only fetch the number of stored crash reports if there is a crash report
    # in the list of attachments. Otherwise, we won't require this number.
    if any(attachment.type in CRASH_REPORT_TYPES for attachment in attachments):
        cached_reports = get_stored_crashreports(crashreports_key, event, max_crashreports)
    else:
        cached_reports = 0
    stored_reports = cached_reports

    filtered = []
    refund_quantity = 0
    for attachment in attachments:
        # If the attachment is a crash report (e.g. minidump), we need to honor
        # the store_crash_reports setting. Otherwise, we assume that the client
        # has already verified PII and just store the attachment.
        if attachment.type in CRASH_REPORT_TYPES:
            if crashreports_exceeded(stored_reports, max_crashreports):
                track_outcome(
                    org_id=event.project.organization_id,
                    project_id=job["project_id"],
                    key_id=job["key_id"],
                    outcome=Outcome.FILTERED,
                    reason=FilterStatKeys.CRASH_REPORT_LIMIT,
                    timestamp=to_datetime(job["start_time"]),
                    event_id=event.event_id,
                    category=DataCategory.ATTACHMENT,
                    quantity=attachment.size,
                )

                # Quotas are counted with at least ``1`` for attachments.
                refund_quantity += attachment.size or 1
                continue
            stored_reports += 1

        filtered.append(attachment)

    # Check if we have exceeded the stored crash reports count. If so, we
    # persist the current maximum (not the actual number!) into the cache. Next
    # time when loading from the cache, we will validate that this number has
    # not changed, or otherwise re-fetch from the database.
    if crashreports_exceeded(stored_reports, max_crashreports) and stored_reports > cached_reports:
        cache.set(crashreports_key, max_crashreports, CRASH_REPORT_TIMEOUT)

    if refund_quantity:
        quotas.refund(
            project,
            key=job["project_key"],
            timestamp=job["start_time"],
            category=DataCategory.ATTACHMENT,
            quantity=refund_quantity,
        )

    return filtered
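A sketch of the `crashreports_exceeded` check used above. Its exact semantics are not shown in this excerpt; the version below assumes a negative limit acts as a "store everything" sentinel, while a limit of 0 drops every crash report:

def crashreports_exceeded(current_count, max_count):
    # Assumption: negative means unlimited; otherwise compare against
    # the configured per-group maximum.
    if max_count < 0:
        return False
    return current_count >= max_count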
Example #13
File: api.py Project: yaoqi/sentry
def process_event(event_manager, project, key, remote_addr, helper, attachments):
    event_received.send_robust(ip=remote_addr, project=project, sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data['event_id']

    if should_filter:
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id
        )
        metrics.incr(
            'events.blacklisted', tags={'reason': filter_reason}, skip_internal=False
        )
        event_filtered.send_robust(
            ip=remote_addr,
            project=project,
            sender=process_event,
        )
        raise APIForbidden('Event dropped due to filter: %s' % (filter_reason,))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(
        quotas.is_rate_limited, project=project, key=key, _with_transaction=False
    )
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug('Dropped event due to error with rate limiter')

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id
        )
        metrics.incr(
            'events.dropped',
            tags={
                'reason': reason or 'unknown',
            },
            skip_internal=False,
        )
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            reason_code=reason,
            sender=process_event,
        )
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    org_options = OrganizationOption.objects.get_all_values(
        project.organization_id)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = 'ev:%s:%s' % (project.id, event_id, )

    if cache.get(cache_key) is not None:
        track_outcome(
            project.organization_id,
            project.id,
            key.id,
            Outcome.INVALID,
            'duplicate',
            event_id=event_id
        )
        raise APIForbidden(
            'An event with the same ID already exists (%s)' % (event_id, ))

    scrub_ip_address = (org_options.get('sentry:require_scrub_ip_address', False) or
                        project.get_option('sentry:scrub_ip_address', False))
    scrub_data = (org_options.get('sentry:require_scrub_data', False) or
                  project.get_option('sentry:scrub_data', True))

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (
            org_options.get(sensitive_fields_key, []) +
            project.get_option(sensitive_fields_key, [])
        )

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (
            org_options.get(exclude_fields_key, []) +
            project.get_option(exclude_fields_key, [])
        )

        scrub_defaults = (org_options.get('sentry:require_scrub_defaults', False) or
                          project.get_option('sentry:scrub_defaults', True))

        SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
            exclude_fields=exclude_fields,
        ).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data, start_time=start_time, attachments=attachments)

    cache.set(cache_key, '', 60 * 5)

    api_logger.debug('New event received (%s)', event_id)

    event_accepted.send_robust(
        ip=remote_addr,
        data=data,
        project=project,
        sender=process_event,
    )

    return event_id
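The scrubbing flags in this example follow one rule: an org-level `require_*` option force-enables the behavior, otherwise the project option (with its own default) decides. A hypothetical condensation using plain dicts in place of the two option stores:

def resolve_scrub_flag(org_options, project_options, require_key,
                       project_key, default):
    # Mirrors the `org_options.get(...) or project.get_option(...)`
    # expressions above.
    if org_options.get(require_key, False):
        return True
    return project_options.get(project_key, default)

# scrub_data defaults to on at the project level...
assert resolve_scrub_flag({}, {}, "sentry:require_scrub_data",
                          "sentry:scrub_data", True) is True
# ...while scrub_ip_address defaults to off.
assert resolve_scrub_flag({}, {}, "sentry:require_scrub_ip_address",
                          "sentry:scrub_ip_address", False) is False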
Example #14
    def post(self, request, project, **kwargs):
        # Minidump request payloads do not have the same structure as
        # usual events from other SDKs. Most notably, the event needs
        # to be transferred in the `sentry` form field. All other form
        # fields are assumed "extra" information. The only exception
        # to this is `upload_file_minidump`, which contains the minidump.

        if any(key.startswith('sentry[') for key in request.POST):
            # First, try to parse the nested form syntax `sentry[key][key]`
            # This is required for the Breakpad client library, which only
            # supports string values of up to 64 characters.
            extra = parser.parse(request.POST.urlencode())
            data = extra.pop('sentry', {})
        else:
            # Custom clients can submit longer payloads and should JSON
            # encode event data into the optional `sentry` field.
            extra = request.POST
            json_data = extra.pop('sentry', None)
            data = json.loads(json_data[0]) if json_data else {}

        # Merge additional form fields from the request with `extra`
        # data from the event payload and set defaults for processing.
        extra.update(data.get('extra', {}))
        data['extra'] = extra

        # Assign our own UUID so we can track this minidump. We cannot trust the
        # uploaded filename, and if reading the minidump fails there is no way
        # we can ever retrieve the original UUID from the minidump.
        event_id = data.get('event_id') or uuid.uuid4().hex
        data['event_id'] = event_id

        # At this point, we only extract the bare minimum information
        # needed to continue processing. This requires processing the
        # minidump without symbols and CFI to obtain an initial stack
        # trace (most likely via stack scanning). If all validations
        # pass, the event will be inserted into the database.
        try:
            minidump = request.FILES['upload_file_minidump']
        except KeyError:
            track_outcome(project.organization_id, project.id, None,
                          Outcome.INVALID, "missing_minidump_upload")
            raise APIError('Missing minidump upload')

        # Breakpad on Linux sometimes stores the entire HTTP request body as
        # the dump file instead of just the minidump. The Electron SDK, for
        # example, then uploads a multipart formdata body inside the minidump
        # file. It needs to be re-parsed to extract the actual minidump before
        # continuing.
        minidump.seek(0)
        if minidump.read(2) == b'--':
            # The remaining bytes of the first line are the form boundary. We
            # have already read two bytes, the remainder is the form boundary
            # (excluding the initial '--').
            boundary = minidump.readline().rstrip()
            minidump.seek(0)

            # Next, we have to fake an HTTP request by specifying the form
            # boundary and the content length, or otherwise Django will not try
            # to parse our form body. Also, we need to supply new upload
            # handlers since they cannot be reused from the current request.
            meta = {
                'CONTENT_TYPE': b'multipart/form-data; boundary=%s' % boundary,
                'CONTENT_LENGTH': minidump.size,
            }
            handlers = [
                uploadhandler.load_handler(handler, request)
                for handler in settings.FILE_UPLOAD_HANDLERS
            ]

            _, files = MultiPartParser(meta, minidump, handlers).parse()
            try:
                minidump = files['upload_file_minidump']
            except KeyError:
                track_outcome(project.organization_id, project.id, None,
                              Outcome.INVALID, "missing_minidump_upload")
                raise APIError('Missing minidump upload')

        if minidump.size == 0:
            track_outcome(project.organization_id, project.id, None,
                          Outcome.INVALID, "empty_minidump")
            raise APIError('Empty minidump upload received')

        if settings.SENTRY_MINIDUMP_CACHE:
            if not os.path.exists(settings.SENTRY_MINIDUMP_PATH):
                os.mkdir(settings.SENTRY_MINIDUMP_PATH, 0o744)

            with open('%s/%s.dmp' % (settings.SENTRY_MINIDUMP_PATH, event_id),
                      'wb') as out:
                for chunk in minidump.chunks():
                    out.write(chunk)

        # Always store the minidump in attachments so we can access it during
        # processing, regardless of the event-attachments feature. This will
        # allow us to stack walk again with CFI once symbols are loaded.
        attachments = []
        minidump.seek(0)
        attachments.append(
            CachedAttachment.from_upload(minidump,
                                         type=MINIDUMP_ATTACHMENT_TYPE))
        has_event_attachments = features.has('organizations:event-attachments',
                                             project.organization,
                                             actor=request.user)

        # Append all other files as generic attachments. We can skip this if the
        # feature is disabled since they won't be saved.
        for name, file in six.iteritems(request.FILES):
            if name == 'upload_file_minidump':
                continue

            # Known attachment: msgpack event
            if name == "__sentry-event":
                merge_attached_event(file, data)
                continue
            if name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
                merge_attached_breadcrumbs(file, data)
                continue

            # Add any other file as attachment
            if has_event_attachments:
                attachments.append(CachedAttachment.from_upload(file))

        write_minidump_placeholder(data)

        event_id = self.process(request,
                                attachments=attachments,
                                data=data,
                                project=project,
                                **kwargs)

        # Return the formatted UUID of the generated event. This is
        # expected by the Electron http uploader on Linux and doesn't
        # break the default Breakpad client library.
        return HttpResponse(six.text_type(uuid.UUID(event_id)),
                            content_type='text/plain')
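The two-byte probe in this example is worth isolating: a multipart body opens with its boundary line, which always starts with '--', whereas a real minidump starts with the b'MDMP' magic. A self-contained version of the check:

import io

def looks_like_nested_multipart(fileobj):
    # Peek at the first two bytes without disturbing the position the
    # caller expects.
    fileobj.seek(0)
    prefix = fileobj.read(2)
    fileobj.seek(0)
    return prefix == b"--"

assert looks_like_nested_multipart(io.BytesIO(b"--boundary\r\n"))
assert not looks_like_nested_multipart(io.BytesIO(b"MDMP\x00\x00"))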
Example #15
    def post(self, request, project, project_config, **kwargs):
        attachments_enabled = features.has('organizations:event-attachments',
                                           project.organization, actor=request.user)

        is_apple_crash_report = False

        attachments = []
        event = {'event_id': uuid.uuid4().hex}
        try:
            unreal = process_unreal_crash(request.body, request.GET.get(
                'UserID'), request.GET.get('AppEnvironment'), event)

            apple_crash_report = unreal.get_apple_crash_report()
            if apple_crash_report:
                merge_apple_crash_report(apple_crash_report, event)
                is_apple_crash_report = True
        except (ProcessMinidumpError, Unreal4Error) as e:
            minidumps_logger.exception(e)
            track_outcome(
                project_config.organization_id,
                project_config.project_id,
                None,
                Outcome.INVALID,
                "process_minidump_unreal")
            raise APIError(e.message.split('\n', 1)[0])

        try:
            unreal_context = unreal.get_context()
            if unreal_context is not None:
                merge_unreal_context_event(unreal_context, event, project)
        except Unreal4Error as e:
            # we'll continue without the context data
            minidumps_logger.exception(e)

        try:
            unreal_logs = unreal.get_logs()
            if unreal_logs is not None:
                merge_unreal_logs_event(unreal_logs, event)
        except Unreal4Error as e:
            # we'll continue without the breadcrumbs
            minidumps_logger.exception(e)

        is_minidump = False

        for file in unreal.files():
            # Known attachment: msgpack event
            if file.name == "__sentry-event":
                merge_attached_event(file.open_stream(), event)
                continue
            if file.name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
                merge_attached_breadcrumbs(file.open_stream(), event)
                continue

            if file.type == "minidump" and not is_apple_crash_report:
                is_minidump = True

            # Always store the minidump in attachments so we can access it during
            # processing, regardless of the event-attachments feature. This will
            # allow us to stack walk again with CFI once symbols are loaded.
            if file.type == "minidump" or attachments_enabled:
                attachments.append(CachedAttachment(
                    name=file.name,
                    data=file.open_stream().read(),
                    type=unreal_attachment_type(file),
                ))

        if is_minidump:
            write_minidump_placeholder(event)

        event_id = self.process(
            request,
            attachments=attachments,
            data=event,
            project=project,
            project_config=project_config,
            **kwargs)

        # The return here is only useful for consistency
        # because the UE4 crash reporter doesn't care about it.
        return HttpResponse(
            six.text_type(uuid.UUID(event_id)),
            content_type='text/plain'
        )
Example #16
def _do_save_event(
    cache_key=None, data=None, start_time=None, event_id=None, project_id=None, **kwargs
):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome
    from sentry.ingest.outcomes_consumer import mark_signal_sent

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data["event_id"]

    # Only when we come from reprocessing do we get a project_id sent
    # into the task.
    if project_id is None:
        project_id = data.pop("project")

    key_id = None if data is None else data.get("key_id")
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    # We only need to delete raw events for events that support
    # reprocessing.  If the data cannot be found we want to assume
    # that we need to delete the raw event.
    if not data or reprocessing.event_supports_reprocessing(data):
        delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: where data is None because we did not manage
    # to fetch it from the default cache or the empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while being on the queue, the second happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it.  This causes the node store to delete the data and we end up
    # fetching an empty dict.  We could in theory not invoke `save_event`
    # in those cases but it's important that we always clean up the
    # reprocessing reports correctly or they will screw up the UI.  So
    # to future-proof this correctly we just handle this case here.
    if not data:
        metrics.incr(
            "events.failed", tags={"reason": "cache", "stage": "post"}, skip_internal=False
        )
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        # event.project.organization is populated after this statement.
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has("organizations:event-attachments", event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id,
        )

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        # There is no signal supposed to be sent for this particular
        # outcome-reason combination. Prevent the outcome consumer from
        # emitting it for now.
        #
        # XXX(markus): Revisit decision about signals once outcomes consumer is stable.
        mark_signal_sent(project_id, event_id)
        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id,
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event we also delete the key always.
            if event is None or features.has(
                "organizations:event-attachments", event.project.organization, actor=None
            ):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing("events.time-to-process", time() - start_time, instance=data["platform"])
Example #17
File: api.py Project: yola-0316/sentry
    def process(self,
                request,
                project,
                key,
                auth,
                helper,
                data,
                project_config,
                attachments=None,
                **kwargs):
        disable_transaction_events()
        metrics.incr("events.total", skip_internal=False)

        project_id = project_config.project_id
        organization_id = project_config.organization_id

        if not data:
            track_outcome(organization_id, project_id, key.id, Outcome.INVALID,
                          "no_data")
            raise APIError("No JSON data was found")

        remote_addr = request.META["REMOTE_ADDR"]

        event_manager = EventManager(
            data,
            project=project,
            key=key,
            auth=auth,
            client_ip=remote_addr,
            user_agent=helper.context.agent,
            version=auth.version,
            content_encoding=request.META.get("HTTP_CONTENT_ENCODING", ""),
            project_config=project_config,
        )
        del data

        self.pre_normalize(event_manager, helper)

        try:
            event_manager.normalize()
        except ProcessingErrorInvalidTransaction as e:
            track_outcome(organization_id, project_id, key.id, Outcome.INVALID,
                          "invalid_transaction")
            raise APIError(six.text_type(e).split("\n", 1)[0])

        data = event_manager.get_data()
        dict_data = dict(data)
        data_size = len(json.dumps(dict_data))

        if data_size > 10000000:
            metrics.timing("events.size.rejected", data_size)
            track_outcome(
                organization_id,
                project_id,
                key.id,
                Outcome.INVALID,
                "too_large",
                event_id=dict_data.get("event_id"),
            )
            raise APIForbidden("Event size exceeded 10MB after normalization.")

        metrics.timing("events.size.data.post_storeendpoint", data_size)

        return process_event(event_manager, project, key, remote_addr, helper,
                             attachments, project_config)
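Example #17 surfaces only the first line of a normalization failure to the client. That truncation isolated, with the same six-based text coercion:

import six

def first_error_line(exc):
    # Normalization errors can span many lines; clients only get the
    # first one in the APIError message.
    return six.text_type(exc).split("\n", 1)[0]

assert first_error_line(ValueError("invalid transaction\ndetails")) == "invalid transaction"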
Example #18
File: api.py Project: yaoqi/sentry
    def post(self, request, project, **kwargs):
        # Minidump request payloads do not have the same structure as
        # usual events from other SDKs. Most notably, the event needs
        # to be transferred in the `sentry` form field. All other form
        # fields are assumed "extra" information. The only exception
        # to this is `upload_file_minidump`, which contains the minidump.

        if any(key.startswith('sentry[') for key in request.POST):
            # First, try to parse the nested form syntax `sentry[key][key]`
            # This is required for the Breakpad client library, which only
            # supports string values of up to 64 characters.
            extra = parser.parse(request.POST.urlencode())
            data = extra.pop('sentry', {})
        else:
            # Custom clients can submit longer payloads and should JSON
            # encode event data into the optional `sentry` field.
            extra = request.POST
            json_data = extra.pop('sentry', None)
            data = json.loads(json_data[0]) if json_data else {}

        # Merge additional form fields from the request with `extra`
        # data from the event payload and set defaults for processing.
        extra.update(data.get('extra', {}))
        data['extra'] = extra

        # Assign our own UUID so we can track this minidump. We cannot trust the
        # uploaded filename, and if reading the minidump fails there is no way
        # we can ever retrieve the original UUID from the minidump.
        event_id = data.get('event_id') or uuid.uuid4().hex
        data['event_id'] = event_id

        # At this point, we only extract the bare minimum information
        # needed to continue processing. This requires processing the
        # minidump without symbols and CFI to obtain an initial stack
        # trace (most likely via stack scanning). If all validations
        # pass, the event will be inserted into the database.
        try:
            minidump = request.FILES['upload_file_minidump']
        except KeyError:
            track_outcome(
                project.organization_id,
                project.id,
                None,
                Outcome.INVALID,
                "missing_minidump_upload")
            raise APIError('Missing minidump upload')

        # Breakpad on Linux sometimes stores the entire HTTP request body as
        # the dump file instead of just the minidump. The Electron SDK, for
        # example, then uploads a multipart formdata body inside the minidump
        # file. It needs to be re-parsed to extract the actual minidump before
        # continuing.
        minidump.seek(0)
        if minidump.read(2) == b'--':
            # The remaining bytes of the first line are the form boundary. We
            # have already read two bytes, the remainder is the form boundary
            # (excluding the initial '--').
            boundary = minidump.readline().rstrip()
            minidump.seek(0)

            # Next, we have to fake an HTTP request by specifying the form
            # boundary and the content length, or otherwise Django will not try
            # to parse our form body. Also, we need to supply new upload
            # handlers since they cannot be reused from the current request.
            meta = {
                'CONTENT_TYPE': b'multipart/form-data; boundary=%s' % boundary,
                'CONTENT_LENGTH': minidump.size,
            }
            handlers = [
                uploadhandler.load_handler(handler, request)
                for handler in settings.FILE_UPLOAD_HANDLERS
            ]

            _, files = MultiPartParser(meta, minidump, handlers).parse()
            try:
                minidump = files['upload_file_minidump']
            except KeyError:
                track_outcome(
                    project.organization_id,
                    project.id,
                    None,
                    Outcome.INVALID,
                    "missing_minidump_upload")
                raise APIError('Missing minidump upload')

        if minidump.size == 0:
            track_outcome(
                project.organization_id,
                project.id,
                None,
                Outcome.INVALID,
                "empty_minidump")
            raise APIError('Empty minidump upload received')

        if settings.SENTRY_MINIDUMP_CACHE:
            if not os.path.exists(settings.SENTRY_MINIDUMP_PATH):
                os.mkdir(settings.SENTRY_MINIDUMP_PATH, 0o744)

            with open('%s/%s.dmp' % (settings.SENTRY_MINIDUMP_PATH, event_id), 'wb') as out:
                for chunk in minidump.chunks():
                    out.write(chunk)

        # Always store the minidump in attachments so we can access it during
        # processing, regardless of the event-attachments feature. This will
        # allow us to stack walk again with CFI once symbols are loaded.
        attachments = []
        minidump.seek(0)
        attachments.append(CachedAttachment.from_upload(minidump, type=MINIDUMP_ATTACHMENT_TYPE))
        has_event_attachments = features.has('organizations:event-attachments',
                                             project.organization, actor=request.user)

        # Append all other files as generic attachments. We can skip this if the
        # feature is disabled since they won't be saved.
        for name, file in six.iteritems(request.FILES):
            if name == 'upload_file_minidump':
                continue

            # Known attachment: msgpack event
            if name == "__sentry-event":
                merge_attached_event(file, data)
                continue
            if name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
                merge_attached_breadcrumbs(file, data)
                continue

            # Add any other file as attachment
            if has_event_attachments:
                attachments.append(CachedAttachment.from_upload(file))

        try:
            state = process_minidump(minidump)
            merge_process_state_event(data, state)
        except ProcessMinidumpError as e:
            minidumps_logger.exception(e)
            track_outcome(
                project.organization_id,
                project.id,
                None,
                Outcome.INVALID,
                "process_minidump")
            raise APIError(e.message.split('\n', 1)[0])

        event_id = self.process(
            request,
            attachments=attachments,
            data=data,
            project=project,
            **kwargs)

        # Return the formatted UUID of the generated event. This is
        # expected by the Electron http uploader on Linux and doesn't
        # break the default Breakpad client library.
        return HttpResponse(
            six.text_type(uuid.UUID(event_id)),
            content_type='text/plain'
        )
Example #19
File: api.py Project: aleksaToljic/sentry
def process_event(event_manager, project, key, remote_addr, helper,
                  attachments):
    event_received.send_robust(ip=remote_addr,
                               project=project,
                               sender=process_event)

    event_id = event_manager.get_data().get('event_id')
    start_time = time()
    should_filter, filter_reason = event_manager.should_filter()
    if should_filter:
        track_outcome(project.organization_id,
                      project.id,
                      key.id,
                      Outcome.FILTERED,
                      filter_reason,
                      event_id=event_id)
        metrics.incr('events.blacklisted',
                     tags={'reason': filter_reason},
                     skip_internal=False)
        event_filtered.send_robust(
            ip=remote_addr,
            project=project,
            sender=process_event,
        )
        raise APIForbidden('Event dropped due to filter: %s' %
                           (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(quotas.is_rate_limited,
                              project=project,
                              key=key,
                              _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug('Dropped event due to error with rate limiter')

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(project.organization_id,
                      project.id,
                      key.id,
                      Outcome.RATE_LIMITED,
                      reason,
                      event_id=event_id)
        metrics.incr(
            'events.dropped',
            tags={
                'reason': reason or 'unknown',
            },
            skip_internal=False,
        )
        event_dropped.send_robust(
            ip=remote_addr,
            project=project,
            reason_code=reason,
            sender=process_event,
        )
        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    org_options = OrganizationOption.objects.get_all_values(
        project.organization_id)

    data = event_manager.get_data()
    del event_manager

    event_id = data['event_id']

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = 'ev:%s:%s' % (
        project.id,
        event_id,
    )

    if cache.get(cache_key) is not None:
        track_outcome(project.organization_id,
                      project.id,
                      key.id,
                      Outcome.INVALID,
                      'duplicate',
                      event_id=event_id)
        raise APIForbidden('An event with the same ID already exists (%s)' %
                           (event_id, ))

    scrub_ip_address = (
        org_options.get('sentry:require_scrub_ip_address', False)
        or project.get_option('sentry:scrub_ip_address', False))
    scrub_data = (org_options.get('sentry:require_scrub_data', False)
                  or project.get_option('sentry:scrub_data', True))

    if scrub_data:
        # We filter data immediately before it ever gets into the queue
        sensitive_fields_key = 'sentry:sensitive_fields'
        sensitive_fields = (org_options.get(sensitive_fields_key, []) +
                            project.get_option(sensitive_fields_key, []))

        exclude_fields_key = 'sentry:safe_fields'
        exclude_fields = (org_options.get(exclude_fields_key, []) +
                          project.get_option(exclude_fields_key, []))

        scrub_defaults = (org_options.get('sentry:require_scrub_defaults',
                                          False)
                          or project.get_option('sentry:scrub_defaults', True))

        SensitiveDataFilter(
            fields=sensitive_fields,
            include_defaults=scrub_defaults,
            exclude_fields=exclude_fields,
        ).apply(data)

    if scrub_ip_address:
        # We filter data immediately before it ever gets into the queue
        helper.ensure_does_not_have_ip(data)

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data,
                                   start_time=start_time,
                                   attachments=attachments)

    cache.set(cache_key, '', 60 * 5)

    api_logger.debug('New event received (%s)', event_id)

    event_accepted.send_robust(
        ip=remote_addr,
        data=data,
        project=project,
        sender=process_event,
    )

    return event_id
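
Buried in the middle of `process_event` is the duplicate-event guard. Below is a stripped-down sketch of just that pattern, assuming Django's cache API; `APIForbidden` and the function name are stand-ins. Note that the original sets the cache key only after the event has been queued, while the sketch does both in one place for brevity.

from django.core.cache import cache

class APIForbidden(Exception):
    # Stand-in for Sentry's own APIForbidden.
    pass

def reject_duplicate(project_id, event_id):
    # Same key format and five-minute TTL as in process_event above.
    cache_key = 'ev:%s:%s' % (project_id, event_id)
    if cache.get(cache_key) is not None:
        raise APIForbidden(
            'An event with the same ID already exists (%s)' % (event_id,))
    cache.set(cache_key, '', 60 * 5)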
Example #20
File: api.py Project: yaoqi/sentry
    def post(self, request, project, **kwargs):
        attachments_enabled = features.has('organizations:event-attachments',
                                           project.organization, actor=request.user)

        attachments = []
        event = {'event_id': uuid.uuid4().hex}
        try:
            unreal = process_unreal_crash(request.body, request.GET.get(
                'UserID'), request.GET.get('AppEnvironment'), event)
            process_state = unreal.process_minidump()
            if process_state:
                merge_process_state_event(event, process_state)
            else:
                apple_crash_report = unreal.get_apple_crash_report()
                if apple_crash_report:
                    merge_apple_crash_report(apple_crash_report, event)
                else:
                    track_outcome(project.organization_id, project.id, None,
                                  Outcome.INVALID, "missing_minidump_unreal")
                    raise APIError("missing minidump in unreal crash report")
        except (ProcessMinidumpError, Unreal4Error) as e:
            minidumps_logger.exception(e)
            track_outcome(
                project.organization_id,
                project.id,
                None,
                Outcome.INVALID,
                "process_minidump_unreal")
            raise APIError(e.message.split('\n', 1)[0])

        try:
            unreal_context = unreal.get_context()
            if unreal_context is not None:
                merge_unreal_context_event(unreal_context, event, project)
        except Unreal4Error as e:
            # we'll continue without the context data
            minidumps_logger.exception(e)

        try:
            unreal_logs = unreal.get_logs()
            if unreal_logs is not None:
                merge_unreal_logs_event(unreal_logs, event)
        except Unreal4Error as e:
            # we'll continue without the breadcrumbs
            minidumps_logger.exception(e)

        for file in unreal.files():
            # Known attachment: msgpack event
            if file.name == "__sentry-event":
                merge_attached_event(file.open_stream(), event)
                continue
            if file.name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
                merge_attached_breadcrumbs(file.open_stream(), event)
                continue

            # Always store the minidump in attachments so we can access it during
            # processing, regardless of the event-attachments feature. This will
            # allow us to stack walk again with CFI once symbols are loaded.
            if file.type == "minidump" or attachments_enabled:
                attachments.append(CachedAttachment(
                    name=file.name,
                    data=file.open_stream().read(),
                    type=unreal_attachment_type(file),
                ))

        event_id = self.process(
            request,
            attachments=attachments,
            data=event,
            project=project,
            **kwargs)

        # The return here is only useful for consistency
        # because the UE4 crash reporter doesn't care about it.
        return HttpResponse(
            six.text_type(uuid.UUID(event_id)),
            content_type='text/plain'
        )
Example #21
def process_event(event_manager, project, key, remote_addr, helper,
                  attachments, project_config):
    event_received.send_robust(ip=remote_addr,
                               project=project,
                               sender=process_event)

    start_time = time()

    data = event_manager.get_data()
    should_filter, filter_reason = event_manager.should_filter()
    del event_manager

    event_id = data["event_id"]

    if should_filter:
        signals_in_consumer = decide_signals_in_consumer()

        if not signals_in_consumer:
            # Mark that the event_filtered signal is sent. Do this before emitting
            # the outcome to avoid a potential race between OutcomesConsumer and
            # `event_filtered.send_robust` below.
            mark_signal_sent(project_config.project_id, event_id)

        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.FILTERED,
            filter_reason,
            event_id=event_id,
        )
        metrics.incr("events.blacklisted",
                     tags={"reason": filter_reason},
                     skip_internal=False)

        if not signals_in_consumer:
            event_filtered.send_robust(ip=remote_addr,
                                       project=project,
                                       sender=process_event)

        # Relay will no longer be able to provide information about filter
        # status, so to see the impact we add a way to turn on Relay-like
        # behavior here.
        if options.get("store.lie-about-filter-status"):
            return event_id

        raise APIForbidden("Event dropped due to filter: %s" %
                           (filter_reason, ))

    # TODO: improve this API (e.g. make RateLimit act on __ne__)
    rate_limit = safe_execute(quotas.is_rate_limited,
                              project=project,
                              key=key,
                              _with_transaction=False)
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None)

    # XXX(dcramer): when the rate limiter fails we drop events to ensure
    # it cannot cascade
    if rate_limit is None or rate_limit.is_limited:
        if rate_limit is None:
            api_logger.debug("Dropped event due to error with rate limiter")

        signals_in_consumer = decide_signals_in_consumer()

        if not signals_in_consumer:
            # Mark that the event_dropped signal is sent. Do this before emitting
            # the outcome to avoid a potential race between OutcomesConsumer and
            # `event_dropped.send_robust` below.
            mark_signal_sent(project_config.project_id, event_id)

        reason = rate_limit.reason_code if rate_limit else None
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.RATE_LIMITED,
            reason,
            event_id=event_id,
        )
        metrics.incr("events.dropped",
                     tags={"reason": reason or "unknown"},
                     skip_internal=False)
        if not signals_in_consumer:
            event_dropped.send_robust(ip=remote_addr,
                                      project=project,
                                      reason_code=reason,
                                      sender=process_event)

        if rate_limit is not None:
            raise APIRateLimited(rate_limit.retry_after)

    # TODO(dcramer): ideally we'd only validate this if the event_id was
    # supplied by the user
    cache_key = "ev:%s:%s" % (project_config.project_id, event_id)

    if cache.get(cache_key) is not None:
        track_outcome(
            project_config.organization_id,
            project_config.project_id,
            key.id,
            Outcome.INVALID,
            "duplicate",
            event_id=event_id,
        )
        raise APIForbidden("An event with the same ID already exists (%s)" %
                           (event_id, ))

    datascrubbing_settings = project_config.config.get(
        "datascrubbingSettings") or {}
    data = semaphore.scrub_event(datascrubbing_settings, dict(data))

    # mutates data (strips a lot of context if not queued)
    helper.insert_data_to_database(data,
                                   start_time=start_time,
                                   attachments=attachments)

    cache.set(cache_key, "", 60 * 60)  # Cache for 1 hour

    api_logger.debug("New event received (%s)", event_id)

    event_accepted.send_robust(ip=remote_addr,
                               data=data,
                               project=project,
                               sender=process_event)

    return event_id
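
The rate-limiter handling in both versions of `process_event` normalizes three possible results: `safe_execute` yields None when `quotas.is_rate_limited` raises, some backends return a bare bool, and others return a full RateLimit. A self-contained sketch of that normalization, with a namedtuple standing in for Sentry's RateLimit class:

from collections import namedtuple

# Stand-in for Sentry's RateLimit; the real class carries more state.
RateLimit = namedtuple('RateLimit',
                       ['is_limited', 'retry_after', 'reason_code'])

def normalize_rate_limit(check, **kwargs):
    # Roughly what safe_execute does: swallow errors and return None.
    try:
        rate_limit = check(**kwargs)
    except Exception:
        rate_limit = None
    if isinstance(rate_limit, bool):
        rate_limit = RateLimit(is_limited=rate_limit, retry_after=None,
                               reason_code=None)
    return rate_limit

# None means the limiter itself failed; events are still dropped so a
# broken rate limiter cannot cascade, but no retry_after is raised.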
Example #22
    def post(self, request, project, project_config, **kwargs):
        # Minidump request payloads do not have the same structure as usual
        # events from other SDKs. The minidump can either be transmitted as
        # the raw request body, or as `upload_file_minidump` in a multipart
        # formdata request. Optionally, an event payload can be sent in the
        # `sentry` form field, either as JSON or as nested form data.

        request_files = request.FILES or {}
        content_type = request.META.get("CONTENT_TYPE")

        if content_type in self.dump_types:
            minidump = io.BytesIO(request.body)
            minidump_name = "Minidump"
            data = {}
        else:
            minidump = request_files.get("upload_file_minidump")
            minidump_name = minidump.name if minidump else None

            if any(key.startswith("sentry[") for key in request.POST):
                # First, try to parse the nested form syntax `sentry[key][key]`
                # This is required for the Breakpad client library, which only
                # supports string values of up to 64 characters.
                extra = parser.parse(request.POST.urlencode())
                data = extra.pop("sentry", {})
            else:
                # Custom clients can submit longer payloads and should JSON
                # encode event data into the optional `sentry` field.
                extra = request.POST.dict()
                json_data = extra.pop("sentry", None)
                try:
                    data = json.loads(json_data) if json_data else {}
                except ValueError:
                    data = {}

            if not isinstance(data, dict):
                data = {}

            # Merge additional form fields from the request with `extra` data
            # from the event payload and set defaults for processing. This is
            # sent by clients like Breakpad or Crashpad.
            extra.update(data.get("extra", {}))
            data["extra"] = extra

        if not minidump:
            track_outcome(
                project_config.organization_id,
                project_config.project_id,
                None,
                Outcome.INVALID,
                "missing_minidump_upload",
            )
            raise APIError("Missing minidump upload")

        # Breakpad on Linux sometimes stores the entire HTTP request body as
        # the dump file instead of just the minidump. The Electron SDK, for
        # example, then uploads a multipart formdata body inside the minidump
        # file. It needs to be re-parsed to extract the actual minidump
        # before continuing.
        minidump.seek(0)
        if minidump.read(2) == b"--":
            # We have already read the first two bytes; the remainder of the
            # first line is the form boundary (excluding the initial '--').
            boundary = minidump.readline().rstrip()
            minidump.seek(0)

            # Next, we have to fake an HTTP request by specifying the form
            # boundary and the content length, otherwise Django will not try
            # to parse our form body. We also need to supply new upload
            # handlers, since they cannot be reused from the current request.
            meta = {
                "CONTENT_TYPE": b"multipart/form-data; boundary=%s" % boundary,
                "CONTENT_LENGTH": minidump.size,
            }
            handlers = [
                uploadhandler.load_handler(handler, request)
                for handler in settings.FILE_UPLOAD_HANDLERS
            ]

            _, inner_files = MultiPartParser(meta, minidump, handlers).parse()
            try:
                minidump = inner_files["upload_file_minidump"]
                minidump_name = minidump.name
            except KeyError:
                track_outcome(
                    project_config.organization_id,
                    project_config.project_id,
                    None,
                    Outcome.INVALID,
                    "missing_minidump_upload",
                )
                raise APIError("Missing minidump upload")

        minidump.seek(0)
        if minidump.read(4) != b"MDMP":
            track_outcome(
                project_config.organization_id,
                project_config.project_id,
                None,
                Outcome.INVALID,
                "invalid_minidump",
            )
            raise APIError("Uploaded file was not a minidump")

        # Always store the minidump in attachments so we can access it during
        # processing, regardless of the event-attachments feature. This is
        # required to process the minidump with debug information.
        attachments = []

        # The minidump attachment is special. It has its own attachment type to
        # distinguish it from regular attachments for processing. Also, it might
        # not be part of `request_files` if it has been uploaded as raw request
        # body instead of a multipart formdata request.
        minidump.seek(0)
        attachments.append(
            CachedAttachment(
                name=minidump_name,
                content_type="application/octet-stream",
                data=minidump.read(),
                type=MINIDUMP_ATTACHMENT_TYPE,
            ))

        # Append all other files as generic attachments.
        # RaduW 4 Jun 2019: always send attachments for minidumps (does not
        # use the event-attachments feature).
        for name, file in six.iteritems(request_files):
            if name == "upload_file_minidump":
                continue

            # Known attachment: msgpack event
            if name == "__sentry-event":
                merge_attached_event(file, data)
                continue
            if name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
                merge_attached_breadcrumbs(file, data)
                continue

            # Add any other file as attachment
            attachments.append(CachedAttachment.from_upload(file))

        # Assign our own UUID so we can track this minidump. We cannot trust
        # the uploaded filename, and if reading the minidump fails there is
        # no way we can ever retrieve the original UUID from the minidump.
        event_id = data.get("event_id") or uuid.uuid4().hex
        data["event_id"] = event_id

        # Write a minimal event payload that is required to kick off native
        # event processing. It is also used as a fallback if processing of
        # the minidump fails.
        # NB: This occurs after merging attachments, so that it overwrites
        # potentially contradicting payloads transmitted in __sentry-event.
        write_minidump_placeholder(data)

        event_id = self.process(request,
                                attachments=attachments,
                                data=data,
                                project=project,
                                project_config=project_config,
                                **kwargs)

        # Return the formatted UUID of the generated event. This is
        # expected by the Electron http uploader on Linux and doesn't
        # break the default Breakpad client library.
        return HttpResponse(six.text_type(uuid.UUID(event_id)),
                            content_type="text/plain")
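
The trickiest part of this handler is recognizing when Breakpad has uploaded an entire multipart body as the "minidump". A focused sketch of just the boundary sniffing, on a plain BytesIO; the Django re-parse with MultiPartParser is omitted, and the function name is illustrative.

import io

def sniff_nested_multipart(dump):
    # A real minidump starts with the MDMP magic; a mis-uploaded multipart
    # body starts with "--" followed by the form boundary.
    dump.seek(0)
    if dump.read(2) != b"--":
        dump.seek(0)
        return None
    boundary = dump.readline().rstrip()
    dump.seek(0)
    return boundary

body = io.BytesIO(b"--abc123\r\nContent-Disposition: form-data; ...")
assert sniff_nested_multipart(body) == b"abc123"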
Example #23
    def post(self, request, project, project_config, **kwargs):
        attachments_enabled = features.has("organizations:event-attachments",
                                           project.organization,
                                           actor=request.user)

        attachments = []
        event = {
            "event_id": uuid.uuid4().hex,
            "environment": request.GET.get("AppEnvironment")
        }

        user_id = request.GET.get("UserID")
        if user_id:
            merge_unreal_user(event, user_id)

        try:
            unreal = Unreal4Crash.from_bytes(request.body)
        except (ProcessMinidumpError, Unreal4Error) as e:
            minidumps_logger.exception(e)
            track_outcome(
                project_config.organization_id,
                project_config.project_id,
                None,
                Outcome.INVALID,
                "process_unreal",
            )
            raise APIError(e.message.split("\n", 1)[0])

        try:
            unreal_context = unreal.get_context()
        except Unreal4Error as e:
            # we'll continue without the context data
            unreal_context = None
            minidumps_logger.exception(e)
        else:
            if unreal_context is not None:
                merge_unreal_context_event(unreal_context, event, project)

        try:
            unreal_logs = unreal.get_logs()
        except Unreal4Error as e:
            # we'll continue without the breadcrumbs
            minidumps_logger.exception(e)
        else:
            if unreal_logs is not None:
                merge_unreal_logs_event(unreal_logs, event)

        is_minidump = False
        is_applecrashreport = False

        for file in unreal.files():
            # Known attachment: msgpack event
            if file.name == "__sentry-event":
                merge_attached_event(file.open_stream(), event)
                continue
            if file.name in ("__sentry-breadcrumb1", "__sentry-breadcrumb2"):
                merge_attached_breadcrumbs(file.open_stream(), event)
                continue

            if file.type == "minidump":
                is_minidump = True
            if file.type == "applecrashreport":
                is_applecrashreport = True

            # Always store attachments that can be processed, regardless of the
            # event-attachments feature.
            if file.type in self.required_attachments or attachments_enabled:
                attachments.append(
                    CachedAttachment(
                        name=file.name,
                        data=file.open_stream().read(),
                        type=unreal_attachment_type(file),
                    ))

        if is_minidump:
            write_minidump_placeholder(event)
        elif is_applecrashreport:
            write_applecrashreport_placeholder(event)

        event_id = self.process(request,
                                attachments=attachments,
                                data=event,
                                project=project,
                                project_config=project_config,
                                **kwargs)

        # The return here is only useful for consistency
        # because the UE4 crash reporter doesn't care about it.
        return HttpResponse(six.text_type(uuid.UUID(event_id)),
                            content_type="text/plain")
Example #24
File: store.py Project: yaoqi/sentry
def _do_save_event(cache_key=None, data=None, start_time=None, event_id=None,
                   project_id=None, **kwargs):
    """
    Saves an event to the database.
    """
    from sentry.event_manager import HashDiscarded, EventManager
    from sentry import quotas
    from sentry.models import ProjectKey
    from sentry.utils.outcomes import Outcome, track_outcome

    if cache_key and data is None:
        data = default_cache.get(cache_key)

    if data is not None:
        data = CanonicalKeyDict(data)

    if event_id is None and data is not None:
        event_id = data['event_id']

    # Only when we come from reprocessing do we get a project_id sent into
    # the task.
    if project_id is None:
        project_id = data.pop('project')

    key_id = None if data is None else data.get('key_id')
    if key_id is not None:
        key_id = int(key_id)
    timestamp = to_datetime(start_time) if start_time is not None else None

    delete_raw_event(project_id, event_id, allow_hint_clear=True)

    # This covers two cases: either data is None because we did not manage
    # to fetch it from the default cache, or an empty dictionary was
    # stored in the default cache.  The former happens if the event
    # expired while on the queue; the latter happens on reprocessing
    # if the raw event was deleted concurrently while we held on to
    # it, which causes the node store to delete the data, so we end up
    # fetching an empty dict.  We could in theory skip `save_event`
    # in those cases, but it is important that we always clean up the
    # reprocessing reports correctly or they will break the UI.  So
    # to future-proof this, we just handle the case here.
    if not data:
        metrics.incr(
            'events.failed',
            tags={
                'reason': 'cache',
                'stage': 'post'},
            skip_internal=False)
        return

    with configure_scope() as scope:
        scope.set_tag("project", project_id)

    event = None
    try:
        manager = EventManager(data)
        event = manager.save(project_id, assume_normalized=True)

        # Always load attachments from the cache so we can later prune them.
        # Only save them if the event-attachments feature is active, though.
        if features.has('organizations:event-attachments', event.project.organization, actor=None):
            attachments = attachment_cache.get(cache_key) or []
            for attachment in attachments:
                save_attachment(event, attachment)

        # This is where we can finally say that we have accepted the event.
        track_outcome(
            event.project.organization_id,
            event.project.id,
            key_id,
            Outcome.ACCEPTED,
            None,
            timestamp,
            event_id
        )

    except HashDiscarded:
        project = Project.objects.get_from_cache(id=project_id)
        reason = FilterStatKeys.DISCARDED_HASH
        project_key = None
        try:
            if key_id is not None:
                project_key = ProjectKey.objects.get_from_cache(id=key_id)
        except ProjectKey.DoesNotExist:
            pass

        quotas.refund(project, key=project_key, timestamp=start_time)
        track_outcome(
            project.organization_id,
            project_id,
            key_id,
            Outcome.FILTERED,
            reason,
            timestamp,
            event_id
        )

    finally:
        if cache_key:
            default_cache.delete(cache_key)

            # For the unlikely case that we did not manage to persist the
            # event, we always delete the attachment cache key as well.
            if event is None or \
               features.has('organizations:event-attachments', event.project.organization, actor=None):
                attachment_cache.delete(cache_key)

        if start_time:
            metrics.timing(
                'events.time-to-process',
                time() - start_time,
                instance=data['platform'])
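
`_do_save_event` is normally run as a task and handed only a cache key; the payload travels through `default_cache`. A hedged sketch of that calling convention follows. The `default_cache` import exists in Sentry, but the key format, timeout, and payload here are illustrative assumptions, not taken from this listing.

from time import time
from sentry.cache import default_cache

# Park the normalized payload in the default cache, then hand the task
# just the key. Key format and timeout are assumptions for illustration.
data = {'event_id': 'a' * 32, 'project': 1, 'platform': 'python'}
cache_key = 'e:%s:%s' % (data['event_id'], data['project'])
default_cache.set(cache_key, data, 3600)

_do_save_event(cache_key=cache_key, start_time=time())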
Example #25
def discard_event(job, attachments):
    """
    Refunds consumed quotas for an event and its attachments.

    For the event and each dropped attachment, an outcome
    FILTERED(discarded-hash) is emitted.

    :param job:         The job context container.
    :param attachments: The full list of attachments to filter.
    """

    project = job["event"].project

    quotas.refund(
        project,
        key=job["project_key"],
        timestamp=job["start_time"],
        category=job["category"],
        quantity=1,
    )

    track_outcome(
        org_id=project.organization_id,
        project_id=job["project_id"],
        key_id=job["key_id"],
        outcome=Outcome.FILTERED,
        reason=FilterStatKeys.DISCARDED_HASH,
        timestamp=to_datetime(job["start_time"]),
        event_id=job["event"].event_id,
        category=job["category"],
    )

    attachment_quantity = 0
    for attachment in attachments:
        # Quotas are counted with at least ``1`` for attachments.
        attachment_quantity += attachment.size or 1

        track_outcome(
            org_id=project.organization_id,
            project_id=job["project_id"],
            key_id=job["key_id"],
            outcome=Outcome.FILTERED,
            reason=FilterStatKeys.DISCARDED_HASH,
            timestamp=to_datetime(job["start_time"]),
            event_id=job["event"].event_id,
            category=DataCategory.ATTACHMENT,
            quantity=attachment.size,
        )

    if attachment_quantity:
        quotas.refund(
            project,
            key=job["project_key"],
            timestamp=job["start_time"],
            category=DataCategory.ATTACHMENT,
            quantity=attachment_quantity,
        )

    metrics.incr(
        "events.discarded",
        skip_internal=True,
        tags={"organization_id": project.organization_id, "platform": job["platform"]},
    )
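
The subtle rule in `discard_event` is that attachments count toward quota as at least one unit even when their size is zero or unknown. A tiny sketch isolating that accounting, with a hypothetical attachment class:

def attachment_quota_quantity(attachments):
    # Each attachment consumes at least 1, matching the loop above.
    return sum(attachment.size or 1 for attachment in attachments)

class FakeAttachment(object):
    def __init__(self, size):
        self.size = size

# A zero-sized attachment still consumes one unit.
assert attachment_quota_quantity(
    [FakeAttachment(0), FakeAttachment(1024)]) == 1025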