Example #1
0
def is_valid_csp_report(report, project=None):
    """
    Decide whether a CSP report should be kept.

    A report is rejected when:

    - its ``effective_directive`` is not in ``ALLOWED_DIRECTIVES``;
    - its ``blocked_uri`` is the bare string ``'about'``;
    - it has neither a ``blocked_uri`` nor a ``source_file``;
    - its ``source_file`` or ``blocked_uri`` matches an ignored source.

    The ignored sources are ``DISALLOWED_SOURCES`` (unless the project
    switched off ``sentry:csp_ignored_sources_defaults``) followed by the
    project's own ``sentry:csp_ignored_sources`` option.

    Returns ``True`` when the report is acceptable, ``False`` otherwise.
    """
    if report.get('effective_directive') not in ALLOWED_DIRECTIVES:
        return False

    blocked_uri = report.get('blocked_uri')
    # Some reports from Chrome report blocked-uri as just 'about'.
    # In this case, this is not actionable and is just noisy.
    # Observed in Chrome 45 and 46.
    if blocked_uri == 'about':
        return False

    source_file = report.get('source_file')
    # We must have one of these to do anything sensible
    if not (blocked_uri or source_file):
        return False

    use_defaults = project is None or bool(
        project.get_option('sentry:csp_ignored_sources_defaults', True))
    disallowed_sources = DISALLOWED_SOURCES if use_defaults else ()
    if project is not None:
        disallowed_sources += tuple(project.get_option('sentry:csp_ignored_sources', []))

    # Nothing is ignored, so every remaining report is acceptable.
    if not disallowed_sources:
        return True

    # The source file is consulted before the blocked URI.
    for uri in (source_file, blocked_uri):
        if uri and is_valid_origin(uri, allowed=disallowed_sources):
            return False

    return True
Example #2
0
    def _dispatch(self, request, helper, project_id=None, origin=None, *args, **kwargs):
        """
        Resolve the project and client auth for a request, enforce the
        origin (CORS) rules and hand off to the parent ``dispatch``.

        ``project_id`` is looked up through ``self._get_project_from_id``;
        when no project results from it, the project is taken from the
        client auth instead. ``origin`` is the caller-supplied origin value,
        or ``None`` when there is none.

        Raises ``APIError`` when an origin is given without a project, when
        no project can be identified, or when the URL and the auth point at
        different projects. Raises ``APIForbidden`` when the origin is not
        valid for the project, or when a non-"2.0" client sends no secret
        key and does not pass the origin check.

        Returns the response, with ``Access-Control-Allow-Origin`` set to
        ``origin`` whenever ``origin`` is truthy. Note that ``*args`` is
        accepted but only ``**kwargs`` is forwarded to the parent dispatch.
        """
        # Requests handled here never carry a logged-in user.
        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        if project:
            helper.context.bind_project(project)
            Raven.tags_context(helper.context.get_tags_context())

        if origin is not None:
            # This check is specific for clients who need CORS support
            if not project:
                raise APIError("Client must be upgraded for CORS support")
            if not is_valid_origin(origin, project):
                raise APIForbidden("Invalid origin: %s" % (origin,))

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method == "OPTIONS":
            # No auth header is parsed on this path.
            response = self.options(request, project)
        else:
            auth = self._parse_header(request, helper, project)

            project_ = helper.project_from_auth(auth)

            # Legacy API was /api/store/ and the project ID was only available elsewhere
            if not project:
                if not project_:
                    raise APIError("Unable to identify project")
                project = project_
                helper.context.bind_project(project)
            elif project_ != project:
                # The project from the URL and the one from the auth disagree.
                raise APIError("Two different project were specified")

            helper.context.bind_auth(auth)
            Raven.tags_context(helper.context.get_tags_context())

            if auth.version != "2.0":
                if not auth.secret_key:
                    # If we're missing a secret_key, check if we are allowed
                    # to do a CORS request.

                    # If we're missing an Origin/Referrer header entirely,
                    # we only want to support this on GET requests. By allowing
                    # un-authenticated CORS checks for POST, we basically
                    # are obsoleting our need for a secret key entirely.
                    if origin is None and request.method != "GET":
                        raise APIForbidden("Missing required attribute in authentication header: sentry_secret")

                    # Reached with origin=None only for GET requests; the
                    # outcome then depends on how is_valid_origin treats None.
                    if not is_valid_origin(origin, project):
                        raise APIForbidden("Missing required Origin or Referer header")

            response = super(APIView, self).dispatch(
                request=request, project=project, auth=auth, helper=helper, **kwargs
            )

        # Echo the (already validated) origin back to the caller.
        if origin:
            response["Access-Control-Allow-Origin"] = origin

        return response
Example #3
0
    def dispatch(self, request, *args, **kwargs):
        """
        Mirror of rest framework's ``dispatch`` that additionally loads the
        JSON body, validates the ``Origin`` header against the
        authenticated credential, converts URL arguments via
        ``convert_args`` and attaches a default ``request.access``.

        Returns the finalized response (also stored on ``self.response``).
        An authenticated request whose origin is not allowed gets a 400.
        """
        self.args = args
        self.kwargs = kwargs
        request = self.initialize_request(request, *args, **kwargs)
        self.load_json_body(request)
        self.request = request
        self.headers = self.default_response_headers  # deprecate?

        # Optional artificial latency, configured in milliseconds.
        delay_ms = settings.SENTRY_API_RESPONSE_DELAY
        if delay_ms:
            time.sleep(delay_ms / 1000.0)

        # A "null" value should be treated as no Origin for us.
        # See RFC6454 for more information on this behavior.
        origin = request.META.get('HTTP_ORIGIN', 'null')
        if origin == 'null':
            origin = None

        try:
            if origin and request.auth:
                permitted = request.auth.get_allowed_origins()
                if not is_valid_origin(origin, allowed=permitted):
                    rejection = Response('Invalid origin: %s' % (origin, ), status=400)
                    self.response = self.finalize_response(
                        request, rejection, *args, **kwargs)
                    return self.response

            self.initial(request, *args, **kwargs)

            # Pick the handler for the HTTP verb; unknown verbs fall through
            # to the "method not allowed" handler with unconverted arguments.
            verb = request.method.lower()
            if verb in self.http_method_names:
                handler = getattr(self, verb, self.http_method_not_allowed)

                args, kwargs = self.convert_args(request, *args, **kwargs)
                self.args, self.kwargs = args, kwargs
            else:
                handler = self.http_method_not_allowed

            # Only set up the default access when nothing assigned one earlier.
            if getattr(request, 'access', None) is None:
                request.access = access.from_request(request)

            response = handler(request, *args, **kwargs)

        except Exception as exc:
            response = self.handle_exception(request, exc)

        if origin:
            self.add_cors_headers(request, response)

        self.response = self.finalize_response(
            request, response, *args, **kwargs)

        return self.response
Example #4
0
    def test_project_and_setting(self):
        """
        An origin listed in ``SENTRY_ALLOW_ORIGIN`` is accepted even though
        the project's own ``sentry:origins`` option lists a different one.
        """
        from sentry.models import Project, ProjectOption

        project = Project.objects.get()
        ProjectOption.objects.create(
            project=project,
            key='sentry:origins',
            value=['http://foo.example'],
        )

        with self.Settings(SENTRY_ALLOW_ORIGIN='http://example.com'):
            accepted = is_valid_origin('http://example.com', project)
            self.assertTrue(accepted)
Example #5
0
    def post(self, request, project, helper, **kwargs):
        """
        Accept a browser security report for ``project``.

        Raises ``APIError`` when the report type is not recognized or the
        payload fails schema validation, and ``APIForbidden`` when the
        report's origin is not valid for the project. Returns the
        ``HttpResponse`` produced by ``self.process`` if there is one,
        otherwise an empty 201 response.
        """
        payload = helper.safely_load_json_string(request.body)

        report_type = self.security_report_type(payload)
        if report_type is None:
            raise APIError('Unrecognized security report type')

        interface = get_interface(report_type)
        try:
            instance = interface.from_raw(payload)
        except jsonschema.ValidationError as e:
            # Only the first line of the validation message is surfaced.
            raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

        # Do origin check based on the `document-uri` key as explained in `_dispatch`.
        if not is_valid_origin(instance.get_origin(), project):
            if project:
                tsdb.incr(tsdb.models.project_total_received_cors, project.id)
            raise APIForbidden('Invalid origin')

        outcome = self.process(
            request,
            project=project,
            helper=helper,
            data={
                'interface': interface.path,
                'report': instance,
                'release': request.GET.get('sentry_release'),
                'environment': request.GET.get('sentry_environment'),
            },
            **kwargs
        )
        if isinstance(outcome, HttpResponse):
            return outcome
        return HttpResponse(content_type='application/javascript', status=201)
Example #6
0
    def post(self, request, project, auth, helper, **kwargs):
        """
        Accept a CSP report whose ``document-uri`` is valid for ``project``.

        Raises ``APIError`` when the body has no ``csp-report`` key and
        ``APIForbidden`` when the ``document-uri`` is ``about:blank`` or is
        rejected by ``is_valid_origin``. Returns the ``HttpResponse``
        produced by ``self.process`` if there is one, otherwise an empty
        201 response.
        """
        payload = helper.safely_load_json_string(request.body)

        try:
            report = payload['csp-report']
        except KeyError:
            raise APIError('Missing csp-report')

        # Do origin check based on the `document-uri` key as explained
        # in `_dispatch`.
        document_uri = report.get('document-uri')

        # 'about:blank' is refused outright, before the project's origins
        # are consulted.
        if document_uri == 'about:blank' or not is_valid_origin(document_uri, project):
            raise APIForbidden('Invalid document-uri')

        outcome = self.process(
            request, project=project, auth=auth, helper=helper, data=report, **kwargs
        )
        return outcome if isinstance(outcome, HttpResponse) else HttpResponse(status=201)
Example #7
0
    def post(self, request, project, helper, **kwargs):
        """
        Accept a CSP report whose ``document-uri`` is valid for ``project``
        and tag it with the release taken from the query string.

        Raises ``APIError`` when the body has no ``csp-report`` key and
        ``APIForbidden`` when the ``document-uri`` is ``about:blank`` or is
        rejected by ``is_valid_origin`` (the latter is also counted in
        tsdb when a project is present). Returns the ``HttpResponse``
        produced by ``self.process`` if there is one, otherwise an empty
        201 response.
        """
        payload = helper.safely_load_json_string(request.body)

        try:
            report = payload['csp-report']
        except KeyError:
            raise APIError('Missing csp-report')

        # Do origin check based on the `document-uri` key as explained
        # in `_dispatch`.
        document_uri = report.get('document-uri')

        # 'about:blank' is refused outright and is not counted in tsdb.
        if document_uri == 'about:blank':
            raise APIForbidden('Invalid document-uri')

        origin_ok = is_valid_origin(document_uri, project)
        if not origin_ok:
            if project:
                tsdb.incr(tsdb.models.project_total_received_cors, project.id)
            raise APIForbidden('Invalid document-uri')

        # Attach on collected meta data. This data obviously isn't a part
        # of the spec, but we need to append to the report sentry specific things.
        report['_meta'] = {'release': request.GET.get('sentry_release')}

        outcome = self.process(
            request, project=project, helper=helper, data=report, **kwargs
        )
        return outcome if isinstance(outcome, HttpResponse) else HttpResponse(status=201)
Example #8
0
    def dispatch(self, request):
        """
        Serve the embeddable user-report dialog and accept its submissions.

        Responses, in the order they are decided:

        - 400 when the ``eventId`` query parameter is missing;
        - 404 when no project key can be resolved from the request;
        - 403 when there is no origin or it is not valid for the project;
        - an empty JSON response for ``OPTIONS`` requests;
        - 200 after a valid form has been saved as a user report;
        - 400 with the form errors for an invalid ``POST``;
        - otherwise the rendered ``error-page-embed.js`` script.
        """
        try:
            event_id = request.GET['eventId']
        except KeyError:
            return self._json_response(request, status=400)

        key = self._get_project_key(request)
        if not key:
            return self._json_response(request, status=404)

        origin = self._get_origin(request)
        if not origin:
            return self._json_response(request, status=403)

        # NOTE(review): this is the only early rejection that returns a bare
        # HttpResponse instead of going through _json_response -- confirm
        # whether that difference is intentional.
        if not is_valid_origin(origin, key.project):
            return HttpResponse(status=403)

        if request.method == 'OPTIONS':
            return self._json_response(request)

        # TODO(dcramer): since we cant use a csrf cookie we should at the very
        # least sign the request / add some kind of nonce
        # Pre-fill the form from the query string.
        initial = {
            'name': request.GET.get('name'),
            'email': request.GET.get('email'),
        }

        # Only POST requests supply form data; other methods get a form
        # constructed with None as its data.
        form = UserReportForm(request.POST if request.method == 'POST' else None,
                              initial=initial)
        if form.is_valid():
            report = form.save(commit=False)
            report.project = key.project
            report.event_id = event_id
            try:
                report.group = Group.objects.get(
                    eventmapping__event_id=report.event_id,
                    eventmapping__project=key.project,
                )
            except Group.DoesNotExist:
                # XXX(dcramer): the system should fill this in later
                pass
            report.save()
            return HttpResponse(status=200)
        elif request.method == 'POST':
            return self._json_response(request, {
                "errors": dict(form.errors),
            }, status=400)

        # Not a successful submission and not a failed POST: render the
        # dialog markup and embed it in the JavaScript template.
        template = render_to_string('sentry/error-page-embed.html', {
            'form': form,
        })

        # Both values are JSON-encoded before being marked safe for the template.
        context = {
            'endpoint': mark_safe(json.dumps(request.get_full_path())),
            'template': mark_safe(json.dumps(template)),
        }

        return render_to_response('sentry/error-page-embed.js', context, request,
                                  content_type='text/javascript')
Example #9
0
    def _dispatch(self, request, helper, project_id=None, origin=None, *args, **kwargs):
        """
        Resolve the project and project key for a request, enforce the
        origin (CORS) check and hand off to the parent ``dispatch``.

        ``project_id`` is looked up through ``self._get_project_from_id``;
        when no project results from it, the project is loaded from the
        project key obtained from the client auth. ``origin`` is the
        caller-supplied origin value, or ``None`` when there is none.

        Raises ``APIError`` when an origin is given without a project or
        when the URL and the key point at different projects, and
        ``APIForbidden`` (after incrementing the project's CORS counter in
        tsdb) when the origin is not valid for the project.

        Returns the response, with ``Access-Control-Allow-Origin`` set
        whenever ``origin`` is truthy. Note that ``*args`` is accepted but
        only ``**kwargs`` is forwarded to the parent dispatch.
        """
        # Requests handled here never carry a logged-in user.
        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        if project:
            helper.context.bind_project(project)
            Raven.tags_context(helper.context.get_tags_context())

        if origin is not None:
            # This check is specific for clients who need CORS support
            if not project:
                raise APIError('Client must be upgraded for CORS support')
            if not is_valid_origin(origin, project):
                # Count the rejected request before refusing it.
                tsdb.incr(tsdb.models.project_total_received_cors,
                          project.id)
                raise APIForbidden('Invalid origin: %s' % (origin, ))

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method == 'OPTIONS':
            # No auth header is parsed on this path.
            response = self.options(request, project)
        else:
            auth = self._parse_header(request, helper, project)

            key = helper.project_key_from_auth(auth)

            # Legacy API was /api/store/ and the project ID was only available elsewhere
            if not project:
                project = Project.objects.get_from_cache(id=key.project_id)
                helper.context.bind_project(project)
            elif key.project_id != project.id:
                # The project from the URL and the one owning the key disagree.
                raise APIError('Two different projects were specified')

            helper.context.bind_auth(auth)
            Raven.tags_context(helper.context.get_tags_context())

            # Explicitly bind Organization so we don't implicitly query it later
            # this just allows us to comfortably assure that `project.organization` is safe.
            # This also allows us to pull the object from cache, instead of being
            # implicitly fetched from database.
            project.organization = Organization.objects.get_from_cache(
                id=project.organization_id)

            response = super(APIView, self).dispatch(
                request=request, project=project, auth=auth, helper=helper, key=key, **kwargs
            )

        if origin:
            if origin == 'null':
                # If an Origin is `null`, but we got this far, that means
                # we've gotten past our CORS check for some reason. But the
                # problem is that we can't return "null" as a valid response
                # to `Access-Control-Allow-Origin` and we don't have another
                # value to work with, so just allow '*' since they've gotten
                # this far.
                response['Access-Control-Allow-Origin'] = '*'
            else:
                # Echo the (already validated) origin back to the caller.
                response['Access-Control-Allow-Origin'] = origin

        return response
Example #10
0
def fetch_url(url, project=None):
    """
    Fetch ``url`` and wrap the outcome in a ``UrlResult``.

    The cache is consulted first. On a miss the URL is downloaded and the
    outcome (success or ``BAD_SOURCE``) is cached for 60 seconds. When the
    download fails with anything other than ``HTTPError``, the URL's
    domain is pinned as bad for 5 minutes and nothing is cached for the
    URL itself.

    If ``project`` is given and ``url`` is a valid origin for it, the
    project's ``sentry:token`` option (when set) is sent as the
    ``X-Sentry-Token`` header.

    Returns ``BAD_SOURCE`` on failure, otherwise ``UrlResult(url, headers, body)``.
    """
    url_digest = hashlib.md5(url.encode('utf-8')).hexdigest()
    cache_key = 'source:%s' % (url_digest,)

    result = cache.get(cache_key)
    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_digest = hashlib.md5(domain.encode('utf-8')).hexdigest()
        domain_key = 'source:%s' % (domain_digest,)
        if cache.get(domain_key):
            return BAD_SOURCE

        # The token is only sent to URLs the project lists as its own origin.
        token = None
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
        headers = [('X-Sentry-Token', token)] if token else []

        try:
            response = safe_urlopen(
                url,
                allow_redirects=True,
                headers=headers,
                timeout=settings.SENTRY_SOURCE_FETCH_TIMEOUT,
            )
        except HTTPError:
            result = BAD_SOURCE
        except Exception:
            # it's likely we've failed due to a timeout, dns, etc so let's
            # ensure we can't cascade the failure by pinning this for 5 minutes
            cache.set(domain_key, 1, 300)
            logger.warning('Disabling sources to %s for %ss', domain, 300,
                           exc_info=True)
            return BAD_SOURCE
        else:
            try:
                body = safe_urlread(response)
            except Exception:
                result = BAD_SOURCE
            else:
                result = (dict(response.headers), body)

        cache.set(cache_key, result, 60)

    if result == BAD_SOURCE:
        return result

    return UrlResult(url, *result)
Example #11
0
    def should_filter(self, project=None):
        """
        Tell whether this report should be dropped because its
        ``blocked_uri`` or ``source_file`` matches an ignored source.

        The ignored sources are ``DEFAULT_DISALLOWED_SOURCES`` (unless the
        project switched off ``sentry:csp_ignored_sources_defaults``) plus
        the project's own ``sentry:csp_ignored_sources`` option. Attributes
        that do not exist on this object are skipped.

        Returns ``True`` when the report should be filtered out.
        """
        # Collect the candidate URIs first, in blocked_uri/source_file order.
        candidates = []
        for attr in ('blocked_uri', 'source_file'):
            if hasattr(self, attr):
                candidates.append(getattr(self, attr))

        disallowed = ()
        use_defaults = project is None or bool(
            project.get_option('sentry:csp_ignored_sources_defaults', True))
        if use_defaults:
            disallowed += DEFAULT_DISALLOWED_SOURCES
        if project is not None:
            disallowed += tuple(project.get_option('sentry:csp_ignored_sources', []))

        # With nothing to ignore, no report is filtered.
        if not disallowed:
            return False

        for uri in candidates:
            if is_valid_origin(uri, allowed=disallowed):
                return True
        return False
Example #12
0
    def dispatch(self, request, *args, **kwargs):
        """
        Identical to rest framework's dispatch except we add the ability
        to convert arguments (for common URL params).

        In addition, when the request carries an ``Origin`` header and is
        authenticated, the origin must be accepted by
        ``request.auth.get_allowed_origins()``; otherwise a 400 response is
        returned. The handler call is wrapped in a ``SqlQueryCountMonitor``
        named after the view class and HTTP method.

        Returns the finalized response (also stored on ``self.response``).
        """
        self.args = args
        self.kwargs = kwargs
        request = self.initialize_request(request, *args, **kwargs)
        self.request = request
        self.headers = self.default_response_headers  # deprecate?

        metric_name = '{}.{}'.format(type(self).__name__, request.method.lower())

        # Optional artificial latency, configured in milliseconds.
        if settings.SENTRY_API_RESPONSE_DELAY:
            time.sleep(settings.SENTRY_API_RESPONSE_DELAY / 1000.0)

        origin = request.META.get('HTTP_ORIGIN')

        try:
            # The origin check reads ``request.auth``. In rest framework that
            # access can run the authenticators and raise, so it has to live
            # inside the ``try`` for such failures to be turned into a proper
            # response by ``handle_exception`` instead of escaping the view.
            if origin and request.auth:
                allowed_origins = request.auth.get_allowed_origins()
                if not is_valid_origin(origin, allowed=allowed_origins):
                    response = Response('Invalid origin: %s' % (origin,), status=400)
                    self.response = self.finalize_response(request, response, *args, **kwargs)
                    return self.response

            self.initial(request, *args, **kwargs)

            # Get the appropriate handler method
            if request.method.lower() in self.http_method_names:
                handler = getattr(self, request.method.lower(),
                                  self.http_method_not_allowed)

                (args, kwargs) = self.convert_args(request, *args, **kwargs)
                self.args = args
                self.kwargs = kwargs
            else:
                handler = self.http_method_not_allowed

            with SqlQueryCountMonitor(metric_name):
                response = handler(request, *args, **kwargs)

        except Exception as exc:
            response = self.handle_exception(request, exc)

        if origin:
            self.add_cors_headers(request, response)

        self.response = self.finalize_response(request, response, *args, **kwargs)

        return self.response
Example #13
0
    def add_cors_headers(self, request, response):
        """
        Add CORS headers to ``response`` for an authenticated request.

        Nothing is added unless the request is authenticated, carries an
        ``Origin`` header, and that origin is accepted by
        ``request.auth.get_allowed_origins()``. In that case the origin is
        echoed in ``Access-Control-Allow-Origin`` and the view's HTTP
        method names are listed in ``Access-Control-Allow-Methods``.
        """
        if request.auth:
            origin = request.META.get('HTTP_ORIGIN')
            if origin and is_valid_origin(origin, allowed=request.auth.get_allowed_origins()):
                response['Access-Control-Allow-Origin'] = origin
                response['Access-Control-Allow-Methods'] = ', '.join(self.http_method_names)
Example #14
0
File: api.py Project: yaoqi/sentry
    def post(self, request, project, helper, key, **kwargs):
        """
        Accept a browser security report for ``project``.

        Every rejection is recorded through ``track_outcome`` as
        ``Outcome.INVALID`` with a reason. Raises ``APIError`` when the
        report type is not recognized or the payload fails schema
        validation, and ``APIForbidden`` when the report's origin is not
        valid for the project. Returns an empty 201 response on success.
        """
        def record_invalid(reason):
            # Shared bookkeeping for all three rejection paths below.
            track_outcome(
                project.organization_id,
                project.id,
                key.id,
                Outcome.INVALID,
                reason)

        payload = safely_load_json_string(request.body)

        report_type = self.security_report_type(payload)
        if report_type is None:
            record_invalid("security_report_type")
            raise APIError('Unrecognized security report type')

        interface = get_interface(report_type)
        try:
            instance = interface.from_raw(payload)
        except jsonschema.ValidationError as e:
            record_invalid("security_report")
            # Only the first line of the validation message is surfaced.
            raise APIError('Invalid security report: %s' % str(e).splitlines()[0])

        # Do origin check based on the `document-uri` key as explained in `_dispatch`.
        if not is_valid_origin(instance.get_origin(), project):
            if project:
                record_invalid(FilterStatKeys.CORS)
            raise APIForbidden('Invalid origin')

        self.process(
            request,
            project=project,
            helper=helper,
            data={
                'interface': interface.path,
                'report': instance,
                'release': request.GET.get('sentry_release'),
                'environment': request.GET.get('sentry_environment'),
            },
            key=key,
            **kwargs
        )
        return HttpResponse(content_type='application/javascript', status=201)
Example #15
0
    def post(self, request, project, auth, helper, **kwargs):
        """
        Accept a CSP report whose ``document-uri`` is valid for ``project``
        and which passes ``is_valid_csp_report``.

        Raises ``APIError`` when the body has no ``csp-report`` key and
        ``APIForbidden`` when the ``document-uri`` is ``about:blank``, is
        rejected by ``is_valid_origin``, or the report itself is invalid
        (in which case it is counted as received and blacklisted).
        Returns the ``HttpResponse`` produced by ``self.process`` if there
        is one, otherwise an empty 201 response.
        """
        payload = helper.safely_load_json_string(request.body)

        try:
            report = payload['csp-report']
        except KeyError:
            raise APIError('Missing csp-report')

        # Do origin check based on the `document-uri` key as explained
        # in `_dispatch`.
        document_uri = report.get('document-uri')

        # 'about:blank' is refused outright, before the project's origins
        # are consulted.
        if document_uri == 'about:blank' or not is_valid_origin(document_uri, project):
            raise APIForbidden('Invalid document-uri')

        # An invalid CSP report must go against quota
        if not is_valid_csp_report(report, project):
            counters = app.tsdb.models
            app.tsdb.incr_multi([
                (counters.project_total_received, project.id),
                (counters.project_total_blacklisted, project.id),
                (counters.organization_total_received, project.organization_id),
                (counters.organization_total_blacklisted, project.organization_id),
            ])
            metrics.incr('events.blacklisted')
            raise APIForbidden('Rejected CSP report')

        outcome = self.process(
            request, project=project, auth=auth, helper=helper, data=report, **kwargs
        )
        return outcome if isinstance(outcome, HttpResponse) else HttpResponse(status=201)
Example #16
0
File: api.py Project: nkabir/sentry
    def wrapped(request, project_id=None, *args, **kwargs):
        """
        Resolve ``project_id`` to a project, check the ``Origin`` header
        and call the wrapped view with the project.

        An all-digit ``project_id`` is looked up by id, anything else by
        slug. Returns a 400 response for an unknown project or an invalid
        origin; otherwise returns the view's response with access control
        headers applied.
        """
        project = None
        if project_id:
            if project_id.isdigit():
                lookup_kwargs = {"id": int(project_id)}
            else:
                lookup_kwargs = {"slug": project_id}

            try:
                project = Project.objects.get_from_cache(**lookup_kwargs)
            except Project.DoesNotExist:
                return HttpResponse("Invalid project_id: %r" % project_id, status=400)

        # Requests without an Origin header skip the origin check entirely.
        origin = request.META.get("HTTP_ORIGIN", None)
        if origin is not None and not is_valid_origin(origin, project):
            return HttpResponse("Invalid origin: %r" % origin, status=400)

        return apply_access_control_headers(
            func(request, project, *args, **kwargs), origin)
Example #17
0
    def post(self, request, project, auth, helper, **kwargs):
        """
        Accept a CSP report whose ``document-uri`` is valid for ``project``
        and which passes ``is_valid_csp_report``, tagging it with the
        release taken from the query string.

        Raises ``APIError`` when the body has no ``csp-report`` key and
        ``APIForbidden`` when the ``document-uri`` is ``about:blank``, is
        rejected by ``is_valid_origin``, or the report itself is invalid
        (in which case it is counted as received and blacklisted).
        Returns the ``HttpResponse`` produced by ``self.process`` if there
        is one, otherwise an empty 201 response.
        """
        payload = helper.safely_load_json_string(request.body)

        try:
            report = payload["csp-report"]
        except KeyError:
            raise APIError("Missing csp-report")

        # Do origin check based on the `document-uri` key as explained
        # in `_dispatch`.
        document_uri = report.get("document-uri")

        # "about:blank" is refused outright, before the project's origins
        # are consulted.
        if document_uri == "about:blank" or not is_valid_origin(document_uri, project):
            raise APIForbidden("Invalid document-uri")

        # An invalid CSP report must go against quota
        if not is_valid_csp_report(report, project):
            counters = app.tsdb.models
            app.tsdb.incr_multi(
                [
                    (counters.project_total_received, project.id),
                    (counters.project_total_blacklisted, project.id),
                    (counters.organization_total_received, project.organization_id),
                    (counters.organization_total_blacklisted, project.organization_id),
                ]
            )
            metrics.incr("events.blacklisted")
            raise APIForbidden("Rejected CSP report")

        # Attach on collected meta data. This data obviously isn't a part
        # of the spec, but we need to append to the report sentry specific things.
        report["_meta"] = {"release": request.GET.get("sentry_release")}

        outcome = self.process(
            request, project=project, auth=auth, helper=helper, data=report, **kwargs
        )
        return outcome if isinstance(outcome, HttpResponse) else HttpResponse(status=201)
Example #18
0
def fetch_file(url, project=None, release=None, dist=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Lookup order: the release's uploaded files (when ``release`` is given),
    then the cache, then an actual HTTP fetch whose result is cached.

    :param url: the URL of the file to fetch.
    :param project: optional project; when ``url`` is a valid origin for it,
        the project's ``sentry:verify_ssl`` and ``sentry:token`` options are
        applied to the HTTP request.
    :param release: optional release passed to ``fetch_release_file``.
    :param dist: optional distribution passed to ``fetch_release_file``.
    :param allow_scraping: when false, nothing is fetched (or read from the
        cache) if the release lookup did not produce a file.
    :raises http.CannotFetch: if the URL is truncated, scraping is not
        possible, the response status is not 200, the body cannot be
        encoded, or a ``.js`` URL returns something that looks like HTML.
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise http.CannotFetch(
            {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
        )
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    cache_key = 'source:cache:v4:%s' % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Entries written below are 5-tuples ending in the encoding;
            # shorter tuples (no encoding) are tolerated for backwards
            # compatibility with older cache entries.
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(
                result[0], result[1], zlib.decompress(result[2]), result[3], encoding
            )

    if result is None:
        headers = {}
        verify_ssl = False
        # Project-specific settings are only applied to URLs that are a
        # valid origin for the project.
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option('sentry:verify_ssl', False))
            token = project.get_option('sentry:token')
            if token:
                token_header = project.get_option('sentry:token_header') or 'X-Sentry-Token'
                headers[token_header] = token

        with metrics.timer('sourcemaps.fetch'):
            result = http.fetch_file(url, headers=headers, verify_ssl=verify_ssl)
            # The body is stored compressed; see the decompress on cache hit above.
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url,
                 result.headers,
                 z_body,
                 result.status,
                 result.encoding),
                get_max_age(result.headers))

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch(
            {
                'type': EventError.FETCH_INVALID_HTTP_CODE,
                'value': result.status,
                'url': http.expose_url(url),
            }
        )

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, six.binary_type):
        try:
            result = http.UrlResult(
                result.url, result.headers,
                result.body.encode('utf8'), result.status, result.encoding
            )
        except UnicodeEncodeError:
            error = {
                'type': EventError.FETCH_INVALID_ENCODING,
                'value': 'utf8',
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        # The body is binary at this point, so compare against a bytes
        # literal; bytes never compare equal to text on Python 3.
        if body_start[:1] == b'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise http.CannotFetch(error)

    return result
Example #19
0
    def _dispatch(self, request, helper, project_id=None, origin=None,
                  *args, **kwargs):
        """
        Authenticate a client request and forward it to the view.

        The project is resolved from ``project_id`` and, for anything but
        ``OPTIONS``, from the auth header as well; both must agree when both
        are present. For protocol versions other than ``2.0`` a GET without
        an origin is only accepted if the project allows a missing origin,
        and every other method requires a secret key.

        Raises ``APIError`` or ``APIForbidden`` when a check fails. Returns
        the response, with ``Access-Control-Allow-Origin`` set to ``origin``
        when one was supplied.
        """
        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        if project:
            helper.context.bind_project(project)
            Raven.tags_context(helper.context.get_tags_context())

        # An origin is only sent by clients that need CORS support, which
        # requires a project in the URL and an origin allowed for it.
        if origin is not None:
            if not project:
                raise APIError('Client must be upgraded for CORS support')
            if not is_valid_origin(origin, project):
                raise APIForbidden('Invalid origin: %s' % (origin,))

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method != 'OPTIONS':
            auth = self._parse_header(request, helper, project)
            auth_project = helper.project_from_auth(auth)

            if project:
                if auth_project != project:
                    raise APIError('Two different project were specified')
            else:
                # Legacy API was /api/store/ and the project ID was only
                # available through the auth header.
                if not auth_project:
                    raise APIError('Unable to identify project')
                project = auth_project
                helper.context.bind_project(project)

            helper.context.bind_auth(auth)
            Raven.tags_context(helper.context.get_tags_context())

            if auth.version != '2.0':
                if request.method == 'GET':
                    # GET only requires an Origin/Referer check. A non-None
                    # origin was already validated above, so only the
                    # "no origin at all" case is left to verify here.
                    if origin is None and not is_valid_origin(origin, project):
                        raise APIForbidden('Missing required Origin or Referer header')
                elif not auth.secret_key:
                    # Version 3 enforces secret key for server side requests
                    raise APIForbidden('Missing required attribute in authentication header: sentry_secret')

            response = super(APIView, self).dispatch(
                request=request,
                project=project,
                auth=auth,
                helper=helper,
                **kwargs
            )
        else:
            response = self.options(request, project)

        if origin:
            response['Access-Control-Allow-Origin'] = origin

        return response
Example #20
0
    def post(self, request, project, helper, key, project_config, **kwargs):
        """
        Accept a POSTed security report and queue it for processing.

        The body is parsed as JSON, matched to a report interface and
        validated, and the report's origin is checked against ``project``.
        Every rejection is recorded through ``track_outcome`` with
        ``Outcome.INVALID`` before the error is raised.

        Raises ``APIError`` for unrecognised or invalid reports and
        ``APIForbidden`` for a disallowed origin. Returns an empty 201
        response on success.
        """
        # This endpoint only accepts security reports.
        category = DataCategory.SECURITY

        def record_invalid(reason):
            # Shared bookkeeping for every rejection path below.
            track_outcome(
                project_config.organization_id,
                project_config.project_id,
                key.id,
                Outcome.INVALID,
                reason,
                category=category,
            )

        payload = safely_load_json_string(request.body)
        report_type = self.security_report_type(payload)
        if report_type is None:
            record_invalid("security_report_type")
            raise APIError("Unrecognized security report type")
        interface = get_interface(report_type)

        try:
            instance = interface.from_raw(payload)
        except jsonschema.ValidationError as e:
            record_invalid("security_report")
            # Only the first line of the validation message is reported back.
            first_line = str(e).splitlines()[0]
            raise APIError("Invalid security report: %s" % first_line)

        # Do origin check based on the `document-uri` key as explained in `_dispatch`.
        origin = instance.get_origin()
        if not is_valid_origin(origin, project):
            record_invalid(FilterStatKeys.CORS)
            raise APIForbidden("Invalid origin")

        data = {
            "interface": interface.path,
            "report": instance,
            "release": request.GET.get("sentry_release"),
            "environment": request.GET.get("sentry_environment"),
        }

        self.process(
            request,
            project=project,
            helper=helper,
            data=data,
            key=key,
            project_config=project_config,
            **kwargs
        )

        return HttpResponse(content_type="application/javascript", status=201)
Example #21
0
    def _dispatch(self,
                  request,
                  helper,
                  project_id=None,
                  origin=None,
                  *args,
                  **kwargs):
        """
        Authenticate a client request and forward it to the view.

        The project is resolved from ``project_id`` and, for anything but
        ``OPTIONS``, from the auth header as well; both must agree when both
        are present. For protocol versions other than ``2.0``, a request
        without a secret key is only accepted when its origin is valid for
        the project, and a missing origin is tolerated on GET only.

        Raises ``APIError`` or ``APIForbidden`` when a check fails. Returns
        the response, with ``Access-Control-Allow-Origin`` set to ``origin``
        when one was supplied.
        """
        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        if project:
            helper.context.bind_project(project)
            Raven.tags_context(helper.context.get_tags_context())

        # An origin is only sent by clients that need CORS support, which
        # requires a project in the URL and an origin allowed for it.
        if origin is not None:
            if not project:
                raise APIError('Client must be upgraded for CORS support')
            if not is_valid_origin(origin, project):
                raise APIForbidden('Invalid origin: %s' % (origin, ))

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method != 'OPTIONS':
            auth = self._parse_header(request, helper, project)
            auth_project = helper.project_from_auth(auth)

            if project:
                if auth_project != project:
                    raise APIError('Two different project were specified')
            else:
                # Legacy API was /api/store/ and the project ID was only
                # available through the auth header.
                if not auth_project:
                    raise APIError('Unable to identify project')
                project = auth_project
                helper.context.bind_project(project)

            helper.context.bind_auth(auth)
            Raven.tags_context(helper.context.get_tags_context())

            # Without a secret key the request has to qualify as a CORS
            # request instead.
            if auth.version != '2.0' and not auth.secret_key:
                # A request with no Origin/Referer header at all is only
                # supported for GET. Allowing un-authenticated CORS checks
                # for POST would basically make the secret key obsolete.
                if origin is None and request.method != 'GET':
                    raise APIForbidden(
                        'Missing required attribute in authentication header: sentry_secret'
                    )

                if not is_valid_origin(origin, project):
                    raise APIForbidden(
                        'Missing required Origin or Referer header')

            response = super(APIView, self).dispatch(
                request=request,
                project=project,
                auth=auth,
                helper=helper,
                **kwargs
            )
        else:
            response = self.options(request, project)

        if origin:
            response['Access-Control-Allow-Origin'] = origin

        return response
Example #22
0
    def _dispatch(self, request, helper, project_id=None, origin=None,
                  *args, **kwargs):
        """
        Authenticate a client request and forward it to the view.

        The project is resolved from ``project_id`` and, for anything but
        ``OPTIONS``, from the auth header as well; both must agree when both
        are present. For protocol versions other than ``2.0`` a GET without
        an origin is only accepted if the project allows a missing origin,
        and every other method requires a secret key.

        Raises ``APIError`` or ``APIForbidden`` when a check fails. Returns
        the response, with ``Access-Control-Allow-Origin`` set to ``origin``
        when one was supplied.
        """
        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        if project:
            helper.context.bind_project(project)
            Raven.tags_context(helper.context.get_tags_context())

        if origin is not None:
            # This check is specific for clients who need CORS support
            if not project:
                raise APIError('Client must be upgraded for CORS support')
            if not is_valid_origin(origin, project):
                raise APIForbidden('Invalid origin: %s' % (origin,))

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method == 'OPTIONS':
            response = self.options(request, project)
        else:
            auth = self._parse_header(request, helper, project)

            project_ = helper.project_from_auth(auth)

            # Legacy API was /api/store/ and the project ID was only available elsewhere
            if not project:
                if not project_:
                    raise APIError('Unable to identify project')
                project = project_
                # The project was not known when the context was bound
                # above, so bind it now; otherwise the tags context set
                # below would be built without the project.
                helper.context.bind_project(project)
            elif project_ != project:
                raise APIError('Two different project were specified')

            helper.context.bind_auth(auth)
            Raven.tags_context(helper.context.get_tags_context())

            if auth.version != '2.0':
                if request.method == 'GET':
                    # GET only requires an Origin/Referer check
                    # If an Origin isn't passed, it's possible that the project allows no origin,
                    # so we need to explicitly check for that here. If Origin is not None,
                    # it can be safely assumed that it was checked previously and it's ok.
                    if origin is None and not is_valid_origin(origin, project):
                        # Special case an error message for a None origin when None wasn't allowed
                        raise APIForbidden('Missing required Origin or Referer header')
                else:
                    # Version 3 enforces secret key for server side requests
                    if not auth.secret_key:
                        raise APIForbidden('Missing required attribute in authentication header: sentry_secret')

            response = super(APIView, self).dispatch(
                request=request,
                project=project,
                auth=auth,
                helper=helper,
                **kwargs
            )

        if origin:
            response['Access-Control-Allow-Origin'] = origin

        return response
Example #23
0
class APIView(BaseView):
    def _get_project_from_id(self, project_id):
        if project_id:
            if project_id.isdigit():
                lookup_kwargs = {'id': int(project_id)}
            else:
                lookup_kwargs = {'slug': project_id}

            try:
                return Project.objects.get_from_cache(**lookup_kwargs)
            except Project.DoesNotExist:
                raise APIError('Invalid project_id: %r' % project_id)
        return None

    def _parse_header(self, request, project):
        try:
            auth_vars = extract_auth_vars(request)
        except (IndexError, ValueError):
            raise APIError('Invalid auth header')

        if not auth_vars:
            raise APIError(
                'Client/server version mismatch: Unsupported client')

        server_version = auth_vars.get('sentry_version', '1.0')
        client = auth_vars.get('sentry_client',
                               request.META.get('HTTP_USER_AGENT'))

        if server_version not in ('2.0', '3', '4'):
            raise APIError(
                'Client/server version mismatch: Unsupported protocol version (%s)'
                % server_version)

        if not client:
            raise APIError(
                'Client request error: Missing client version identifier')

        return auth_vars

    @csrf_exempt
    def dispatch(self, request, project_id=None, *args, **kwargs):
        try:
            origin = self.get_request_origin(request)

            response = self._dispatch(request,
                                      project_id=project_id,
                                      *args,
                                      **kwargs)
        except Exception:
            response = HttpResponse(status=500)

        if response.status_code != 200:
            # Set X-Sentry-Error as in many cases it is easier to inspect the headers
            response[
                'X-Sentry-Error'] = response.content[:
                                                     200]  # safety net on content length

            if response.status_code == 500:
                log = logger.error
                exc_info = True
            else:
                log = logger.info
                exc_info = None

            log('status=%s project_id=%s user_id=%s ip=%s agent=%s %s',
                response.status_code,
                project_id,
                request.user.is_authenticated() and request.user.id or None,
                request.META['REMOTE_ADDR'],
                request.META.get('HTTP_USER_AGENT'),
                response['X-Sentry-Error'],
                extra={
                    'request': request,
                },
                exc_info=exc_info)

            if origin:
                # We allow all origins on errors
                response['Access-Control-Allow-Origin'] = '*'

        if origin:
            response['Access-Control-Allow-Headers'] = 'X-Sentry-Auth, X-Requested-With, Origin, Accept, Content-Type, ' \
                'Authentication'
            response['Access-Control-Allow-Methods'] = ', '.join(
                self._allowed_methods())

        return response

    def get_request_origin(self, request):
        """
        Returns either the Origin or Referer value from the request headers.
        """
        return request.META.get('HTTP_ORIGIN',
                                request.META.get('HTTP_REFERER'))

    def _dispatch(self, request, project_id=None, *args, **kwargs):
        request.user = AnonymousUser()

        try:
            project = self._get_project_from_id(project_id)
        except APIError, e:
            return HttpResponse(str(e), content_type='text/plain', status=400)

        origin = self.get_request_origin(request)
        if origin is not None:
            if not project:
                return HttpResponse(
                    'Your client must be upgraded for CORS support.')
            elif not is_valid_origin(origin, project):
                return HttpResponse('Invalid origin: %r' % origin,
                                    content_type='text/plain',
                                    status=400)

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method == 'OPTIONS':
            response = self.options(request, project)
        else:
            try:
                auth_vars = self._parse_header(request, project)
            except APIError, e:
                return HttpResponse(str(e),
                                    content_type='text/plain',
                                    status=400)

            try:
                project_, user = project_from_auth_vars(auth_vars)
            except APIError, error:
                return HttpResponse(unicode(error.msg),
                                    status=error.http_status)
Example #24
0
 def isValidOrigin(self, origin, inputs):
     """
     Evaluate ``is_valid_origin`` for ``origin`` against ``self.project``
     while ``get_origins`` is patched to return ``inputs``, and check that
     ``get_origins`` was called exactly once with the project.
     """
     patcher = mock.patch('sentry.utils.http.get_origins',
                          return_value=inputs)
     with patcher as fake_get_origins:
         outcome = is_valid_origin(origin, self.project)
         fake_get_origins.assert_called_once_with(self.project)
     return outcome
Example #25
0
def fetch_file(url,
               project=None,
               release=None,
               dist=None,
               allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Lookup order: the release's uploaded files (when ``release`` is given),
    then the cache, then an actual HTTP fetch whose result is cached for
    60 seconds.

    :param url: the URL of the file to fetch.
    :param project: optional project; when ``url`` is a valid origin for it,
        the project's ``sentry:verify_ssl`` and ``sentry:token`` options are
        applied to the HTTP request.
    :param release: optional release passed to ``fetch_release_file``.
    :param dist: optional distribution passed to ``fetch_release_file``.
    :param allow_scraping: when false, nothing is fetched (or read from the
        cache) if the release lookup did not produce a file.
    :raises http.CannotFetch: if the URL is truncated, scraping is not
        possible, the body cannot be encoded, or a ``.js`` URL returns
        something that looks like HTML.
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise http.CannotFetch({
            'type': EventError.JS_MISSING_SOURCE,
            'url': http.expose_url(url),
        })
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    cache_key = 'source:cache:v4:%s' % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Entries written below are 5-tuples ending in the encoding;
            # shorter tuples (no encoding) are tolerated for backwards
            # compatibility with older cache entries.
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(result[0], result[1],
                                    zlib.decompress(result[2]), result[3],
                                    encoding)

    if result is None:
        headers = {}
        verify_ssl = False
        # Project-specific settings are only applied to URLs that are a
        # valid origin for the project.
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option('sentry:verify_ssl', False))
            token = project.get_option('sentry:token')
            if token:
                token_header = project.get_option(
                    'sentry:token_header',
                    'X-Sentry-Token',
                )
                headers[token_header] = token

        with metrics.timer('sourcemaps.fetch'):
            result = http.fetch_file(url,
                                     headers=headers,
                                     verify_ssl=verify_ssl)
            # The body is stored compressed; see the decompress on cache hit above.
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url, result.headers, z_body, result.status, result.encoding),
                60)

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, six.binary_type):
        try:
            result = http.UrlResult(result.url, result.headers,
                                    result.body.encode('utf8'), result.status,
                                    result.encoding)
        except UnicodeEncodeError:
            error = {
                'type': EventError.FETCH_INVALID_ENCODING,
                'value': 'utf8',
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        # The body is binary at this point, so compare against a bytes
        # literal; bytes never compare equal to text on Python 3.
        if body_start[:1] == b'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise http.CannotFetch(error)

    return result
Example #26
0
def fetch_file(url, project=None, release=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Lookup order: the release's uploaded files (when ``release`` is given),
    then the cache, then an HTTP fetch whose result is cached for 60
    seconds. A domain whose fetch raised is blacklisted for 300 seconds.

    :param url: the URL of the file to fetch.
    :param project: optional project; when ``url`` is a valid origin for it,
        the project's ``sentry:token`` is sent as ``X-Sentry-Token``.
    :param release: optional release passed to ``fetch_release_file``.
    :param allow_scraping: when false, nothing is fetched (or read from the
        cache) if the release lookup did not produce a file.
    :raises CannotFetchSource: if scraping is not possible, the domain is
        blacklisted, the request fails, or the status is not 200.
    """
    if release:
        result = fetch_release_file(url, release)
    else:
        result = None

    # The scraping restrictions must hold whenever there is no release file
    # to serve -- including when a release was given but does not contain
    # the file -- otherwise a missing release file would fall through to
    # the HTTP fetch below regardless of `allow_scraping` or the URL scheme.
    if result is None and (
            not allow_scraping or not url.startswith(('http:', 'https:'))):
        error = {
            'type': EventError.JS_MISSING_SOURCE,
            'url': url,
        }
        raise CannotFetchSource(error)

    cache_key = 'source:cache:v2:%s' % (
        md5(url).hexdigest(),
    )

    if result is None:
        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)

    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_key = 'source:blacklist:v2:%s' % (
            md5(domain).hexdigest(),
        )
        domain_result = cache.get(domain_key)
        if domain_result:
            # Re-raise the stored error for this domain, pointing at the
            # URL that was requested this time.
            domain_result['url'] = url
            raise CannotFetchSource(domain_result)

        headers = {}
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
            if token:
                headers['X-Sentry-Token'] = token

        logger.debug('Fetching %r from the internet', url)

        http_session = http.build_session()
        try:
            response = http_session.get(
                url,
                allow_redirects=True,
                verify=False,
                headers=headers,
                timeout=settings.SENTRY_SOURCE_FETCH_TIMEOUT,
            )
        except Exception as exc:
            logger.debug('Unable to fetch %r', url, exc_info=True)
            if isinstance(exc, SuspiciousOperation):
                error = {
                    'type': EventError.SECURITY_VIOLATION,
                    'value': unicode(exc),
                    'url': url,
                }
            elif isinstance(exc, (RequestException, ZeroReturnError)):
                error = {
                    'type': EventError.JS_GENERIC_FETCH_ERROR,
                    'value': str(type(exc)),
                    'url': url,
                }
            else:
                logger.exception(unicode(exc))
                error = {
                    'type': EventError.UNKNOWN_ERROR,
                    'url': url,
                }

            # TODO(dcramer): we want to be less aggressive on disabling domains
            cache.set(domain_key, error or '', 300)
            logger.warning('Disabling sources to %s for %ss', domain, 300,
                           exc_info=True)
            raise CannotFetchSource(error)

        # requests' attempts to use chardet internally when no encoding is found
        # and we want to avoid that slow behavior
        if not response.encoding:
            response.encoding = 'utf-8'

        # Cached shape: (lower-cased headers, body text, status code).
        result = (
            {k.lower(): v for k, v in response.headers.items()},
            response.text,
            response.status_code,
        )
        cache.set(cache_key, result, 60)

    if result[2] != 200:
        logger.debug('HTTP %s when fetching %r', result[2], url,
                     exc_info=True)
        error = {
            'type': EventError.JS_INVALID_HTTP_CODE,
            'value': result[2],
            'url': url,
        }
        raise CannotFetchSource(error)

    return UrlResult(url, result[0], result[1])
Example #27
0
def fetch_url(url, project=None, release=None):
    """
    Pull down a URL, returning a UrlResult object.

    The release's files are consulted first (when ``release`` is given),
    then the cache, and only then is the URL requested over HTTP. A fetched
    result is cached for 60 seconds; a domain whose request raised is
    blacklisted for 300 seconds.

    Raises ``DomainBlacklisted`` for a blacklisted domain and
    ``CannotFetchSource`` when the request fails or the status is not 200.
    """
    url_digest = hashlib.md5(url.encode('utf-8')).hexdigest()
    cache_key = 'source:cache:v2:%s' % (url_digest, )

    result = fetch_release_file(url, release) if release else None

    if result is None:
        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)

    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_digest = hashlib.md5(domain.encode('utf-8')).hexdigest()
        domain_key = 'source:blacklist:%s' % (domain_digest, )
        blacklist_reason = cache.get(domain_key)
        if blacklist_reason:
            raise DomainBlacklisted(
                ERR_DOMAIN_BLACKLISTED.format(reason=blacklist_reason))

        # The project's token is only sent to URLs that are a valid origin
        # for that project.
        headers = {}
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
            if token:
                headers['X-Sentry-Token'] = token

        logger.debug('Fetching %r from the internet', url)

        session = http.build_session()
        try:
            response = session.get(
                url,
                allow_redirects=True,
                verify=False,
                headers=headers,
                timeout=settings.SENTRY_SOURCE_FETCH_TIMEOUT,
            )
        except Exception as exc:
            logger.debug('Unable to fetch %r', url, exc_info=True)
            if isinstance(exc, SuspiciousOperation):
                error = unicode(exc)
            elif isinstance(exc, RequestException):
                error = ERR_GENERIC_FETCH_FAILURE.format(type=type(exc))
            else:
                logger.exception(unicode(exc))
                error = ERR_UNKNOWN_INTERNAL_ERROR

            # TODO(dcramer): we want to be less aggressive on disabling domains
            cache.set(domain_key, error or '', 300)
            logger.warning('Disabling sources to %s for %ss', domain, 300,
                           exc_info=True)
            raise CannotFetchSource(error)

        # Cached shape: (lower-cased headers, raw body, status code).
        lowered_headers = dict(
            (name.lower(), value) for name, value in response.headers.items())
        result = (lowered_headers, response.content, response.status_code)
        cache.set(cache_key, result, 60)

    status_code = result[2]
    if status_code != 200:
        logger.debug('HTTP %s when fetching %r', status_code, url, exc_info=True)
        raise CannotFetchSource(ERR_HTTP_CODE.format(status_code=status_code))

    return UrlResult(url, result[0], result[1])
Example #28
0
                }, exc_info=True)
            return HttpResponse(unicode(error.msg), status=error.http_status)
        else:
            if user:
                request.user = user

        # Legacy API was /api/store/ and the project ID was only available elsewhere
        if not project:
            if not project_:
                return HttpResponse('Unable to identify project', status=400)
            project = project_
        elif project_ != project:
            return HttpResponse('Project ID mismatch', status=400)

        origin = request.META.get('HTTP_ORIGIN', None)
        if origin is not None and not is_valid_origin(origin, project):
            return HttpResponse('Invalid origin: %r' % origin, status=400)

        auth = Auth(auth_vars)

        try:
            response = super(APIView, self).dispatch(request, project=project, auth=auth, **kwargs)

        except APIError, error:
            logger.info('Project %r raised API error: %s', project.slug, error, extra={
                'request': request,
            }, exc_info=True)
            response = HttpResponse(unicode(error.msg), status=error.http_status)

        response = apply_access_control_headers(response, origin)
Example #29
0
def fetch_url(url, project=None, release=None):
    """
    Pull down a URL, returning a UrlResult object.

    Sources are tried in order: the files attached to ``release`` (when one
    is given), the short-lived URL cache, and finally a live HTTP request.

    ``url`` is the address to retrieve. When ``project`` is given, ``url`` is
    a valid origin for it and the project has a ``sentry:token`` option, the
    token is sent along in the ``X-Sentry-Token`` request header.

    Raises ``CannotFetchSource`` (carrying an error dict) when the domain is
    currently blacklisted, when the HTTP request fails, or when the final
    status code is not 200.
    """
    cache_key = 'source:cache:v2:%s' % (md5(url).hexdigest(), )

    if release:
        result = fetch_release_file(url, release)
    else:
        result = None

    if result is None:
        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)

    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_key = 'source:blacklist:v2:%s' % (md5(domain).hexdigest(), )
        domain_result = cache.get(domain_key)
        if domain_result:
            # The cached error was recorded for another URL on this domain;
            # report it against the URL being requested now.
            domain_result['url'] = url
            raise CannotFetchSource(domain_result)

        headers = {}
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
            if token:
                headers['X-Sentry-Token'] = token

        logger.debug('Fetching %r from the internet', url)

        http_session = http.build_session()
        try:
            response = http_session.get(
                url,
                allow_redirects=True,
                verify=False,
                headers=headers,
                timeout=settings.SENTRY_SOURCE_FETCH_TIMEOUT,
            )
        except Exception as exc:
            logger.debug('Unable to fetch %r', url, exc_info=True)
            if isinstance(exc, SuspiciousOperation):
                error = {
                    'type': EventError.SECURITY_VIOLATION,
                    'value': unicode(exc),
                    'url': url,
                }
            elif isinstance(exc, (RequestException, ZeroReturnError)):
                error = {
                    'type': EventError.JS_GENERIC_FETCH_ERROR,
                    'value': str(type(exc)),
                    'url': url,
                }
            else:
                logger.exception(unicode(exc))
                error = {
                    'type': EventError.UNKNOWN_ERROR,
                    'url': url,
                }

            # TODO(dcramer): we want to be less aggressive on disabling domains
            cache.set(domain_key, error or '', 300)
            logger.warning('Disabling sources to %s for %ss',
                           domain,
                           300,
                           exc_info=True)
            raise CannotFetchSource(error)

        # requests' attempts to use chardet internally when no encoding is found
        # and we want to avoid that slow behavior
        if not response.encoding:
            response.encoding = 'utf-8'

        # Cached shape: (lowercased headers, body text, status code)
        result = (
            {k.lower(): v
             for k, v in response.headers.items()},
            response.text,
            response.status_code,
        )
        cache.set(cache_key, result, 60)

    if result[2] != 200:
        # No exception is being handled here, so exc_info is deliberately not
        # requested: it would attach an empty (or stale, unrelated) traceback.
        logger.debug('HTTP %s when fetching %r', result[2], url)
        error = {
            'type': EventError.JS_INVALID_HTTP_CODE,
            'value': result[2],
            'url': url,
        }
        raise CannotFetchSource(error)

    return UrlResult(url, result[0], result[1])
Example #30
0
def fetch_file(url, project=None, release=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Sources are tried in order: the files attached to ``release`` (when one
    is given), the short-lived URL cache (bodies are stored zlib-compressed),
    and finally a live HTTP request.

    ``url`` is the address to retrieve. When ``project`` is given, ``url`` is
    a valid origin for it and the project has a ``sentry:token`` option, the
    token is sent along in the ``X-Sentry-Token`` request header. If nothing
    came from the release and either ``allow_scraping`` is false or ``url``
    is not http(s), the lookup stops with a missing-source error.

    Raises ``CannotFetchSource`` (carrying an error dict) when the source is
    missing, the domain is currently blacklisted, the HTTP request fails, the
    final status code is not 200, or the body cannot be decoded as UTF-8.
    """
    if release:
        result = fetch_release_file(url, release)
    else:
        result = None

    cache_key = 'source:cache:v3:%s' % (md5(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': url,
            }
            raise CannotFetchSource(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            body = zlib.decompress(result[1])
            result = (result[0], force_text(body), result[2])

    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_key = 'source:blacklist:v2:%s' % (md5(domain).hexdigest(), )
        domain_result = cache.get(domain_key)
        if domain_result:
            # The cached error was recorded for another URL on this domain;
            # report it against the URL being requested now.
            domain_result['url'] = url
            raise CannotFetchSource(domain_result)

        headers = {}
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
            if token:
                headers['X-Sentry-Token'] = token

        logger.debug('Fetching %r from the internet', url)

        http_session = http.build_session()
        try:
            response = http_session.get(
                url,
                allow_redirects=True,
                verify=False,
                headers=headers,
                timeout=settings.SENTRY_SOURCE_FETCH_TIMEOUT,
            )
        except Exception as exc:
            logger.debug('Unable to fetch %r', url, exc_info=True)
            if isinstance(exc, RestrictedIPAddress):
                error = {
                    'type': EventError.RESTRICTED_IP,
                    'url': url,
                }
            elif isinstance(exc, SuspiciousOperation):
                error = {
                    'type': EventError.SECURITY_VIOLATION,
                    'url': url,
                }
            elif isinstance(exc, (RequestException, ZeroReturnError)):
                error = {
                    'type': EventError.JS_GENERIC_FETCH_ERROR,
                    'value': str(type(exc)),
                    'url': url,
                }
            else:
                logger.exception(unicode(exc))
                error = {
                    'type': EventError.UNKNOWN_ERROR,
                    'url': url,
                }

            # TODO(dcramer): we want to be less aggressive on disabling domains
            cache.set(domain_key, error or '', 300)
            logger.warning('Disabling sources to %s for %ss',
                           domain,
                           300,
                           exc_info=True)
            raise CannotFetchSource(error)

        # requests' attempts to use chardet internally when no encoding is found
        # and we want to avoid that slow behavior
        if not response.encoding:
            response.encoding = 'utf-8'

        body = response.text
        z_body = zlib.compress(force_bytes(body))
        # From here on ``headers`` holds the lowercased *response* headers.
        headers = {k.lower(): v for k, v in response.headers.items()}

        # The cache keeps the compressed body; the caller gets the plain one.
        cache.set(cache_key, (headers, z_body, response.status_code), 60)
        result = (headers, body, response.status_code)

    if result[2] != 200:
        # No exception is being handled here, so exc_info is deliberately not
        # requested: it would attach an empty (or stale, unrelated) traceback.
        logger.debug('HTTP %s when fetching %r', result[2], url)
        error = {
            'type': EventError.JS_INVALID_HTTP_CODE,
            'value': result[2],
            'url': url,
        }
        raise CannotFetchSource(error)

    # Make sure the file we're getting back is unicode, if it's not,
    # it's either some encoding that we don't understand, or it's binary
    # data which we can't process.
    if not isinstance(result[1], unicode):
        try:
            result = (result[0], result[1].decode('utf8'), result[2])
        except UnicodeDecodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                'url': url,
            }
            raise CannotFetchSource(error)

    return UrlResult(url, result[0], result[1])
Example #31
0
    def dispatch(self, request, *args, **kwargs):
        """
        Mirror of rest framework's dispatch, with one addition: handler
        arguments are passed through ``convert_args`` first (for common URL
        params).

        When an Origin header is present and ``request.auth`` is set, the
        origin is checked against the origins the auth object allows; a
        mismatch ends the request with a 400 response. CORS headers are added
        to the normal response whenever an origin was supplied.
        """
        self.args = args
        self.kwargs = kwargs
        request = self.initialize_request(request, *args, **kwargs)
        self.request = request
        self.headers = self.default_response_headers  # deprecate?

        delay_ms = settings.SENTRY_API_RESPONSE_DELAY
        if delay_ms:
            time.sleep(delay_ms / 1000.0)

        # A "null" value should be treated as no Origin for us.
        # See RFC6454 for more information on this behavior.
        origin = request.META.get('HTTP_ORIGIN', 'null')
        origin = None if origin == 'null' else origin

        try:
            if origin and request.auth and not is_valid_origin(
                    origin, allowed=request.auth.get_allowed_origins()):
                rejection = Response('Invalid origin: %s' % (origin, ), status=400)
                self.response = self.finalize_response(
                    request, rejection, *args, **kwargs)
                return self.response

            self.initial(request, *args, **kwargs)

            user = getattr(request, 'user', None)
            if user and user.is_authenticated():
                raven.user_context({
                    'id': user.id,
                    'username': user.username,
                    'email': user.email,
                })

            # Pick the handler matching the HTTP verb, falling back to the
            # "method not allowed" handler.
            verb = request.method.lower()
            if verb not in self.http_method_names:
                handler = self.http_method_not_allowed
            else:
                handler = getattr(self, verb, self.http_method_not_allowed)

                args, kwargs = self.convert_args(request, *args, **kwargs)
                self.args = args
                self.kwargs = kwargs

            response = handler(request, *args, **kwargs)

        except Exception as exc:
            response = self.handle_exception(request, exc)

        if origin:
            self.add_cors_headers(request, response)

        self.response = self.finalize_response(
            request, response, *args, **kwargs)

        return self.response
Example #32
0
    def _dispatch(self, request, project_id=None, *args, **kwargs):
        """
        Resolve the project, origin and auth for a request, then hand it to
        the parent ``dispatch``.

        The request starts out anonymous. OPTIONS requests only need a valid
        origin; everything else must carry auth variables that identify a
        project consistent with ``project_id``. ``APIError`` from the parent
        dispatch is turned into a plain-text response, and the request origin
        (if any) is echoed back in ``Access-Control-Allow-Origin``.
        """
        request.user = AnonymousUser()

        try:
            project = self._get_project_from_id(project_id)
        except APIError as exc:
            raise InvalidRequest(str(exc))

        if project:
            Raven.tags_context({'project': project.id})

        origin = self.get_request_origin(request)
        # Sending an origin without a project in the URL is not supported.
        if not project and origin is not None:
            raise InvalidRequest('Your client must be upgraded for CORS support.')

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method == 'OPTIONS':
            if not is_valid_origin(origin, project):
                raise InvalidOrigin(origin)
            response = self.options(request, project)
        else:
            try:
                auth_vars = self._parse_header(request, project)
            except APIError as exc:
                raise InvalidRequest(str(exc))

            try:
                project_, user = project_from_auth_vars(auth_vars)
            except APIError as error:
                return HttpResponse(unicode(error.msg), status=error.http_status)

            if user:
                request.user = user

            # Legacy API was /api/store/ and the project ID was only available elsewhere
            if project:
                if project_ != project:
                    raise InvalidRequest('Project ID mismatch')
                Raven.tags_context({'project': project.id})
            elif project_:
                project = project_
            else:
                raise InvalidRequest('Unable to identify project')

            auth = Auth(auth_vars, is_public=bool(origin))

            if auth.version >= 3:
                if request.method != 'GET':
                    # Version 3 enforces secret key for server side requests
                    if not auth.secret_key:
                        raise InvalidRequest('Missing required attribute in authentication header: sentry_secret')
                elif not is_valid_origin(origin, project):
                    # GET only requires an Origin/Referer check
                    if origin is None:
                        raise InvalidRequest('Missing required Origin or Referer header')
                    raise InvalidOrigin(origin)

            try:
                response = super(APIView, self).dispatch(request, project=project, auth=auth, **kwargs)
            except APIError as error:
                response = HttpResponse(unicode(error.msg), content_type='text/plain', status=error.http_status)

        if origin:
            response['Access-Control-Allow-Origin'] = origin

        return response
Example #33
0
    def dispatch(self, request):
        """
        Serve the embeddable user-report form and accept its submissions.

        Bails out through ``_json_response`` with 400 when ``eventId`` is
        missing from the query string, 404 when no project key is found and
        403 when no origin is found; an origin that is not valid for the
        key's project gets a bare 403. A valid form is stored as a user
        report (an existing duplicate is overwritten), an invalid POST gets
        the form errors back with a 400, and any other request receives the
        rendered JavaScript embed.
        """
        try:
            event_id = request.GET['eventId']
        except KeyError:
            return self._json_response(request, status=400)

        project_key = self._get_project_key(request)
        if not project_key:
            return self._json_response(request, status=404)

        request_origin = self._get_origin(request)
        if not request_origin:
            return self._json_response(request, status=403)
        if not is_valid_origin(request_origin, project_key.project):
            return HttpResponse(status=403)

        if request.method == 'OPTIONS':
            return self._json_response(request)

        # TODO(dcramer): since we cant use a csrf cookie we should at the very
        # least sign the request / add some kind of nonce
        prefill = {
            'name': request.GET.get('name'),
            'email': request.GET.get('email'),
        }

        is_post = request.method == 'POST'
        form = UserReportForm(request.POST if is_post else None, initial=prefill)

        if form.is_valid():
            # TODO(dcramer): move this to post to the internal API
            report = form.save(commit=False)
            report.project = project_key.project
            report.event_id = event_id
            try:
                mapping = EventMapping.objects.get(
                    event_id=report.event_id,
                    project_id=project_key.project_id,
                )
            except EventMapping.DoesNotExist:
                # XXX(dcramer): the system should fill this in later
                mapping = None

            if mapping is not None:
                report.group = Group.objects.get(id=mapping.group_id)

            try:
                with transaction.atomic():
                    report.save()
            except IntegrityError:
                # A report for this event already exists, so replace its
                # contents instead of failing. This should only happen when
                # someone pokes at the API directly or misuses the SDK, and
                # overwriting is friendlier than a hard error.
                duplicates = UserReport.objects.filter(
                    project=report.project,
                    event_id=report.event_id,
                )
                duplicates.update(
                    name=report.name,
                    email=report.email,
                    comments=report.comments,
                    date_added=timezone.now(),
                )
            return self._json_response(request)

        if is_post:
            return self._json_response(request, {
                "errors": dict(form.errors),
            }, status=400)

        template = render_to_string('sentry/error-page-embed.html', {
            'form': form,
        })

        endpoint_json = json.dumps(request.build_absolute_uri())
        template_json = json.dumps(template)
        context = {
            'endpoint': mark_safe('*/' + endpoint_json + ';/*'),
            'template': mark_safe('*/' + template_json + ';/*'),
        }

        return render_to_response('sentry/error-page-embed.js', context, request,
                                  content_type='text/javascript')
Example #34
0
    def dispatch(self, request):
        """
        Serve the embeddable user-feedback form and accept its submissions.

        Bails out through ``_smart_response`` with 400 when ``eventId`` is
        missing or does not normalize, 404 when no project key is found and
        403 when the origin is not valid for the key's project. A valid form
        is stored as a user report (an existing duplicate is overwritten) and
        the ``user_feedback_received`` signal is sent; an invalid POST gets
        the form errors back with a 400. Any other request receives the
        rendered JavaScript embed, with wording taken from ``DEFAULT_OPTIONS``
        overridden by matching query-string parameters.
        """
        try:
            event_id = request.GET["eventId"]
        except KeyError:
            return self._smart_response(
                request, {"eventId": "Missing or invalid parameter."}, status=400
            )

        normalized = normalize_event_id(event_id)
        if not normalized and event_id:
            return self._smart_response(
                request, {"eventId": "Missing or invalid parameter."}, status=400
            )
        if normalized:
            event_id = normalized

        project_key = self._get_project_key(request)
        if not project_key:
            return self._smart_response(
                request, {"dsn": "Missing or invalid parameter."}, status=404
            )

        request_origin = self._get_origin(request)
        if not is_valid_origin(request_origin, project_key.project):
            return self._smart_response(request, status=403)

        if request.method == "OPTIONS":
            return self._smart_response(request)

        # customization options
        options = DEFAULT_OPTIONS.copy()
        for option_name in six.iterkeys(options):
            if option_name not in request.GET:
                continue
            options[option_name] = six.text_type(request.GET[option_name])

        # TODO(dcramer): since we cant use a csrf cookie we should at the very
        # least sign the request / add some kind of nonce
        prefill = {"name": request.GET.get("name"), "email": request.GET.get("email")}

        is_post = request.method == "POST"
        form = UserReportForm(request.POST if is_post else None, initial=prefill)

        if form.is_valid():
            # TODO(dcramer): move this to post to the internal API
            report = form.save(commit=False)
            report.project_id = project_key.project_id
            report.event_id = event_id

            event = eventstore.get_event_by_id(report.project_id, report.event_id)
            if event is not None:
                report.environment_id = event.get_environment().id
                report.group_id = event.group_id

            newly_saved = True
            try:
                with transaction.atomic():
                    report.save()
            except IntegrityError:
                # A report for this event already exists, so replace its
                # contents instead of failing. This should only happen when
                # someone pokes at the API directly or misuses the SDK, and
                # overwriting is friendlier than a hard error.
                newly_saved = False
                duplicates = UserReport.objects.filter(
                    project_id=report.project_id, event_id=report.event_id
                )
                duplicates.update(
                    name=report.name,
                    email=report.email,
                    comments=report.comments,
                    date_added=timezone.now(),
                )

            # Only freshly stored reports that belong to a group notify.
            if newly_saved and report.group_id:
                report.notify()

            user_feedback_received.send(
                project=Project.objects.get(id=report.project_id),
                sender=self,
            )

            return self._smart_response(request)

        if is_post:
            return self._smart_response(request, {"errors": dict(form.errors)}, status=400)

        branding_value = ProjectOption.objects.get_value(
            project=project_key.project, key="feedback:branding", default="1"
        )
        show_branding = branding_value == "1"

        template_context = {
            "form": form,
            "show_branding": show_branding,
            "title": options["title"],
            "subtitle": options["subtitle"],
            "subtitle2": options["subtitle2"],
            "name_label": options["labelName"],
            "email_label": options["labelEmail"],
            "comments_label": options["labelComments"],
            "submit_label": options["labelSubmit"],
            "close_label": options["labelClose"],
        }
        template = render_to_string("sentry/error-page-embed.html", context=template_context)

        endpoint_json = json.dumps(absolute_uri(request.get_full_path()))
        template_json = json.dumps(template)
        strings_json = json.dumps_htmlsafe(
            {
                "generic_error": six.text_type(options["errorGeneric"]),
                "form_error": six.text_type(options["errorFormEntry"]),
                "sent_message": six.text_type(options["successMessage"]),
            }
        )
        context = {
            "endpoint": mark_safe("*/" + endpoint_json + ";/*"),
            "template": mark_safe("*/" + template_json + ";/*"),
            "strings": strings_json,
        }

        return render_to_response(
            "sentry/error-page-embed.js", context, request, content_type="text/javascript"
        )
Example #35
0
    def _dispatch(self, request, project_id=None, *args, **kwargs):
        """
        Resolve the project, origin and auth for a request, then hand it to
        the parent ``dispatch``.

        The request starts out anonymous. A supplied origin must come with a
        project and be valid for it. OPTIONS requests are answered directly;
        everything else must carry auth variables that identify a project
        consistent with ``project_id``. ``APIError`` from the parent dispatch
        becomes a plain-text response (with ``Retry-After`` for rate limits
        that provide one), and the request origin (if any) is echoed back in
        ``Access-Control-Allow-Origin``.
        """
        request.user = AnonymousUser()

        try:
            project = self._get_project_from_id(project_id)
        except APIError as exc:
            raise InvalidRequest(str(exc))

        if project:
            Raven.tags_context({'project': project.id})

        origin = self.get_request_origin(request)
        # These two checks are specific for clients who need CORS support
        if origin is not None and not project:
            raise InvalidRequest('Your client must be upgraded for CORS support.')
        if origin is not None and not is_valid_origin(origin, project):
            raise InvalidOrigin(origin)

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method == 'OPTIONS':
            response = self.options(request, project)
        else:
            try:
                auth_vars = self._parse_header(request, project)
            except APIError as exc:
                raise InvalidRequest(str(exc))

            try:
                project_ = project_from_auth_vars(auth_vars)
            except APIError as error:
                return HttpResponse(six.text_type(error.msg), status=error.http_status)

            # Legacy API was /api/store/ and the project ID was only available elsewhere
            if project:
                if project_ != project:
                    raise InvalidRequest('Project ID mismatch')
                Raven.tags_context({'project': project.id})
            elif project_:
                project = project_
            else:
                raise InvalidRequest('Unable to identify project')

            auth = Auth(auth_vars, is_public=bool(origin))

            if auth.version >= 3:
                if request.method != 'GET':
                    # Version 3 enforces secret key for server side requests
                    if not auth.secret_key:
                        raise InvalidRequest('Missing required attribute in authentication header: sentry_secret')
                elif origin is None and not is_valid_origin(origin, project):
                    # GET only requires an Origin/Referer check. A non-None
                    # origin was already validated above, so only the "no
                    # origin" case is left: the project may or may not allow
                    # it, and a dedicated message is raised when it does not.
                    raise InvalidRequest('Missing required Origin or Referer header')

            try:
                response = super(APIView, self).dispatch(request, project=project, auth=auth, **kwargs)
            except APIError as error:
                response = HttpResponse(six.text_type(error.msg), content_type='text/plain', status=error.http_status)
                retry_after = error.retry_after if isinstance(error, APIRateLimited) else None
                if retry_after is not None:
                    response['Retry-After'] = str(retry_after)

        if origin:
            response['Access-Control-Allow-Origin'] = origin

        return response
Example #36
0
    def dispatch(self, request):
        """
        Serve the embeddable user-feedback form and accept its submissions.

        Bails out through ``_smart_response`` with 400 when ``eventId`` is
        missing or does not normalize, 404 when no project key is found and
        403 when the origin is not valid for the key's project. A valid form
        is stored as a user report (an existing duplicate is overwritten) and
        the ``user_feedback_received`` signal is sent; an invalid POST gets
        the form errors back with a 400. Any other request receives the
        rendered JavaScript embed, with wording taken from ``DEFAULT_OPTIONS``
        overridden by matching query-string parameters.
        """
        try:
            event_id = request.GET['eventId']
        except KeyError:
            return self._smart_response(
                request, {'eventId': 'Missing or invalid parameter.'}, status=400)

        normalized = normalize_event_id(event_id)
        if not normalized and event_id:
            return self._smart_response(
                request, {'eventId': 'Missing or invalid parameter.'}, status=400)
        if normalized:
            event_id = normalized

        project_key = self._get_project_key(request)
        if not project_key:
            return self._smart_response(
                request, {'dsn': 'Missing or invalid parameter.'}, status=404)

        request_origin = self._get_origin(request)
        if not is_valid_origin(request_origin, project_key.project):
            return self._smart_response(request, status=403)

        if request.method == 'OPTIONS':
            return self._smart_response(request)

        # customization options
        options = DEFAULT_OPTIONS.copy()
        for option_name in six.iterkeys(options):
            if option_name not in request.GET:
                continue
            options[option_name] = six.text_type(request.GET[option_name])

        # TODO(dcramer): since we cant use a csrf cookie we should at the very
        # least sign the request / add some kind of nonce
        prefill = {
            'name': request.GET.get('name'),
            'email': request.GET.get('email'),
        }

        is_post = request.method == 'POST'
        form = UserReportForm(request.POST if is_post else None, initial=prefill)

        if form.is_valid():
            # TODO(dcramer): move this to post to the internal API
            report = form.save(commit=False)
            report.project = project_key.project
            report.event_id = event_id

            try:
                event = Event.objects.filter(
                    project_id=report.project.id, event_id=report.event_id)[0]
            except IndexError:
                event = None

            if event is None:
                # No stored event: fall back to looking the group up by
                # event id, leaving the group unset when that fails too.
                try:
                    report.group = Group.objects.from_event_id(
                        report.project, report.event_id)
                except Group.DoesNotExist:
                    pass
            else:
                Event.objects.bind_nodes([event])
                report.environment = event.get_environment()
                report.group = event.group

            newly_saved = True
            try:
                with transaction.atomic():
                    report.save()
            except IntegrityError:
                # A report for this event already exists, so replace its
                # contents instead of failing. This should only happen when
                # someone pokes at the API directly or misuses the SDK, and
                # overwriting is friendlier than a hard error.
                newly_saved = False
                duplicates = UserReport.objects.filter(
                    project=report.project,
                    event_id=report.event_id,
                )
                duplicates.update(
                    name=report.name,
                    email=report.email,
                    comments=report.comments,
                    date_added=timezone.now(),
                )

            # Only freshly stored reports that belong to a group notify.
            if newly_saved and report.group:
                report.notify()

            user_feedback_received.send(
                project=report.project, group=report.group, sender=self)

            return self._smart_response(request)

        if is_post:
            return self._smart_response(
                request, {"errors": dict(form.errors)}, status=400)

        branding_value = ProjectOption.objects.get_value(
            project=project_key.project, key='feedback:branding', default='1')
        show_branding = branding_value == '1'

        template_context = {
            'form': form,
            'show_branding': show_branding,
            'title': options['title'],
            'subtitle': options['subtitle'],
            'subtitle2': options['subtitle2'],
            'name_label': options['labelName'],
            'email_label': options['labelEmail'],
            'comments_label': options['labelComments'],
            'submit_label': options['labelSubmit'],
            'close_label': options['labelClose'],
        }
        template = render_to_string('sentry/error-page-embed.html', template_context)

        endpoint_json = json.dumps(request.build_absolute_uri())
        template_json = json.dumps(template)
        strings_json = json.dumps_htmlsafe({
            'generic_error': six.text_type(options['errorGeneric']),
            'form_error': six.text_type(options['errorFormEntry']),
            'sent_message': six.text_type(options['successMessage']),
        })
        context = {
            'endpoint': mark_safe('*/' + endpoint_json + ';/*'),
            'template': mark_safe('*/' + template_json + ';/*'),
            'strings': strings_json,
        }

        return render_to_response(
            'sentry/error-page-embed.js', context, request,
            content_type='text/javascript')
Example #37
0
def fetch_file(url,
               project=None,
               release=None,
               dist=None,
               allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the database first (assuming there's a release on the
    event), then the internet. Caches the result of each of those two attempts
    separately, whether or not those attempts are successful. Used for both
    source files and source maps.

    :param url: URL of the file to fetch.
    :param project: optional project; when ``url`` is a valid origin for it,
        its ``sentry:verify_ssl`` and ``sentry:token`` options are applied to
        the outgoing request.
    :param release: optional release to look the file up on before scraping.
    :param dist: optional distribution passed through to the release lookup.
    :param allow_scraping: when false, never fall back to fetching over HTTP.
    :returns: the fetched result, with a binary body.
    :raises http.CannotFetch: if the URL is truncated, may not be scraped,
        did not come back with a 200 status, has a body that cannot be encoded
        as UTF-8, or has a ``.js`` path whose body looks like HTML.
    """

    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == "...":
        raise http.CannotFetch({
            "type": EventError.JS_MISSING_SOURCE,
            "url": http.expose_url(url)
        })

    # if we've got a release to look on, try that first (incl associated cache)
    if release:
        with metrics.timer("sourcemaps.release_file"):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    # otherwise, try the web-scraping cache and then the web itself

    cache_key = "source:cache:v4:%s" % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(("http:", "https:")):
            error = {
                "type": EventError.JS_MISSING_SOURCE,
                "url": http.expose_url(url)
            }
            raise http.CannotFetch(error)

        logger.debug("Checking cache for url %r", url)
        result = cache.get(cache_key)
        if result is not None:
            # Cache entries are written below as
            # (url, headers, compressed body, status, encoding). Entries
            # that lack the trailing encoding element are still accepted.
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(result[0], result[1],
                                    zlib.decompress(result[2]), result[3],
                                    encoding)

    if result is None:
        headers = {}
        verify_ssl = False
        # Project-specific settings (SSL verification, auth token) are only
        # applied when the URL is a valid origin for the project.
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option("sentry:verify_ssl", False))
            token = project.get_option("sentry:token")
            if token:
                token_header = project.get_option(
                    "sentry:token_header") or "X-Sentry-Token"
                headers[token_header] = token

        with metrics.timer("sourcemaps.fetch"):
            result = http.fetch_file(url,
                                     headers=headers,
                                     verify_ssl=verify_ssl)
            # The response is cached regardless of its status code; the
            # status check below runs for cached and fresh results alike.
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url, result.headers, z_body, result.status, result.encoding),
                get_max_age(result.headers),
            )

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch({
            "type": EventError.FETCH_INVALID_HTTP_CODE,
            "value": result.status,
            "url": http.expose_url(url),
        })

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, six.binary_type):
        try:
            result = http.UrlResult(
                result.url,
                result.headers,
                result.body.encode("utf8"),
                result.status,
                result.encoding,
            )
        except UnicodeEncodeError:
            error = {
                "type": EventError.FETCH_INVALID_ENCODING,
                "value": "utf8",
                "url": http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if urlsplit(url).path.endswith(".js"):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        # The body is guaranteed to be binary at this point, so compare
        # against a bytes literal: on Python 3 a bytes/text comparison is
        # always False and the check would silently never trigger.
        if body_start[:1] == b"<":
            error = {"type": EventError.JS_INVALID_CONTENT, "url": url}
            raise http.CannotFetch(error)

    return result
Example #38
0
    def _dispatch(self, request, helper, project_id=None, origin=None,
                  *args, **kwargs):
        """
        Authenticate a client request and hand it to the parent ``dispatch``.

        The request user is reset to anonymous, the project resolved from
        ``project_id`` (if any) is bound to the helper context, and a given
        ``origin`` is validated against that project. ``OPTIONS`` requests are
        answered by ``self.options``; every other method has its auth header
        parsed first. When an origin is present the response carries an
        ``Access-Control-Allow-Origin`` header.

        Raises ``APIError`` or ``APIForbidden`` when one of the project,
        origin or secret key checks fails.
        """
        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        if project:
            helper.context.bind_project(project)
            Raven.tags_context(helper.context.get_tags_context())

        # An explicit origin means the client relies on CORS support, which
        # needs a project known up front that accepts this origin.
        if origin is not None:
            if not project:
                raise APIError('Client must be upgraded for CORS support')
            if not is_valid_origin(origin, project):
                tsdb.incr(tsdb.models.project_total_received_cors, project.id)
                raise APIForbidden('Invalid origin: %s' % (origin, ))

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method != 'OPTIONS':
            auth = self._parse_header(request, helper, project)
            key = helper.project_key_from_auth(auth)

            if project:
                if key.project_id != project.id:
                    raise APIError('Two different projects were specified')
            else:
                # Legacy API was /api/store/ and the project ID was only
                # available elsewhere
                project = Project.objects.get_from_cache(id=key.project_id)
                helper.context.bind_project(project)

            helper.context.bind_auth(auth)
            Raven.tags_context(helper.context.get_tags_context())

            # Bind the Organization explicitly (from cache) so that later
            # access to `project.organization` does not implicitly hit the
            # database.
            project.organization = Organization.objects.get_from_cache(
                id=project.organization_id)

            # Without a secret_key the request is only acceptable as a
            # CORS request.
            if auth.version != '2.0' and not auth.secret_key:
                # With no Origin/Referrer header at all, only GET is
                # supported. Allowing un-authenticated CORS checks for POST
                # would make the secret key pointless.
                if origin is None and request.method != 'GET':
                    raise APIForbidden(
                        'Missing required attribute in authentication header: sentry_secret'
                    )

                if not is_valid_origin(origin, project):
                    if project:
                        tsdb.incr(tsdb.models.project_total_received_cors,
                                  project.id)
                    raise APIForbidden(
                        'Missing required Origin or Referer header')

            response = super(APIView, self).dispatch(
                request=request,
                project=project,
                auth=auth,
                helper=helper,
                key=key,
                **kwargs
            )
        else:
            response = self.options(request, project)

        if origin:
            # A `null` Origin that made it past the CORS check cannot be
            # echoed back as `Access-Control-Allow-Origin`, and there is no
            # other value to work with, so '*' is sent instead.
            allowed = '*' if origin == 'null' else origin
            response['Access-Control-Allow-Origin'] = allowed

        return response
    def dispatch(self, request):
        """
        Serve the embeddable user feedback dialog and accept its submissions.

        Requests without a valid ``eventId``, a resolvable project key or an
        accepted origin are rejected with 400, 404 and 403 respectively. A
        valid form is saved as a user report for the key's project; an
        invalid ``POST`` gets the form errors back with status 400; anything
        else receives the rendered ``sentry/error-page-embed.js`` script.
        """
        try:
            event_id = request.GET['eventId']
        except KeyError:
            return self._json_response(request, status=400)

        if not is_event_id(event_id):
            return self._json_response(request, status=400)

        project_key = self._get_project_key(request)
        if not project_key:
            return self._json_response(request, status=404)

        origin = self._get_origin(request)
        if not origin:
            return self._json_response(request, status=403)

        if not is_valid_origin(origin, project_key.project):
            return HttpResponse(status=403)

        if request.method == 'OPTIONS':
            return self._json_response(request)

        # TODO(dcramer): since we cant use a csrf cookie we should at the very
        # least sign the request / add some kind of nonce
        prefill = {
            'name': request.GET.get('name'),
            'email': request.GET.get('email'),
        }
        posted = request.POST if request.method == 'POST' else None
        feedback_form = UserReportForm(posted, initial=prefill)

        if feedback_form.is_valid():
            # TODO(dcramer): move this to post to the internal API
            user_report = feedback_form.save(commit=False)
            user_report.project = project_key.project
            user_report.event_id = event_id

            # Prefer the stored event for group/environment; fall back to
            # resolving the group from the event id alone.
            try:
                event = Event.objects.filter(
                    project_id=user_report.project.id,
                    event_id=user_report.event_id,
                ).select_related('group')[0]
            except IndexError:
                try:
                    user_report.group = Group.objects.from_event_id(
                        user_report.project, user_report.event_id)
                except Group.DoesNotExist:
                    pass
            else:
                user_report.environment = event.get_environment()
                user_report.group = event.group

            try:
                with transaction.atomic():
                    user_report.save()
            except IntegrityError:
                # A report for this event already exists, so overwrite the
                # existing row with the new values. This only happens when
                # someone is messing around with the API or misusing the
                # SDK, but it is more reasonable (and more expected) than
                # hard erroring.
                UserReport.objects.filter(
                    project=user_report.project,
                    event_id=user_report.event_id,
                ).update(
                    name=user_report.name,
                    email=user_report.email,
                    comments=user_report.comments,
                    date_added=timezone.now(),
                )
            else:
                if user_report.group:
                    user_report.notify()

            user_feedback_received.send(
                project=user_report.project,
                group=user_report.group,
                sender=self,
            )

            return self._json_response(request)

        if request.method == 'POST':
            form_errors = {"errors": dict(feedback_form.errors)}
            return self._json_response(request, form_errors, status=400)

        show_branding = ProjectOption.objects.get_value(
            project=project_key.project,
            key='feedback:branding',
            default='1',
        ) == '1'

        dialog_html = render_to_string('sentry/error-page-embed.html', {
            'form': feedback_form,
            'show_branding': show_branding,
        })

        endpoint_js = mark_safe(
            '*/' + json.dumps(request.build_absolute_uri()) + ';/*')
        template_js = mark_safe('*/' + json.dumps(dialog_html) + ';/*')
        strings_js = json.dumps_htmlsafe({
            'generic_error': six.text_type(GENERIC_ERROR),
            'form_error': six.text_type(FORM_ERROR),
            'sent_message': six.text_type(SENT_MESSAGE),
        })

        return render_to_response(
            'sentry/error-page-embed.js',
            {
                'endpoint': endpoint_js,
                'template': template_js,
                'strings': strings_js,
            },
            request,
            content_type='text/javascript',
        )
Example #40
0
def fetch_file(url, project=None, release=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    The release (when given) is consulted first, then the scraping cache,
    and finally the URL itself is fetched over HTTP. A failed HTTP fetch
    blacklists the URL's domain in the cache for 300 seconds.

    :param url: URL of the file to fetch.
    :param project: optional project; when ``url`` is a valid origin for it
        and it has a ``sentry:token`` option, the token is sent as a header.
    :param release: optional release to look the file up on before scraping.
    :param allow_scraping: when false, never fall back to fetching over HTTP.
    :returns: ``UrlResult(url, headers, body, encoding)`` with a binary body.
    :raises CannotFetchSource: if the URL is truncated, may not be scraped,
        belongs to a blacklisted domain, could not be fetched, did not come
        back with a 200 status, looks like HTML although it ends in ``.js``,
        or has a body that cannot be encoded as UTF-8.
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise CannotFetchSource({
            'type': EventError.JS_MISSING_SOURCE,
            'url': expose_url(url),
        })
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release)
    else:
        result = None

    cache_key = 'source:cache:v3:%s' % (
        md5_text(url).hexdigest(),
    )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Previous caches would be a 3-tuple instead of a 4-tuple,
            # so this is being maintained for backwards compatibility
            try:
                encoding = result[3]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = (result[0], zlib.decompress(result[1]), result[2], encoding)

    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_key = 'source:blacklist:v2:%s' % (
            md5_text(domain).hexdigest(),
        )
        domain_result = cache.get(domain_key)
        if domain_result:
            domain_result['url'] = url
            raise CannotFetchSource(domain_result)

        headers = {}
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
            if token:
                token_header = project.get_option(
                    'sentry:token_header',
                    'X-Sentry-Token',
                )
                headers[token_header] = token

        logger.debug('Fetching %r from the internet', url)

        with metrics.timer('sourcemaps.fetch'):
            http_session = http.build_session()
            response = None
            try:
                try:
                    start = time.time()
                    response = http_session.get(
                        url,
                        allow_redirects=True,
                        verify=False,
                        headers=headers,
                        timeout=settings.SENTRY_SOURCE_FETCH_SOCKET_TIMEOUT,
                        stream=True,
                    )

                    # Reject early when the advertised size is already too
                    # large; a missing or malformed header counts as 0.
                    try:
                        cl = int(response.headers['content-length'])
                    except (LookupError, ValueError):
                        cl = 0
                    if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                        raise OverflowError()

                    contents = []
                    # From here on `cl` tracks the bytes actually received.
                    cl = 0

                    # Only need to even attempt to read the response body if we
                    # got a 200 OK
                    if response.status_code == 200:
                        for chunk in response.iter_content(16 * 1024):
                            if time.time() - start > settings.SENTRY_SOURCE_FETCH_TIMEOUT:
                                raise Timeout()
                            contents.append(chunk)
                            cl += len(chunk)
                            if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                                raise OverflowError()

                except Exception as exc:
                    logger.debug('Unable to fetch %r', url, exc_info=True)
                    if isinstance(exc, RestrictedIPAddress):
                        error = {
                            'type': EventError.RESTRICTED_IP,
                            'url': expose_url(url),
                        }
                    elif isinstance(exc, SuspiciousOperation):
                        error = {
                            'type': EventError.SECURITY_VIOLATION,
                            'url': expose_url(url),
                        }
                    elif isinstance(exc, Timeout):
                        error = {
                            'type': EventError.JS_FETCH_TIMEOUT,
                            'url': expose_url(url),
                            'timeout': settings.SENTRY_SOURCE_FETCH_TIMEOUT,
                        }
                    elif isinstance(exc, OverflowError):
                        error = {
                            'type': EventError.JS_TOO_LARGE,
                            'url': expose_url(url),
                            # We want size in megabytes to format nicely
                            'max_size': float(settings.SENTRY_SOURCE_FETCH_MAX_SIZE) / 1024 / 1024,
                        }
                    elif isinstance(exc, (RequestException, ZeroReturnError)):
                        error = {
                            'type': EventError.JS_GENERIC_FETCH_ERROR,
                            'value': six.text_type(type(exc)),
                            'url': expose_url(url),
                        }
                    else:
                        logger.exception(six.text_type(exc))
                        error = {
                            'type': EventError.UNKNOWN_ERROR,
                            'url': expose_url(url),
                        }

                    # TODO(dcramer): we want to be less aggressive on disabling domains
                    cache.set(domain_key, error or '', 300)
                    logger.warning('source.disabled', extra=error)
                    raise CannotFetchSource(error)

                body = b''.join(contents)
                z_body = zlib.compress(body)
                headers = {k.lower(): v for k, v in response.headers.items()}
                encoding = response.encoding

                cache.set(cache_key, (headers, z_body, response.status_code, encoding), 60)
                result = (headers, body, response.status_code, encoding)
            finally:
                if response is not None:
                    response.close()

    if result[2] != 200:
        # No exception is being handled here, so no exc_info is attached
        # (it would log an empty or unrelated traceback).
        logger.debug('HTTP %s when fetching %r', result[2], url)
        error = {
            'type': EventError.JS_INVALID_HTTP_CODE,
            'value': result[2],
            'url': expose_url(url),
        }
        raise CannotFetchSource(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        body_start = result[1][:20].lstrip()  # Discard leading whitespace (often found before doctype)

        # The body may still be text here (it is only coerced to binary
        # below), so pick a marker of the matching type: on Python 3 a
        # bytes/text comparison is always False.
        if isinstance(body_start, six.binary_type):
            html_marker = b'<'
        else:
            html_marker = u'<'

        if body_start[:1] == html_marker:
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise CannotFetchSource(error)

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result[1], six.binary_type):
        try:
            # Keep the (headers, body, status, encoding) shape so the
            # result[3] lookup below stays valid; the encoding is reset to
            # None as before.
            result = (result[0], result[1].encode('utf8'), result[2], None)
        except UnicodeEncodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

    return UrlResult(url, result[0], result[1], result[3])
Example #41
0
File: api.py Project: fbentz/sentry
            auth_vars = self._parse_header(request, project)
        except APIError, e:
            return HttpResponse(str(e), status=400)

        project_ = project_from_auth_vars(auth_vars)

        # Legacy API was /api/store/ and the project ID was only available elsewhere
        if not project:
            if not project_:
                return HttpResponse('Unable to identify project', status=400)
            project = project_
        elif project_ != project:
            return HttpResponse('Project ID mismatch', status=400)

        origin = request.META.get('HTTP_ORIGIN', None)
        if origin is not None and not is_valid_origin(origin, project):
            return HttpResponse('Invalid origin: %r' % origin, status=400)

        auth = Auth(auth_vars)

        try:
            response = super(APIView, self).dispatch(request,
                                                     project=project,
                                                     auth=auth,
                                                     **kwargs)

        except APIError, error:
            logger.info('Project %r raised API error: %s',
                        project.slug,
                        error,
                        extra={
Example #42
0
    def dispatch(self, request):
        """
        Serve the embeddable user feedback dialog and accept its submissions.

        Requests without an ``eventId``, a resolvable project key or an
        accepted origin are rejected with 400, 404 and 403 respectively. A
        valid form is saved as a user report and answered with an empty 200;
        an invalid ``POST`` gets the form errors back with status 400;
        anything else receives the rendered ``sentry/error-page-embed.js``
        script.
        """
        try:
            event_id = request.GET['eventId']
        except KeyError:
            return self._json_response(request, status=400)

        project_key = self._get_project_key(request)
        if not project_key:
            return self._json_response(request, status=404)

        origin = self._get_origin(request)
        if not origin:
            return self._json_response(request, status=403)

        if not is_valid_origin(origin, project_key.project):
            return HttpResponse(status=403)

        if request.method == 'OPTIONS':
            return self._json_response(request)

        # TODO(dcramer): since we cant use a csrf cookie we should at the very
        # least sign the request / add some kind of nonce
        prefill = {
            'name': request.GET.get('name'),
            'email': request.GET.get('email'),
        }
        posted = request.POST if request.method == 'POST' else None
        feedback_form = UserReportForm(posted, initial=prefill)

        if feedback_form.is_valid():
            user_report = feedback_form.save(commit=False)
            user_report.project = project_key.project
            user_report.event_id = event_id
            try:
                mapping = EventMapping.objects.get(
                    event_id=user_report.event_id,
                    project_id=project_key.project_id,
                )
            except EventMapping.DoesNotExist:
                # XXX(dcramer): the system should fill this in later
                pass
            else:
                user_report.group = Group.objects.get(id=mapping.group_id)
            user_report.save()
            return HttpResponse(status=200)

        if request.method == 'POST':
            form_errors = {"errors": dict(feedback_form.errors)}
            return self._json_response(request, form_errors, status=400)

        dialog_html = render_to_string('sentry/error-page-embed.html', {
            'form': feedback_form,
        })

        endpoint_js = mark_safe(json.dumps(request.get_full_path()))
        template_js = mark_safe(json.dumps(dialog_html))

        return render_to_response(
            'sentry/error-page-embed.js',
            {
                'endpoint': endpoint_js,
                'template': template_js,
            },
            request,
            content_type='text/javascript',
        )
Example #43
0
    def _dispatch(self, request, project_id=None, *args, **kwargs):
        """
        Authenticate a client request and hand it to the parent ``dispatch``.

        The request user is reset to anonymous, the project is resolved from
        ``project_id`` and/or the auth header, and the request origin is
        validated against the project. ``OPTIONS`` requests are answered by
        ``self.options``. An ``APIError`` raised by the parent ``dispatch`` is
        turned into a plain-text response. When an origin is present it is
        echoed back in ``Access-Control-Allow-Origin``.

        Raises ``InvalidRequest`` or ``InvalidOrigin`` when one of the
        project, origin or secret key checks fails.
        """
        request.user = AnonymousUser()

        try:
            project = self._get_project_from_id(project_id)
        except APIError as exc:
            raise InvalidRequest(str(exc))

        if project:
            Raven.tags_context({'project': project.id})

        origin = self.get_request_origin(request)
        if origin is not None:
            # This check is specific for clients who need CORS support
            if not project:
                raise InvalidRequest('Your client must be upgraded for CORS support.')
            if not is_valid_origin(origin, project):
                raise InvalidOrigin(origin)

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method != 'OPTIONS':
            try:
                auth_vars = self._parse_header(request, project)
            except APIError as exc:
                raise InvalidRequest(str(exc))

            try:
                auth_project, user = project_from_auth_vars(auth_vars)
            except APIError as exc:
                return HttpResponse(six.text_type(exc.msg), status=exc.http_status)
            if user:
                request.user = user

            # Legacy API was /api/store/ and the project ID was only available elsewhere
            if not project:
                if not auth_project:
                    raise InvalidRequest('Unable to identify project')
                project = auth_project
            elif auth_project != project:
                raise InvalidRequest('Project ID mismatch')
            else:
                Raven.tags_context({'project': project.id})

            auth = Auth(auth_vars, is_public=bool(origin))

            if auth.version >= 3:
                if request.method != 'GET':
                    # Version 3 enforces secret key for server side requests
                    if not auth.secret_key:
                        raise InvalidRequest('Missing required attribute in authentication header: sentry_secret')
                elif origin is None and not is_valid_origin(origin, project):
                    # GET only requires an Origin/Referer check. A non-None
                    # origin was already validated above, so only the
                    # missing-origin case is checked here: the project may
                    # or may not allow requests without one.
                    raise InvalidRequest('Missing required Origin or Referer header')

            try:
                response = super(APIView, self).dispatch(
                    request, project=project, auth=auth, **kwargs)
            except APIError as api_error:
                response = HttpResponse(
                    six.text_type(api_error.msg),
                    content_type='text/plain',
                    status=api_error.http_status,
                )
                rate_limited = isinstance(api_error, APIRateLimited)
                if rate_limited and api_error.retry_after is not None:
                    response['Retry-After'] = str(api_error.retry_after)
        else:
            response = self.options(request, project)

        if origin:
            response['Access-Control-Allow-Origin'] = origin

        return response
Example #44
0
    def dispatch(self, request, *args, **kwargs):
        """
        Dispatch the request the way rest framework does, with one addition:
        positional and keyword arguments are passed through
        ``convert_args`` (for common URL params) before the handler runs.

        A request whose Origin is not among the allowed origins of its auth
        is answered with a 400. Any exception raised while handling is
        turned into a response by ``handle_exception``.
        """
        self.args = args
        self.kwargs = kwargs
        request = self.initialize_request(request, *args, **kwargs)
        self.load_json_body(request)
        self.request = request
        self.headers = self.default_response_headers  # deprecate?

        # Tags that will ultimately flow into the metrics backend at the end of
        # the request (happens via middleware/stats.py).
        request._metric_tags = {}

        if settings.SENTRY_API_RESPONSE_DELAY:
            time.sleep(settings.SENTRY_API_RESPONSE_DELAY / 1000.0)

        # A "null" value should be treated as no Origin for us.
        # See RFC6454 for more information on this behavior.
        origin = request.META.get('HTTP_ORIGIN', 'null')
        origin = None if origin == 'null' else origin

        try:
            if origin and request.auth:
                allowed_origins = request.auth.get_allowed_origins()
                if not is_valid_origin(origin, allowed=allowed_origins):
                    rejection = Response(
                        'Invalid origin: %s' % (origin, ), status=400)
                    self.response = self.finalize_response(
                        request, rejection, *args, **kwargs)
                    return self.response

            self.initial(request, *args, **kwargs)

            # Get the appropriate handler method
            verb = request.method.lower()
            if verb not in self.http_method_names:
                handler = self.http_method_not_allowed
            else:
                handler = getattr(self, verb, self.http_method_not_allowed)

                (args, kwargs) = self.convert_args(request, *args, **kwargs)
                self.args, self.kwargs = args, kwargs

            if getattr(request, 'access', None) is None:
                # setup default access
                request.access = access.from_request(request)

            response = handler(request, *args, **kwargs)

        except Exception as exc:
            response = self.handle_exception(request, exc)

        if origin:
            self.add_cors_headers(request, response)

        self.response = self.finalize_response(
            request, response, *args, **kwargs)

        return self.response
Example #45
0
File: api.py Project: mvaled/sentry
    def _dispatch(self, request, helper, project_id=None, origin=None,
                  *args, **kwargs):
        """
        Authenticate a client request and hand it to the parent ``dispatch``.

        The request user is reset to anonymous, the project resolved from
        ``project_id`` (if any) is bound to the helper context, and a given
        ``origin`` is validated against that project. ``OPTIONS`` requests are
        answered by ``self.options``; every other method has its auth header
        parsed first. When an origin is present it is echoed back in
        ``Access-Control-Allow-Origin``.

        Raises ``APIError`` or ``APIForbidden`` when one of the project,
        origin or secret key checks fails.
        """
        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        if project:
            helper.context.bind_project(project)
            Raven.tags_context(helper.context.get_tags_context())

        # An explicit origin means the client relies on CORS support, which
        # needs a project known up front that accepts this origin.
        if origin is not None:
            if not project:
                raise APIError('Client must be upgraded for CORS support')
            if not is_valid_origin(origin, project):
                raise APIForbidden('Invalid origin: %s' % (origin,))

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method != 'OPTIONS':
            auth = self._parse_header(request, helper, project)
            auth_project_id = helper.project_id_from_auth(auth)

            if project:
                if auth_project_id != project.id:
                    raise APIError('Two different projects were specified')
            else:
                # Legacy API was /api/store/ and the project ID was only
                # available elsewhere
                project = Project.objects.get_from_cache(id=auth_project_id)
                helper.context.bind_project(project)

            helper.context.bind_auth(auth)
            Raven.tags_context(helper.context.get_tags_context())

            # Bind the Organization explicitly (from cache) so that later
            # access to `project.organization` does not implicitly hit the
            # database.
            project.organization = Organization.objects.get_from_cache(
                id=project.organization_id)

            # Without a secret_key the request is only acceptable as a
            # CORS request.
            if auth.version != '2.0' and not auth.secret_key:
                # With no Origin/Referrer header at all, only GET is
                # supported. Allowing un-authenticated CORS checks for POST
                # would make the secret key pointless.
                if origin is None and request.method != 'GET':
                    raise APIForbidden('Missing required attribute in authentication header: sentry_secret')

                if not is_valid_origin(origin, project):
                    raise APIForbidden('Missing required Origin or Referer header')

            response = super(APIView, self).dispatch(
                request=request,
                project=project,
                auth=auth,
                helper=helper,
                **kwargs
            )
        else:
            response = self.options(request, project)

        if origin:
            response['Access-Control-Allow-Origin'] = origin

        return response
Example #46
0
 def test_setting_empty(self):
     """An origin is rejected when SENTRY_ALLOW_ORIGIN is None."""
     with self.Settings(SENTRY_ALLOW_ORIGIN=None):
         outcome = is_valid_origin('http://example.com')
         self.assertFalse(outcome)
Example #47
0
 def test_setting_uri(self):
     """An origin equal to the SENTRY_ALLOW_ORIGIN URI is accepted."""
     with self.Settings(SENTRY_ALLOW_ORIGIN='http://example.com'):
         outcome = is_valid_origin('http://example.com')
         self.assertTrue(outcome)
Example #48
0
def fetch_url(url, project=None, release=None):
    """
    Retrieve the contents of ``url`` and hand them back as a UrlResult.

    Sources are consulted in order: the release's files (only when
    ``release`` is truthy), the response cache, and finally a live HTTP
    request. A failed request records the failure against the URL's domain
    in the cache for 300 seconds; while that entry exists, further fetches
    for the domain raise DomainBlacklisted. Any non-200 status raises
    CannotFetchSource.
    """
    url_digest = hashlib.md5(url.encode('utf-8')).hexdigest()
    cache_key = 'source:cache:v2:%s' % (url_digest,)

    result = fetch_release_file(url, release) if release else None

    if result is None:
        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)

    if result is None:
        # Refuse to contact domains that were recently flagged as failing.
        netloc = urlparse(url).netloc
        netloc_digest = hashlib.md5(netloc.encode('utf-8')).hexdigest()
        blacklist_key = 'source:blacklist:%s' % (netloc_digest,)
        blacklist_reason = cache.get(blacklist_key)
        if blacklist_reason:
            raise DomainBlacklisted(ERR_DOMAIN_BLACKLISTED.format(
                reason=blacklist_reason,
            ))

        # Only URLs that validate as an origin of the project receive the
        # project's token header.
        request_headers = {}
        if project and is_valid_origin(url, project=project):
            project_token = project.get_option('sentry:token')
            if project_token:
                request_headers['X-Sentry-Token'] = project_token

        logger.debug('Fetching %r from the internet', url)

        session = http.build_session()
        try:
            reply = session.get(
                url,
                allow_redirects=True,
                verify=False,
                headers=request_headers,
                timeout=settings.SENTRY_SOURCE_FETCH_TIMEOUT,
            )
        except Exception as exc:
            logger.debug('Unable to fetch %r', url, exc_info=True)
            if isinstance(exc, SuspiciousOperation):
                message = unicode(exc)
            elif isinstance(exc, RequestException):
                message = ERR_GENERIC_FETCH_FAILURE.format(
                    type=type(exc),
                )
            else:
                # Anything unexpected is logged with its traceback.
                logger.exception(unicode(exc))
                message = ERR_UNKNOWN_INTERNAL_ERROR

            # TODO(dcramer): we want to be less aggressive on disabling domains
            cache.set(blacklist_key, message or '', 300)
            logger.warning('Disabling sources to %s for %ss', netloc, 300,
                           exc_info=True)
            raise CannotFetchSource(message)

        lowered_headers = dict(
            (name.lower(), value) for name, value in reply.headers.items()
        )
        result = (lowered_headers, reply.content, reply.status_code)
        cache.set(cache_key, result, 60)

    status = result[2]
    if status != 200:
        logger.debug('HTTP %s when fetching %r', status, url,
                     exc_info=True)
        raise CannotFetchSource(ERR_HTTP_CODE.format(
            status_code=status,
        ))

    return UrlResult(url, result[0], result[1])
Example #49
0
    def dispatch(self, request, *args, **kwargs):
        """
        Mirror rest framework's dispatch, with one addition: the handler's
        arguments are first passed through ``convert_args`` (for common URL
        params).

        When the request carries an Origin header and is authenticated, the
        origin is checked against the credential's allowed origins and a 400
        response is returned on mismatch. CORS headers are added whenever an
        Origin was sent. If ``settings.SENTRY_API_RESPONSE_DELAY`` is set,
        the call sleeps so that it lasts at least that long (the value is
        divided by 1000 before being compared with elapsed seconds).
        """
        view_name = type(self).__name__

        with sentry_sdk.start_span(op="base.dispatch.setup",
                                   description=view_name):
            self.args = args
            self.kwargs = kwargs
            request = self.initialize_request(request, *args, **kwargs)
            self.load_json_body(request)
            self.request = request
            self.headers = self.default_response_headers  # deprecate?

        # Tags that will ultimately flow into the metrics backend at the end of
        # the request (happens via middleware/stats.py).
        request._metric_tags = {}

        if settings.SENTRY_API_RESPONSE_DELAY:
            start_time = time.time()

        # A "null" value should be treated as no Origin for us.
        # See RFC6454 for more information on this behavior.
        origin = request.META.get("HTTP_ORIGIN", "null")
        if origin == "null":
            origin = None

        try:
            with sentry_sdk.start_span(op="base.dispatch.request",
                                       description=view_name):
                if origin and request.auth:
                    allowed_origins = request.auth.get_allowed_origins()
                    if not is_valid_origin(origin, allowed=allowed_origins):
                        rejection = Response("Invalid origin: %s" % (origin, ),
                                             status=400)
                        self.response = self.finalize_response(
                            request, rejection, *args, **kwargs)
                        return self.response

                self.initial(request, *args, **kwargs)

                # Pick the handler named after the HTTP method, falling back
                # to the "method not allowed" handler.
                method_name = request.method.lower()
                if method_name in self.http_method_names:
                    handler = getattr(self, method_name,
                                      self.http_method_not_allowed)

                    (args, kwargs) = self.convert_args(request, *args,
                                                       **kwargs)
                    self.args = args
                    self.kwargs = kwargs
                else:
                    handler = self.http_method_not_allowed

                if getattr(request, "access", None) is None:
                    # setup default access
                    request.access = access.from_request(request)

            with sentry_sdk.start_span(
                    op="base.dispatch.execute",
                    description="{}.{}".format(view_name, handler.__name__),
            ):
                response = handler(request, *args, **kwargs)

        except Exception as exc:
            response = self.handle_exception(request, exc)

        if origin:
            self.add_cors_headers(request, response)

        self.response = self.finalize_response(request, response, *args,
                                               **kwargs)

        response_delay = settings.SENTRY_API_RESPONSE_DELAY
        if response_delay:
            elapsed = time.time() - start_time
            minimum_seconds = response_delay / 1000.0

            if elapsed < minimum_seconds:
                with sentry_sdk.start_span(
                        op="base.dispatch.sleep",
                        description=view_name,
                ) as span:
                    span.set_data("SENTRY_API_RESPONSE_DELAY",
                                  response_delay)
                    time.sleep(minimum_seconds - elapsed)

        return self.response
Example #50
0
def fetch_file(url, project=None, release=None, allow_scraping=True):
    """
    Retrieve the contents of ``url`` and hand them back as a UrlResult.

    Sources are consulted in order: the release's files (only when
    ``release`` is truthy), the response cache (bodies are stored
    zlib-compressed), and finally a live HTTP request. The cache and the
    network are only used when ``allow_scraping`` is true and the URL is
    http(s). Every failure is reported by raising CannotFetchSource with an
    error dict.
    """
    result = fetch_release_file(url, release) if release else None

    cache_key = 'source:cache:v3:%s' % (
        md5(url).hexdigest(),
    )

    if result is None:
        scrapable = allow_scraping and url.startswith(('http:', 'https:'))
        if not scrapable:
            raise CannotFetchSource({
                'type': EventError.JS_MISSING_SOURCE,
                'url': url,
            })

        logger.debug('Checking cache for url %r', url)
        cached = cache.get(cache_key)
        if cached is not None:
            # Cached bodies are compressed; inflate before handing them off.
            inflated = zlib.decompress(cached[1])
            result = (cached[0], force_text(inflated), cached[2])

    if result is None:
        # Refuse to contact domains that were recently flagged as failing.
        netloc = urlparse(url).netloc
        blacklist_key = 'source:blacklist:v2:%s' % (
            md5(netloc).hexdigest(),
        )
        blacklisted = cache.get(blacklist_key)
        if blacklisted:
            blacklisted['url'] = url
            raise CannotFetchSource(blacklisted)

        # Only URLs that validate as an origin of the project receive the
        # project's token header.
        request_headers = {}
        if project and is_valid_origin(url, project=project):
            project_token = project.get_option('sentry:token')
            if project_token:
                request_headers['X-Sentry-Token'] = project_token

        logger.debug('Fetching %r from the internet', url)

        session = http.build_session()
        try:
            reply = session.get(
                url,
                allow_redirects=True,
                verify=False,
                headers=request_headers,
                timeout=settings.SENTRY_SOURCE_FETCH_TIMEOUT,
            )
        except Exception as exc:
            logger.debug('Unable to fetch %r', url, exc_info=True)
            if isinstance(exc, RestrictedIPAddress):
                failure = {'type': EventError.RESTRICTED_IP, 'url': url}
            elif isinstance(exc, SuspiciousOperation):
                failure = {'type': EventError.SECURITY_VIOLATION, 'url': url}
            elif isinstance(exc, (RequestException, ZeroReturnError)):
                failure = {
                    'type': EventError.JS_GENERIC_FETCH_ERROR,
                    'value': str(type(exc)),
                    'url': url,
                }
            else:
                # Anything unexpected is logged with its traceback.
                logger.exception(unicode(exc))
                failure = {'type': EventError.UNKNOWN_ERROR, 'url': url}

            # TODO(dcramer): we want to be less aggressive on disabling domains
            cache.set(blacklist_key, failure or '', 300)
            logger.warning('Disabling sources to %s for %ss', netloc, 300,
                           exc_info=True)
            raise CannotFetchSource(failure)

        # requests' attempts to use chardet internally when no encoding is found
        # and we want to avoid that slow behavior
        if not reply.encoding:
            reply.encoding = 'utf-8'

        text_body = reply.text
        packed_body = zlib.compress(force_bytes(text_body))
        lowered_headers = dict(
            (name.lower(), value) for name, value in reply.headers.items()
        )

        cache.set(cache_key,
                  (lowered_headers, packed_body, reply.status_code), 60)
        result = (lowered_headers, text_body, reply.status_code)

    status = result[2]
    if status != 200:
        logger.debug('HTTP %s when fetching %r', status, url,
                     exc_info=True)
        raise CannotFetchSource({
            'type': EventError.JS_INVALID_HTTP_CODE,
            'value': status,
            'url': url,
        })

    # The body must be unicode from here on. If it is not, it is either in
    # an encoding we don't understand or binary data we can't process.
    if not isinstance(result[1], unicode):
        try:
            result = (result[0], result[1].decode('utf8'), result[2])
        except UnicodeDecodeError:
            raise CannotFetchSource({
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                'url': url,
            })

    return UrlResult(url, result[0], result[1])
Example #51
0
    def _dispatch(self, request, helper, project_id=None, origin=None, *args, **kwargs):
        """
        Resolve the project and authentication for the request, then hand it
        to the parent class's ``dispatch``.

        The request user is always replaced with an AnonymousUser. When
        ``origin`` is given it must validate against the project, otherwise
        an invalid-CORS outcome is tracked and APIForbidden is raised.
        OPTIONS requests are answered by ``self.options`` without parsing
        auth. The response's ``Access-Control-Allow-Origin`` header is set
        whenever ``origin`` is truthy.
        """
        request.user = AnonymousUser()

        project = self._get_project_from_id(project_id)
        if project:
            helper.context.bind_project(project)

        if origin is not None:
            # This check is specific for clients who need CORS support
            if not project:
                raise APIError('Client must be upgraded for CORS support')
            if not is_valid_origin(origin, project):
                track_outcome(project.organization_id, project.id, None,
                              Outcome.INVALID, FilterStatKeys.CORS)
                raise APIForbidden('Invalid origin: %s' % (origin, ))

        # XXX: It seems that the OPTIONS call does not always include custom headers
        if request.method != 'OPTIONS':
            auth = self._parse_header(request, helper, project)

            key = helper.project_key_from_auth(auth)

            if project:
                if key.project_id != project.id:
                    raise APIError('Two different projects were specified')
            else:
                # Legacy API was /api/store/ and the project ID was only
                # available elsewhere
                project = Project.objects.get_from_cache(id=key.project_id)
                helper.context.bind_project(project)

            helper.context.bind_auth(auth)

            # Bind the Organization explicitly so `project.organization` is
            # safe to use later without an implicit query, and so the object
            # comes from cache rather than the database.
            organization = Organization.objects.get_from_cache(
                id=project.organization_id)
            project.organization = organization

            response = super(APIView, self).dispatch(
                request=request,
                project=project,
                auth=auth,
                helper=helper,
                key=key,
                **kwargs
            )
        else:
            response = self.options(request, project)

        if origin:
            # If an Origin is `null` but we got this far, we've gotten past
            # our CORS check for some reason. We can't return "null" as a
            # valid `Access-Control-Allow-Origin` value and have nothing else
            # to work with, so just allow '*' since they've gotten this far.
            allowed_origin = '*' if origin == 'null' else origin
            response['Access-Control-Allow-Origin'] = allowed_origin

        return response
Example #52
0
 def isValidOrigin(self, origin, inputs):
     # Helper: evaluate is_valid_origin(origin, self.project) while
     # sentry.utils.http.get_origins is patched to return `inputs`, and
     # verify it was consulted exactly once with this project.
     with mock.patch('sentry.utils.http.get_origins') as patched_get_origins:
         patched_get_origins.return_value = inputs
         verdict = is_valid_origin(origin, self.project)
         patched_get_origins.assert_called_once_with(self.project)
         return verdict
Example #53
0
def fetch_file(url, project=None, release=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Sources are tried in order: the release's files (only when ``release``
    is truthy), the response cache, and finally a live, streamed HTTP
    request. The cache and the network are only used when ``allow_scraping``
    is true and ``url`` is http(s).

    ``project`` is only used to decide whether the project's
    ``sentry:token`` option is sent as an ``X-Sentry-Token`` request header;
    it is sent when ``url`` passes ``is_valid_origin`` for the project.

    Internally the fetched data is carried around as a
    ``(headers, body, status_code, encoding)`` tuple.

    Raises CannotFetchSource with an error dict (``type`` and ``url`` keys,
    plus details for some types) when the URL is truncated, cannot be
    fetched, exceeds the size or time limits, responds with a non-200
    status, or is a ``.js`` URL whose body starts with ``<``.
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise CannotFetchSource({
            'type': EventError.JS_MISSING_SOURCE,
            'url': expose_url(url),
        })
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release)
    else:
        result = None

    cache_key = 'source:cache:v3:%s' % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Previous caches would be a 3-tuple instead of a 4-tuple,
            # so this is being maintained for backwards compatibility
            try:
                encoding = result[3]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = (result[0], zlib.decompress(result[1]), result[2],
                      encoding)

    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_key = 'source:blacklist:v2:%s' % (
            md5_text(domain).hexdigest(), )
        domain_result = cache.get(domain_key)
        if domain_result:
            # The cached error was recorded for whichever URL first failed
            # on this domain; report it against the URL being requested now,
            # formatted like every other error payload in this function.
            domain_result['url'] = expose_url(url)
            raise CannotFetchSource(domain_result)

        headers = {}
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
            if token:
                headers['X-Sentry-Token'] = token

        logger.debug('Fetching %r from the internet', url)

        with metrics.timer('sourcemaps.fetch'):
            http_session = http.build_session()
            response = None
            try:
                try:
                    start = time.time()
                    response = http_session.get(
                        url,
                        allow_redirects=True,
                        verify=False,
                        headers=headers,
                        timeout=settings.SENTRY_SOURCE_FETCH_SOCKET_TIMEOUT,
                        stream=True,
                    )

                    # Bail out before reading anything if the server already
                    # announces a body larger than we accept.
                    try:
                        cl = int(response.headers['content-length'])
                    except (LookupError, ValueError):
                        cl = 0
                    if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                        raise OverflowError()

                    contents = []
                    # From here on `cl` counts the bytes actually received.
                    cl = 0

                    # Only need to even attempt to read the response body if we
                    # got a 200 OK
                    if response.status_code == 200:
                        for chunk in response.iter_content(16 * 1024):
                            # Enforce the overall time budget while streaming.
                            if time.time(
                            ) - start > settings.SENTRY_SOURCE_FETCH_TIMEOUT:
                                raise Timeout()
                            contents.append(chunk)
                            cl += len(chunk)
                            if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                                raise OverflowError()

                except Exception as exc:
                    logger.debug('Unable to fetch %r', url, exc_info=True)
                    if isinstance(exc, RestrictedIPAddress):
                        error = {
                            'type': EventError.RESTRICTED_IP,
                            'url': expose_url(url),
                        }
                    elif isinstance(exc, SuspiciousOperation):
                        error = {
                            'type': EventError.SECURITY_VIOLATION,
                            'url': expose_url(url),
                        }
                    elif isinstance(exc, Timeout):
                        error = {
                            'type': EventError.JS_FETCH_TIMEOUT,
                            'url': expose_url(url),
                            'timeout': settings.SENTRY_SOURCE_FETCH_TIMEOUT,
                        }
                    elif isinstance(exc, OverflowError):
                        error = {
                            'type':
                            EventError.JS_TOO_LARGE,
                            'url':
                            expose_url(url),
                            # We want size in megabytes to format nicely
                            'max_size':
                            float(settings.SENTRY_SOURCE_FETCH_MAX_SIZE) /
                            1024 / 1024,
                        }
                    elif isinstance(exc, (RequestException, ZeroReturnError)):
                        error = {
                            'type': EventError.JS_GENERIC_FETCH_ERROR,
                            'value': six.text_type(type(exc)),
                            'url': expose_url(url),
                        }
                    else:
                        logger.exception(six.text_type(exc))
                        error = {
                            'type': EventError.UNKNOWN_ERROR,
                            'url': expose_url(url),
                        }

                    # TODO(dcramer): we want to be less aggressive on disabling domains
                    cache.set(domain_key, error or '', 300)
                    logger.warning('Disabling sources to %s for %ss',
                                   domain,
                                   300,
                                   exc_info=True)
                    raise CannotFetchSource(error)

                body = b''.join(contents)
                z_body = zlib.compress(body)
                headers = {k.lower(): v for k, v in response.headers.items()}
                encoding = response.encoding

                cache.set(cache_key,
                          (headers, z_body, response.status_code, encoding),
                          60)
                result = (headers, body, response.status_code, encoding)
            finally:
                # The response was opened with stream=True, so it has to be
                # closed explicitly.
                if response is not None:
                    response.close()

    if result[2] != 200:
        # No exception is being handled here, so exc_info is not passed.
        logger.debug('HTTP %s when fetching %r', result[2], url)
        error = {
            'type': EventError.JS_INVALID_HTTP_CODE,
            'value': result[2],
            'url': expose_url(url),
        }
        raise CannotFetchSource(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        body_start = result[1][:20].lstrip(
        )  # Discard leading whitespace (often found before doctype)

        # The body is normally binary, but may still be text at this point
        # (see the coercion below). Compare against a literal of the same
        # type: on Python 3 bytes never compare equal to text.
        open_tag = b'<' if isinstance(body_start, six.binary_type) else u'<'
        if body_start[:1] == open_tag:
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result[1], six.binary_type):
        try:
            # Keep the (headers, body, status_code, encoding) shape: the
            # return statement below reads index 3.
            result = (result[0], result[1].encode('utf8'), result[2], 'utf8')
        except UnicodeEncodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

    return UrlResult(url, result[0], result[1], result[3])