Example #1
def _get_project_enhancements_config(project):
    enhancements = project.get_option('sentry:grouping_enhancements')
    enhancements_base = project.get_option('sentry:grouping_enhancements_base')
    if not enhancements and not enhancements_base:
        return DEFAULT_ENHANCEMENTS_CONFIG

    if enhancements_base is None or enhancements_base not in ENHANCEMENT_BASES:
        enhancements_base = DEFAULT_ENHANCEMENT_BASE

    # Instead of parsing and dumping out the config on every call, take a
    # shortcut through the cache
    from sentry.utils.cache import cache
    from sentry.utils.hashlib import md5_text
    cache_key = 'grouping-enhancements:' + \
        md5_text('%s|%s' % (enhancements_base, enhancements)).hexdigest()
    rv = cache.get(cache_key)
    if rv is not None:
        return rv

    try:
        rv = Enhancements.from_config_string(
            enhancements or '', bases=[enhancements_base]).dumps()
    except InvalidEnhancerConfig:
        rv = DEFAULT_ENHANCEMENTS_CONFIG
    cache.set(cache_key, rv)
    return rv
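
For reference, a minimal sketch of an md5_text-style helper as used throughout these examples (assumed behavior for illustration, not the exact sentry.utils.hashlib implementation):

# Hedged sketch: encode each argument and feed it into one md5 object,
# returning the hash object so callers can call .hexdigest() on it.
from hashlib import md5

def md5_text(*args):
    m = md5()
    for arg in args:
        m.update(str(arg).encode('utf-8', errors='replace'))
    return m

# e.g. md5_text('hello').hexdigest() == '5d41402abc4b2a76b9719d911017c592'
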
Example #2
    def _finish_login_pipeline(self, identity):
        """
        The login flow executes both with anonymous and authenticated users.

        Upon completion a few branches exist:

        If the identity is already linked, the user should be logged in
        and redirected immediately.

        Otherwise, the user is presented with a confirmation window. That
        window shows them the new account that will be created and, if they
        are already authenticated, an option to associate the identity with
        their existing account.
        """
        auth_provider = self.auth_provider
        user_id = identity['id']

        lock = locks.get(
            'sso:auth:{}:{}'.format(
                auth_provider.id,
                md5_text(user_id).hexdigest(),
            ),
            duration=5,
        )
        with TimedRetryPolicy(5)(lock.acquire):
            try:
                auth_identity = AuthIdentity.objects.select_related('user').get(
                    auth_provider=auth_provider,
                    ident=user_id,
                )
            except AuthIdentity.DoesNotExist:
                auth_identity = None

            # Handle migration of identity keys
            if not auth_identity and isinstance(user_id, MigratingIdentityId):
                try:
                    auth_identity = AuthIdentity.objects.select_related('user').get(
                        auth_provider=auth_provider,
                        ident=user_id.legacy_id,
                    )
                    auth_identity.update(ident=user_id.id)
                except AuthIdentity.DoesNotExist:
                    auth_identity = None

            if not auth_identity:
                return self._handle_unknown_identity(identity)

            # If the User attached to this AuthIdentity is not active,
            # we want to clobber the old account and take it over, rather than
            # getting logged into the inactive account.
            if not auth_identity.user.is_active:

                # Current user is also not logged in, so we have to
                # assume unknown.
                if not self.request.user.is_authenticated():
                    return self._handle_unknown_identity(identity)

                auth_identity = self._handle_attach_identity(identity)

            return self._handle_existing_identity(auth_identity, identity)
Example #3
    def __init__(self, request, organization, flow, auth_provider=None,
                 provider_key=None):
        assert provider_key or auth_provider

        self.request = request
        self.auth_provider = auth_provider
        self.organization = organization
        self.flow = flow

        if auth_provider:
            provider = auth_provider.get_provider()
        elif provider_key:
            provider = manager.get(provider_key)
        else:
            raise NotImplementedError

        self.provider = provider
        if flow == self.FLOW_LOGIN:
            self.pipeline = provider.get_auth_pipeline()
        elif flow == self.FLOW_SETUP_PROVIDER:
            self.pipeline = provider.get_setup_pipeline()
        else:
            raise NotImplementedError

        # we serialize the pipeline to be [AuthView().get_ident(), ...] which
        # allows us to determine if the pipeline has changed during the auth
        # flow or if the user is somehow circumventing a chunk of it
        self.signature = md5_text(
            ' '.join(av.get_ident() for av in self.pipeline)
        ).hexdigest()
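
A hedged sketch of how this signature might later be re-checked to detect a changed or skipped pipeline; the helper name is hypothetical, while md5_text and get_ident are assumed to behave as above:

from sentry.utils.hashlib import md5_text

def is_pipeline_intact(stored_signature, pipeline):
    # Recompute the ident-based signature and compare it with the one
    # stored when the pipeline started.
    current = md5_text(' '.join(av.get_ident() for av in pipeline)).hexdigest()
    return current == stored_signature
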
Example #4
    def _get_event_user(self, project, data):
        user_data = data.get('sentry.interfaces.User')
        if not user_data:
            return

        euser = EventUser(
            project=project,
            ident=user_data.get('id'),
            email=user_data.get('email'),
            username=user_data.get('username'),
            ip_address=user_data.get('ip_address'),
        )

        if not euser.tag_value:
            return

        cache_key = 'euser:{}:{}'.format(
            project.id,
            md5_text(euser.tag_value).hexdigest(),
        )
        cached = default_cache.get(cache_key)
        if cached is None:
            try:
                with transaction.atomic(using=router.db_for_write(EventUser)):
                    euser.save()
            except IntegrityError:
                pass
            default_cache.set(cache_key, '', 3600)

        return euser
Example #5
    def test_query_hash(self):
        recent_search = RecentSearch.objects.create(
            organization=self.organization,
            user=self.user,
            type=0,
            query='hello',
        )
        recent_search = RecentSearch.objects.get(id=recent_search.id)
        assert recent_search.query_hash == md5_text(recent_search.query).hexdigest()
Example #6
def make_key(model, prefix, kwargs):
    kwargs_bits = []
    for k, v in sorted(six.iteritems(kwargs)):
        k = __prep_key(model, k)
        v = smart_text(__prep_value(model, k, v))
        kwargs_bits.append("%s=%s" % (k, v))
    kwargs_bits = ":".join(kwargs_bits)

    return "%s:%s:%s" % (prefix, model.__name__, md5_text(kwargs_bits).hexdigest())
Example #7
    def _make_key(self, model, filters):
        """
        Returns a Redis-compatible key for the model given filters.
        """
        return 'b:k:%s:%s' % (
            model._meta,
            md5_text(
                '&'.join('%s=%s' % (k, self._coerce_val(v))
                         for k, v in sorted(six.iteritems(filters)))
            ).hexdigest(),
        )
Example #8
    def for_tags(cls, project_id, values):
        """
        Finds matching EventUser objects from a list of tag values.

        Return a dictionary of {tag_value: event_user}.
        """
        hashes = [md5_text(v.split(':', 1)[-1]).hexdigest() for v in values]
        return {e.tag_value: e for e in cls.objects.filter(
            project_id=project_id,
            hash__in=hashes,
        )}
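
A hedged illustration of the hashing above; 'username:alice' is a made-up tag value:

# Only the part after the first ':' is hashed, so the lookup hash lines up with
# hashing the raw value (compare EventUser.get_hash() in Example #28 below):
#   md5_text('username:alice'.split(':', 1)[-1]).hexdigest() == md5_text('alice').hexdigest()
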
Example #9
    def get_conf_version(self, project):
        """
        Returns a version string that represents the current configuration state.

        If any option changes or new options added, the version will change.

        >>> plugin.get_conf_version(project)
        """
        options = self.get_conf_options(project)
        return md5_text('&'.join(sorted('%s=%s' % o
                                        for o in six.iteritems(options)))).hexdigest()[:3]
Example #10
def get_gravatar_url(email, size=None, default='mm'):
    if email is None:
        email = ''
    gravatar_url = "%s/avatar/%s" % (settings.SENTRY_GRAVATAR_BASE_URL,
                                     md5_text(email.lower()).hexdigest())

    properties = {}
    if size:
        properties['s'] = six.text_type(size)
    if default:
        properties['d'] = default
    if properties:
        gravatar_url += "?" + urlencode(properties)

    return gravatar_url
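
A hedged usage sketch; the base URL is an assumption about settings.SENTRY_GRAVATAR_BASE_URL and the digest is elided:

# Hypothetical call, assuming SENTRY_GRAVATAR_BASE_URL = 'https://secure.gravatar.com':
#   get_gravatar_url('User@Example.com', size=64)
#   -> 'https://secure.gravatar.com/avatar/<md5 of "user@example.com">?s=64&d=mm'
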
Example #11
    def get(self, request):
        results = status_checks.check_all()
        return Response({
            'problems': [
                {
                    'id': md5_text(problem.message).hexdigest(),
                    'message': problem.message,
                    'severity': problem.severity,
                    'url': problem.url,
                }
                for problem in sorted(itertools.chain.from_iterable(results.values()),
                                      reverse=True)
            ],
            'healthy': {type(check).__name__: not problems for check, problems in results.items()},
        })
Example #12
    def get_event_payload(self, event):
        props = {
            'event_id': event.event_id,
            'project_id': event.project.slug,
            'transaction': event.get_tag('transaction') or '',
            'release': event.get_tag('sentry:release') or '',
            'environment': event.get_tag('environment') or '',
            'type': event.get_event_type(),
        }
        props['tags'] = [[k.format(tagstore.get_standardized_key(k)), v]
                         for k, v in event.get_tags()]
        for key, value in six.iteritems(event.interfaces):
            if key == 'request':
                headers = value.headers
                if not isinstance(headers, dict):
                    headers = dict(headers or ())

                props.update({
                    'request_url': value.url,
                    'request_method': value.method,
                    'request_referer': headers.get('Referer', ''),
                })
            elif key == 'exception':
                exc = value.values[0]
                props.update({
                    'exception_type': exc.type,
                    'exception_value': exc.value,
                })
            elif key == 'logentry':
                props.update({
                    'message': value.formatted or value.message,
                })
            elif key in ('csp', 'expectct', 'expectstaple', 'hpkp'):
                props.update({
                    '{}_{}'.format(key.rsplit('.', 1)[-1].lower(), k): v
                    for k, v in six.iteritems(value.to_json())
                })
            elif key == 'user':
                user_payload = {}
                if value.id:
                    user_payload['user_id'] = value.id
                if value.email:
                    user_payload['user_email_hash'] = md5_text(value.email).hexdigest()
                if value.ip_address:
                    user_payload['user_ip_trunc'] = anonymize_ip(value.ip_address)
                if user_payload:
                    props.update(user_payload)
        return props
Example #13
    def is_limited(self, key, limit, project=None, window=None):
        if window is None:
            window = self.window

        key_hex = md5_text(key).hexdigest()
        bucket = int(time() / window)

        if project:
            key = 'rl:%s:%s:%s' % (key_hex, project.id, bucket)
        else:
            key = 'rl:%s:%s' % (key_hex, bucket)

        with self.cluster.map() as client:
            result = client.incr(key)
            client.expire(key, window)

        return result.value > limit
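
A hedged usage sketch, mirroring the login throttle in Example #21 below; the key string and limits are illustrative:

# Each distinct key gets at most `limit` increments per `window`-second
# bucket before is_limited() starts returning True.
#   if ratelimiter.is_limited(
#       u'auth:login:username:' + md5_text(username.lower()).hexdigest(),
#       limit=10, window=60,
#   ):
#       ...  # too many attempts; reject the login
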
Example #14
    def __init__(self, request, organization, provider_key, provider_model=None, config=None):
        if config is None:
            config = {}

        self.request = request
        self.organization = organization
        self.state = RedisSessionStore(request, self.pipeline_name)
        self.provider = self.provider_manager.get(provider_key)
        self.provider_model = provider_model

        self.config = config
        self.provider.set_config(config)

        self.pipeline = self.get_pipeline_views()

        # we serialize the pipeline to be ['fqn.PipelineView', ...] which
        # allows us to determine if the pipeline has changed during the auth
        # flow or if the user is somehow circumventing a chunk of it
        pipe_ids = ['{}.{}'.format(type(v).__module__, type(v).__name__) for v in self.pipeline]
        self.signature = md5_text(*pipe_ids).hexdigest()
Example #15
def get_fingerprinting_config_for_project(project):
    from sentry.grouping.fingerprinting import FingerprintingRules, \
        InvalidFingerprintingConfig
    rules = project.get_option('sentry:fingerprinting_rules')
    if not rules:
        return FingerprintingRules([])

    from sentry.utils.cache import cache
    from sentry.utils.hashlib import md5_text
    cache_key = 'fingerprinting-rules:' + md5_text(rules).hexdigest()
    rv = cache.get(cache_key)
    if rv is not None:
        return FingerprintingRules.from_json(rv)

    try:
        rv = FingerprintingRules.from_config_string(
            rules or '')
    except InvalidFingerprintingConfig:
        rv = FingerprintingRules([])
    cache.set(cache_key, rv.to_json())
    return rv
Example #16
def _make_cache_key(key):
    return 'o:%s' % md5_text(key).hexdigest()
Example #17
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (release.id, md5_text(filename).hexdigest(), )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf
                for ident in filename_idents
                for rf in possible_files
                if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error('sourcemap.compress_read_failed', exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
Example #18
    def get_rl_key(self, event):
        return "{}:{}".format(self.conf_key, md5_text(self.project_token).hexdigest())
Example #19
    def get_cache_key(cls, project_id, name):
        return 'env:1:%s:%s' % (project_id, md5_text(name).hexdigest())
Example #20
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)',
                     filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(ReleaseFile.objects.filter(
            release=release,
            ident__in=filename_idents,
        ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug('Release artifact %r not found in database (release_id=%s)',
                         filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next((f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            try:
                result = (releasefile.file.headers, body.decode('utf-8'), 200)
            except UnicodeDecodeError:
                error = {
                    'type': EventError.JS_INVALID_SOURCE_ENCODING,
                    'value': 'utf8',
                    'url': expose_url(releasefile.name),
                }
                raise CannotFetchSource(error)
            else:
                # Write the compressed version to cache, but return the deflated version
                cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        try:
            result = (result[0], body.decode('utf-8'), result[2])
        except UnicodeDecodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                'url': expose_url(releasefile.name),
            }
            raise CannotFetchSource(error)

    return result
Example #21
    def handle_basic_auth(self, request):
        can_register = auth.has_user_registration() or request.session.get('can_register')

        op = request.POST.get('op')

        # Detect that we are on the register page by url /register/ and
        # then activate the register tab by default.
        if not op and '/register' in request.path_info and can_register:
            op = 'register'

        login_form = self.get_login_form(request)
        if can_register:
            register_form = self.get_register_form(request, initial={
                'username': request.session.get('invite_email', '')
            })
        else:
            register_form = None

        if can_register and register_form.is_valid():
            user = register_form.save()
            user.send_confirm_emails(is_new_user=True)

            # HACK: grab whatever the first backend is and assume it works
            user.backend = settings.AUTHENTICATION_BACKENDS[0]

            auth.login(request, user)

            # can_register should only allow a single registration
            request.session.pop('can_register', None)
            request.session.pop('invite_email', None)

            return self.redirect(auth.get_login_redirect(request))

        elif request.method == 'POST':
            from sentry.app import ratelimiter
            from sentry.utils.hashlib import md5_text

            login_attempt = op == 'login' and request.POST.get('username') and request.POST.get('password')

            if login_attempt and ratelimiter.is_limited(
                u'auth:login:username:{}'.format(md5_text(request.POST['username'].lower()).hexdigest()),
                limit=10, window=60,  # 10 per minute should be enough for anyone
            ):
                login_form.errors['__all__'] = [u'You have made too many login attempts. Please try again later.']
            elif login_form.is_valid():
                user = login_form.get_user()

                auth.login(request, user)

                if not user.is_active:
                    return self.redirect(reverse('sentry-reactivate-account'))

                return self.redirect(auth.get_login_redirect(request))

        context = {
            'op': op or 'login',
            'server_hostname': get_server_hostname(),
            'login_form': login_form,
            'register_form': register_form,
            'CAN_REGISTER': can_register,
        }
        return self.respond('sentry/login.html', context)
Example #22
    def get_cache_key(cls, project_id, _key_id, value):
        return 'tagvalue:1:%s:%s:%s' % (project_id, _key_id,
                                        md5_text(value).hexdigest())
Example #23
    def post_process(self, event, **kwargs):
        token = self.get_option("token", event.project)
        index = self.get_option("index", event.project)
        instance = self.get_option("instance", event.project)
        if not (token and index and instance):
            metrics.incr(
                "integrations.splunk.forward-event.unconfigured",
                tags={
                    "project_id": event.project_id,
                    "organization_id": event.project.organization_id,
                    "event_type": event.get_event_type(),
                },
            )
            return

        if not instance.endswith("/services/collector"):
            instance = instance.rstrip("/") + "/services/collector"

        source = self.get_option("source", event.project) or "sentry"

        rl_key = "splunk:{}".format(md5_text(token).hexdigest())
        # limit splunk to 1000 requests/second
        if ratelimiter.is_limited(rl_key, limit=1000, window=1):
            metrics.incr(
                "integrations.splunk.forward-event.rate-limited",
                tags={
                    "project_id": event.project_id,
                    "organization_id": event.project.organization_id,
                    "event_type": event.get_event_type(),
                },
            )
            return

        payload = {
            "time": int(event.datetime.strftime("%s")),
            "source": source,
            "index": index,
            "event": self.get_event_payload(event),
        }
        host = self.get_host_for_splunk(event)
        if host:
            payload["host"] = host

        session = http.build_session()
        try:
            # https://docs.splunk.com/Documentation/Splunk/7.2.3/Data/TroubleshootHTTPEventCollector
            resp = session.post(
                instance,
                json=payload,
                # Splunk Cloud instances' certificates don't play nicely
                verify=False,
                headers={"Authorization": "Splunk {}".format(token)},
                timeout=5,
            )
            if resp.status_code != 200:
                raise SplunkError.from_response(resp)
        except Exception as exc:
            metric = "integrations.splunk.forward-event.error"
            metrics.incr(
                metric,
                tags={
                    "project_id": event.project_id,
                    "organization_id": event.project.organization_id,
                    "event_type": event.get_event_type(),
                    "error_code": getattr(exc, "code", None),
                },
            )
            logger.info(
                metric,
                extra={
                    "instance": instance,
                    "project_id": event.project_id,
                    "organization_id": event.project.organization_id,
                },
            )

            if isinstance(exc, ReadTimeout):
                # If we get a ReadTimeout we don't need to raise an error here.
                # Just log and return.
                return
            raise

        metrics.incr(
            "integrations.splunk.forward-event.success",
            tags={
                "project_id": event.project_id,
                "organization_id": event.project.organization_id,
                "event_type": event.get_event_type(),
            },
        )
Example #24
def fetch_file(url,
               project=None,
               release=None,
               dist=None,
               allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise http.CannotFetch({
            'type': EventError.JS_MISSING_SOURCE,
            'url': http.expose_url(url),
        })
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    cache_key = 'source:cache:v4:%s' % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Previous caches would be a 4-tuple (missing the encoding) instead
            # of a 5-tuple, so this is being maintained for backwards compatibility
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(result[0], result[1],
                                    zlib.decompress(result[2]), result[3],
                                    encoding)

    if result is None:
        headers = {}
        verify_ssl = False
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option('sentry:verify_ssl', False))
            token = project.get_option('sentry:token')
            if token:
                token_header = project.get_option(
                    'sentry:token_header') or 'X-Sentry-Token'
                headers[token_header] = token

        with metrics.timer('sourcemaps.fetch'):
            result = http.fetch_file(url,
                                     headers=headers,
                                     verify_ssl=verify_ssl)
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url, result.headers, z_body, result.status, result.encoding),
                get_max_age(result.headers))

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch({
            'type': EventError.FETCH_INVALID_HTTP_CODE,
            'value': result.status,
            'url': http.expose_url(url),
        })

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, six.binary_type):
        try:
            result = http.UrlResult(result.url, result.headers,
                                    result.body.encode('utf8'), result.status,
                                    result.encoding)
        except UnicodeEncodeError:
            error = {
                'type': EventError.FETCH_INVALID_ENCODING,
                'value': 'utf8',
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        if body_start[:1] == u'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise http.CannotFetch(error)

    return result
Example #25
def fetch_file(url,
               domain_lock_enabled=True,
               outfile=None,
               headers=None,
               allow_redirects=True,
               verify_ssl=False,
               timeout=settings.SENTRY_SOURCE_FETCH_SOCKET_TIMEOUT,
               **kwargs):
    """
    Pull down a URL, returning a UrlResult object.
    """
    # lock down domains that are problematic
    if domain_lock_enabled:
        domain = urlparse(url).netloc
        domain_key = 'source:blacklist:v2:%s' % (
            md5_text(domain).hexdigest(), )
        domain_result = cache.get(domain_key)
        if domain_result:
            domain_result['url'] = url
            raise CannotFetch(domain_result)

    logger.debug('Fetching %r from the internet', url)

    http_session = build_session()
    response = None

    try:
        try:
            start = time.time()
            response = http_session.get(url,
                                        allow_redirects=allow_redirects,
                                        verify=verify_ssl,
                                        headers=headers,
                                        timeout=timeout,
                                        stream=True,
                                        **kwargs)

            try:
                cl = int(response.headers['content-length'])
            except (LookupError, ValueError):
                cl = 0
            if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                raise OverflowError()

            return_body = False
            if outfile is None:
                outfile = six.BytesIO()
                return_body = True

            cl = 0

            # Only need to even attempt to read the response body if we
            # got a 200 OK
            if response.status_code == 200:
                for chunk in response.iter_content(16 * 1024):
                    if time.time() - start > settings.SENTRY_SOURCE_FETCH_TIMEOUT:
                        raise Timeout()
                    outfile.write(chunk)
                    cl += len(chunk)
                    if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                        raise OverflowError()

        except Exception as exc:
            logger.debug('Unable to fetch %r', url, exc_info=True)
            if isinstance(exc, RestrictedIPAddress):
                error = {
                    'type': EventError.RESTRICTED_IP,
                    'url': expose_url(url),
                }
            elif isinstance(exc, SuspiciousOperation):
                error = {
                    'type': EventError.SECURITY_VIOLATION,
                    'url': expose_url(url),
                }
            elif isinstance(exc, (Timeout, ReadTimeout)):
                error = {
                    'type': EventError.FETCH_TIMEOUT,
                    'url': expose_url(url),
                    'timeout': settings.SENTRY_SOURCE_FETCH_TIMEOUT,
                }
            elif isinstance(exc, OverflowError):
                error = {
                    'type': EventError.FETCH_TOO_LARGE,
                    'url': expose_url(url),
                    # We want size in megabytes to format nicely
                    'max_size': float(settings.SENTRY_SOURCE_FETCH_MAX_SIZE) / 1024 / 1024,
                }
            elif isinstance(exc, (RequestException, ZeroReturnError)):
                error = {
                    'type': EventError.FETCH_GENERIC_ERROR,
                    'value': six.text_type(type(exc)),
                    'url': expose_url(url),
                }
            else:
                logger.exception(six.text_type(exc))
                error = {
                    'type': EventError.UNKNOWN_ERROR,
                    'url': expose_url(url),
                }

            # TODO(dcramer): we want to be less aggressive on disabling domains
            if domain_lock_enabled:
                cache.set(domain_key, error or '', 300)
                logger.warning('source.disabled', extra=error)
            raise CannotFetch(error)

        headers = {k.lower(): v for k, v in response.headers.items()}
        encoding = response.encoding

        body = None
        if return_body:
            body = outfile.getvalue()
            outfile.close()  # we only want to close StringIO

        result = (headers, body, response.status_code, encoding)
    finally:
        if response is not None:
            response.close()

    if result[2] != 200:
        logger.debug('HTTP %s when fetching %r', result[2], url, exc_info=True)
        error = {
            'type': EventError.FETCH_INVALID_HTTP_CODE,
            'value': result[2],
            'url': expose_url(url),
        }
        raise CannotFetch(error)

    return UrlResult(url, result[0], result[1], result[2], result[3])
Example #26
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [
            ReleaseFile.get_ident(f, dist_name) for f in filename_choices
        ]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((rf for ident in filename_idents
                                for rf in possible_files if rf.ident == ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error('sourcemap.compress_read_failed',
                         exc_info=sys.exc_info())
            result = None
        else:
            headers = {
                k.lower(): v
                for k, v in releasefile.file.headers.items()
            }
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(filename, result[0],
                                zlib.decompress(result[1]), result[2],
                                encoding)

    return result
Example #27
def _user_to_author_cache_key(organization_id, author):
    author_hash = md5_text(author.email.lower()).hexdigest()
    return f"get_users_for_authors:{organization_id}:{author_hash}"
Example #28
    def get_hash(self):
        value = self.ident or self.username or self.email or self.ip_address
        return md5_text(value).hexdigest()
Example #29
    def get_cache_key(cls, group_id, release_id, environment):
        return "grouprelease:1:{}:{}".format(
            group_id,
            md5_text(f"{release_id}:{environment}").hexdigest())
Example #30
    def _generate_cache_version(self):
        return md5_text('&'.join(
            sorted(f.attname
                   for f in self.model._meta.fields))).hexdigest()[:3]
Example #31
def fetch_file(url, project=None, release=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release)
    else:
        result = None

    cache_key = 'source:cache:v3:%s' % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Previous caches would be a 3-tuple instead of a 4-tuple,
            # so this is being maintained for backwards compatibility
            try:
                encoding = result[3]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = (result[0], zlib.decompress(result[1]), result[2],
                      encoding)

    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_key = 'source:blacklist:v2:%s' % (
            md5_text(domain).hexdigest(), )
        domain_result = cache.get(domain_key)
        if domain_result:
            domain_result['url'] = url
            raise CannotFetchSource(domain_result)

        headers = {}
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
            if token:
                headers['X-Sentry-Token'] = token

        logger.debug('Fetching %r from the internet', url)

        with metrics.timer('sourcemaps.fetch'):
            http_session = http.build_session()
            response = None
            try:
                try:
                    start = time.time()
                    response = http_session.get(
                        url,
                        allow_redirects=True,
                        verify=False,
                        headers=headers,
                        timeout=settings.SENTRY_SOURCE_FETCH_SOCKET_TIMEOUT,
                        stream=True,
                    )

                    try:
                        cl = int(response.headers['content-length'])
                    except (LookupError, ValueError):
                        cl = 0
                    if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                        raise OverflowError()

                    contents = []
                    cl = 0

                    # Only need to even attempt to read the response body if we
                    # got a 200 OK
                    if response.status_code == 200:
                        for chunk in response.iter_content(16 * 1024):
                            if time.time() - start > settings.SENTRY_SOURCE_FETCH_TIMEOUT:
                                raise Timeout()
                            contents.append(chunk)
                            cl += len(chunk)
                            if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                                raise OverflowError()

                except Exception as exc:
                    logger.debug('Unable to fetch %r', url, exc_info=True)
                    if isinstance(exc, RestrictedIPAddress):
                        error = {
                            'type': EventError.RESTRICTED_IP,
                            'url': expose_url(url),
                        }
                    elif isinstance(exc, SuspiciousOperation):
                        error = {
                            'type': EventError.SECURITY_VIOLATION,
                            'url': expose_url(url),
                        }
                    elif isinstance(exc, Timeout):
                        error = {
                            'type': EventError.JS_FETCH_TIMEOUT,
                            'url': expose_url(url),
                            'timeout': settings.SENTRY_SOURCE_FETCH_TIMEOUT,
                        }
                    elif isinstance(exc, OverflowError):
                        error = {
                            'type': EventError.JS_TOO_LARGE,
                            'url': expose_url(url),
                            # We want size in megabytes to format nicely
                            'max_size': float(settings.SENTRY_SOURCE_FETCH_MAX_SIZE) / 1024 / 1024,
                        }
                    elif isinstance(exc, (RequestException, ZeroReturnError)):
                        error = {
                            'type': EventError.JS_GENERIC_FETCH_ERROR,
                            'value': six.text_type(type(exc)),
                            'url': expose_url(url),
                        }
                    else:
                        logger.exception(six.text_type(exc))
                        error = {
                            'type': EventError.UNKNOWN_ERROR,
                            'url': expose_url(url),
                        }

                    # TODO(dcramer): we want to be less aggressive on disabling domains
                    cache.set(domain_key, error or '', 300)
                    logger.warning('Disabling sources to %s for %ss',
                                   domain,
                                   300,
                                   exc_info=True)
                    raise CannotFetchSource(error)

                body = b''.join(contents)
                z_body = zlib.compress(body)
                headers = {k.lower(): v for k, v in response.headers.items()}
                encoding = response.encoding

                cache.set(cache_key,
                          (headers, z_body, response.status_code, encoding),
                          60)
                result = (headers, body, response.status_code, encoding)
            finally:
                if response is not None:
                    response.close()

    if result[2] != 200:
        logger.debug('HTTP %s when fetching %r', result[2], url, exc_info=True)
        error = {
            'type': EventError.JS_INVALID_HTTP_CODE,
            'value': result[2],
            'url': expose_url(url),
        }
        raise CannotFetchSource(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result[1][:20].lstrip()

        if body_start[:1] == u'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise CannotFetchSource(error)

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result[1], six.binary_type):
        try:
            result = (result[0], result[1].encode('utf8'), result[2], 'utf8')
        except UnicodeEncodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

    return UrlResult(url, result[0], result[1], result[3])
Example #32
    def cache_version(self) -> str:
        if self._cache_version is None:
            self._cache_version = md5_text("&".join(
                sorted(f.attname
                       for f in self.model._meta.fields))).hexdigest()[:3]
        return self._cache_version
Example #33
def fetch_file(url, domain_lock_enabled=True, outfile=None,
               headers=None, allow_redirects=True, verify_ssl=False,
               timeout=settings.SENTRY_SOURCE_FETCH_SOCKET_TIMEOUT, **kwargs):
    """
    Pull down a URL, returning a UrlResult object.
    """
    # lock down domains that are problematic
    if domain_lock_enabled:
        domain = urlparse(url).netloc
        domain_key = 'source:blacklist:v2:%s' % (
            md5_text(domain).hexdigest(),
        )
        domain_result = cache.get(domain_key)
        if domain_result:
            domain_result['url'] = url
            raise CannotFetch(domain_result)

    logger.debug('Fetching %r from the internet', url)

    http_session = build_session()
    response = None

    try:
        try:
            start = time.time()
            response = http_session.get(
                url,
                allow_redirects=allow_redirects,
                verify=verify_ssl,
                headers=headers,
                timeout=timeout,
                stream=True,
                **kwargs
            )

            try:
                cl = int(response.headers['content-length'])
            except (LookupError, ValueError):
                cl = 0
            if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                raise OverflowError()

            return_body = False
            if outfile is None:
                outfile = six.BytesIO()
                return_body = True

            cl = 0

            # Only need to even attempt to read the response body if we
            # got a 200 OK
            if response.status_code == 200:
                for chunk in response.iter_content(16 * 1024):
                    if time.time() - start > settings.SENTRY_SOURCE_FETCH_TIMEOUT:
                        raise Timeout()
                    outfile.write(chunk)
                    cl += len(chunk)
                    if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                        raise OverflowError()

        except Exception as exc:
            logger.debug('Unable to fetch %r', url, exc_info=True)
            if isinstance(exc, RestrictedIPAddress):
                error = {
                    'type': EventError.RESTRICTED_IP,
                    'url': expose_url(url),
                }
            elif isinstance(exc, SuspiciousOperation):
                error = {
                    'type': EventError.SECURITY_VIOLATION,
                    'url': expose_url(url),
                }
            elif isinstance(exc, (Timeout, ReadTimeout)):
                error = {
                    'type': EventError.FETCH_TIMEOUT,
                    'url': expose_url(url),
                    'timeout': settings.SENTRY_SOURCE_FETCH_TIMEOUT,
                }
            elif isinstance(exc, OverflowError):
                error = {
                    'type': EventError.FETCH_TOO_LARGE,
                    'url': expose_url(url),
                    # We want size in megabytes to format nicely
                    'max_size': float(settings.SENTRY_SOURCE_FETCH_MAX_SIZE) / 1024 / 1024,
                }
            elif isinstance(exc, (RequestException, ZeroReturnError)):
                error = {
                    'type': EventError.FETCH_GENERIC_ERROR,
                    'value': six.text_type(type(exc)),
                    'url': expose_url(url),
                }
            else:
                logger.exception(six.text_type(exc))
                error = {
                    'type': EventError.UNKNOWN_ERROR,
                    'url': expose_url(url),
                }

            # TODO(dcramer): we want to be less aggressive on disabling domains
            if domain_lock_enabled:
                cache.set(domain_key, error or '', 300)
                logger.warning('source.disabled', extra=error)
            raise CannotFetch(error)

        headers = {k.lower(): v for k, v in response.headers.items()}
        encoding = response.encoding

        body = None
        if return_body:
            body = outfile.getvalue()
            outfile.close()  # we only want to close StringIO

        result = (headers, body, response.status_code, encoding)
    finally:
        if response is not None:
            response.close()

    if result[2] != 200:
        logger.debug('HTTP %s when fetching %r', result[2], url,
                     exc_info=True)
        error = {
            'type': EventError.FETCH_INVALID_HTTP_CODE,
            'value': result[2],
            'url': expose_url(url),
        }
        raise CannotFetch(error)

    return UrlResult(url, result[0], result[1], result[2], result[3])
Example #34
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                ident__in=filename_idents,
            ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next(
                (f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {
                k.lower(): v
                for k, v in releasefile.file.headers.items()
            }
            encoding = get_encoding_from_headers(headers)
            result = (headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = (result[0], zlib.decompress(result[1]), result[2], encoding)

    return result
Example #35
def fetch_file(url, project=None, release=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    if release:
        result = fetch_release_file(url, release)
    else:
        result = None

    cache_key = 'source:cache:v3:%s' % (
        md5_text(url).hexdigest(),
    )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            body = zlib.decompress(result[1])
            result = (result[0], force_text(body), result[2])

    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_key = 'source:blacklist:v2:%s' % (
            md5_text(domain).hexdigest(),
        )
        domain_result = cache.get(domain_key)
        if domain_result:
            domain_result['url'] = url
            raise CannotFetchSource(domain_result)

        headers = {}
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
            if token:
                headers['X-Sentry-Token'] = token

        logger.debug('Fetching %r from the internet', url)

        http_session = http.build_session()
        try:
            response = http_session.get(
                url,
                allow_redirects=True,
                verify=False,
                headers=headers,
                timeout=settings.SENTRY_SOURCE_FETCH_TIMEOUT,
            )
        except Exception as exc:
            logger.debug('Unable to fetch %r', url, exc_info=True)
            if isinstance(exc, RestrictedIPAddress):
                error = {
                    'type': EventError.RESTRICTED_IP,
                    'url': expose_url(url),
                }
            elif isinstance(exc, SuspiciousOperation):
                error = {
                    'type': EventError.SECURITY_VIOLATION,
                    'url': expose_url(url),
                }
            elif isinstance(exc, (RequestException, ZeroReturnError)):
                error = {
                    'type': EventError.JS_GENERIC_FETCH_ERROR,
                    'value': six.text_type(type(exc)),
                    'url': expose_url(url),
                }
            else:
                logger.exception(six.text_type(exc))
                error = {
                    'type': EventError.UNKNOWN_ERROR,
                    'url': expose_url(url),
                }

            # TODO(dcramer): we want to be less aggressive on disabling domains
            cache.set(domain_key, error or '', 300)
            logger.warning('Disabling sources to %s for %ss', domain, 300,
                           exc_info=True)
            raise CannotFetchSource(error)

        # requests attempts to use chardet internally when no encoding is found,
        # and we want to avoid that slow behavior
        if not response.encoding:
            response.encoding = 'utf-8'

        body = response.text
        z_body = zlib.compress(force_bytes(body))
        headers = {k.lower(): v for k, v in response.headers.items()}

        cache.set(cache_key, (headers, z_body, response.status_code), 60)
        result = (headers, body, response.status_code)

    if result[2] != 200:
        logger.debug('HTTP %s when fetching %r', result[2], url,
                     exc_info=True)
        error = {
            'type': EventError.JS_INVALID_HTTP_CODE,
            'value': result[2],
            'url': expose_url(url),
        }
        raise CannotFetchSource(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        body_start = result[1][:20].lstrip()  # Discard leading whitespace (often found before doctype)

        if body_start[:1] == u'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise CannotFetchSource(error)

    # Make sure the file we're getting back is six.text_type. If it's not,
    # it's either some encoding that we don't understand, or it's binary
    # data which we can't process.
    if not isinstance(result[1], six.text_type):
        try:
            result = (result[0], result[1].decode('utf8'), result[2])
        except UnicodeDecodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

    return UrlResult(url, result[0], result[1])
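fetch_file above keys its cache on an md5 of the URL and stores the body zlib-compressed. A minimal sketch of that write/read round trip, assuming a cache object with get/set (key prefix copied from the code above; everything else hypothetical):

import hashlib
import zlib

def source_cache_key(url):
    # Hashing keeps the key a fixed, backend-safe length no matter how long the URL is.
    return 'source:cache:v3:%s' % hashlib.md5(url.encode('utf-8')).hexdigest()

def store_response(cache, url, headers, body, status, ttl=60):
    # body is assumed to be bytes here
    cache.set(source_cache_key(url), (headers, zlib.compress(body), status), ttl)

def load_response(cache, url):
    entry = cache.get(source_cache_key(url))
    if entry is None:
        return None
    return entry[0], zlib.decompress(entry[1]), entry[2]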
Example #36
0
    def _finish_login_pipeline(self, identity):
        """
        The login flow executes both with anonymous and authenticated users.

        Upon completion a few branches exist:

        If the identity is already linked, the user should be logged in
        and redirected immediately.

        Otherwise, the user is presented with a confirmation window. That window
        will show them the new account that will be created, and if they're
        already authenticated an optional button to associate the identity with
        their account.
        """
        auth_provider = self.auth_provider
        user_id = identity['id']

        lock = locks.get(
            u'sso:auth:{}:{}'.format(
                auth_provider.id,
                md5_text(user_id).hexdigest(),
            ),
            duration=5,
        )
        with TimedRetryPolicy(5)(lock.acquire):
            try:
                auth_identity = AuthIdentity.objects.select_related('user').get(
                    auth_provider=auth_provider,
                    ident=user_id,
                )
            except AuthIdentity.DoesNotExist:
                auth_identity = None

            # Handle migration of identity keys
            if not auth_identity and isinstance(user_id, MigratingIdentityId):
                try:
                    auth_identity = AuthIdentity.objects.select_related('user').get(
                        auth_provider=auth_provider,
                        ident=user_id.legacy_id,
                    )
                    auth_identity.update(ident=user_id.id)
                except AuthIdentity.DoesNotExist:
                    auth_identity = None

            if not auth_identity:
                return handle_unknown_identity(
                    self.request,
                    self.organization,
                    self.auth_provider,
                    self.provider,
                    self.state,
                    identity,
                )

            # If the User attached to this AuthIdentity is not active,
            # we want to clobber the old account and take it over, rather than
            # getting logged into the inactive account.
            if not auth_identity.user.is_active:
                # Current user is also not logged in, so we have to
                # assume unknown.
                if not self.request.user.is_authenticated():
                    return handle_unknown_identity(
                        self.request,
                        self.organization,
                        self.auth_provider,
                        self.provider,
                        self.state,
                        identity,
                    )
                auth_identity = handle_attach_identity(
                    self.auth_provider,
                    self.request,
                    self.organization,
                    self.provider,
                    identity,
                )

            return handle_existing_identity(
                self.auth_provider,
                self.provider,
                self.organization,
                self.request,
                self.state,
                auth_identity,
                identity,
            )
Example #37
0
    def post(self,
             request: Request,
             organization=None,
             *args,
             **kwargs) -> Response:
        """
        Process a login request via username/password. SSO login is handled
        elsewhere.
        """
        login_form = AuthenticationForm(request, request.data)

        # Rate limit logins
        is_limited = ratelimiter.is_limited(
            "auth:login:username:{}".format(
                md5_text(
                    login_form.clean_username(request.data.get("username"))
                ).hexdigest()
            ),
            limit=10,
            window=60,  # 10 per minute should be enough for anyone
        )

        if is_limited:
            errors = {"__all__": [login_form.error_messages["rate_limited"]]}
            metrics.incr("login.attempt",
                         instance="rate_limited",
                         skip_internal=True,
                         sample_rate=1.0)

            return self.respond_with_error(errors)

        if not login_form.is_valid():
            metrics.incr("login.attempt",
                         instance="failure",
                         skip_internal=True,
                         sample_rate=1.0)
            return self.respond_with_error(login_form.errors)

        user = login_form.get_user()

        auth.login(request,
                   user,
                   organization_id=organization.id if organization else None)
        metrics.incr("login.attempt",
                     instance="success",
                     skip_internal=True,
                     sample_rate=1.0)

        if not user.is_active:
            return Response({
                "nextUri": "/auth/reactivate/",
                "user": serialize(user, user, DetailedUserSerializer()),
            })

        active_org = self.get_active_organization(request)
        redirect_url = auth.get_org_redirect_url(request, active_org)

        return Response({
            "nextUri": auth.get_login_redirect(request, redirect_url),
            "user": serialize(user, user, DetailedUserSerializer()),
        })
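The endpoint above rate limits login attempts by an md5 of the cleaned username. A rough in-memory, fixed-window sketch of the same policy (the real code delegates to Sentry's ratelimiter; names here are hypothetical):

import hashlib
import time

_counters = {}

def login_ratelimit_key(username):
    return 'auth:login:username:%s' % hashlib.md5(username.encode('utf-8')).hexdigest()

def is_limited(key, limit=10, window=60):
    # Fixed-window counter: bucket hits by (key, current window) and compare to the limit.
    bucket = (key, int(time.time()) // window)
    _counters[bucket] = _counters.get(bucket, 0) + 1
    return _counters[bucket] > limit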
Example #38
0
 def hash_from_tag(cls, value):
     return md5_text(value.split(':', 1)[-1]).hexdigest()
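Note that only the portion after the first colon contributes to the hash; for example (tag value hypothetical):

value = 'sentry:user:ident:123'
assert value.split(':', 1)[-1] == 'user:ident:123'  # the namespace prefix is dropped before hashing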
Example #39
0
 def get_cache_key(cls, project_id, version):
     return 'release:2:%s:%s' % (project_id, md5_text(version).hexdigest())
Example #40
0
 def build_hash(self):
     for key, value in self.iter_attributes():
         if value:
             return md5_text(value).hexdigest()
Example #41
0
 def get_cache_key(cls, organization_id, name):
     return 'env:2:%s:%s' % (organization_id, md5_text(name).hexdigest())
Example #42
0
 def get_cache_key(cls, group_id, release_id, environment):
     return 'grouprelease:1:{}:{}'.format(
         group_id,
         md5_text(u'{}:{}'.format(release_id, environment)).hexdigest(),
     )
Example #43
0
 def build_hash(self):
     value = self.ident or self.username or self.email or self.ip_address
     if not value:
         return None
     return md5_text(value).hexdigest()
Example #44
0
 def get_cache_key(cls, organization_id, version):
     return 'release:3:%s:%s' % (organization_id, md5_text(version).hexdigest())
Example #45
0
def fetch_file(url, project=None, release=None, dist=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise http.CannotFetch(
            {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
        )
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    cache_key = 'source:cache:v4:%s' % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Previous caches would be a 4-tuple instead of a 5-tuple (no
            # encoding element), so this is maintained for backwards compatibility
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(
                result[0], result[1], zlib.decompress(result[2]), result[3], encoding
            )

    if result is None:
        headers = {}
        verify_ssl = False
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option('sentry:verify_ssl', False))
            token = project.get_option('sentry:token')
            if token:
                token_header = project.get_option('sentry:token_header') or 'X-Sentry-Token'
                headers[token_header] = token

        with metrics.timer('sourcemaps.fetch'):
            result = http.fetch_file(url, headers=headers, verify_ssl=verify_ssl)
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url,
                 result.headers,
                 z_body,
                 result.status,
                 result.encoding),
                get_max_age(result.headers))

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch(
            {
                'type': EventError.FETCH_INVALID_HTTP_CODE,
                'value': result.status,
                'url': http.expose_url(url),
            }
        )

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, six.binary_type):
        try:
            result = http.UrlResult(
                result.url, result.headers,
                result.body.encode('utf8'), result.status, result.encoding
            )
        except UnicodeEncodeError:
            error = {
                'type': EventError.FETCH_INVALID_ENCODING,
                'value': 'utf8',
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        if body_start[:1] == u'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise http.CannotFetch(error)

    return result
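The cache TTL above comes from get_max_age(result.headers). A plausible sketch of such a helper, reading Cache-Control's max-age with a default and a ceiling (the real implementation may differ):

import re

def max_age_from_headers(headers, default=60, ceiling=3600):
    # headers are assumed lower-cased, as in the snippets above
    match = re.search(r'max-age=(\d+)', headers.get('cache-control', ''))
    if not match:
        return default
    return min(int(match.group(1)), ceiling)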
Example #46
0
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)',
                     filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(ReleaseFile.objects.filter(
            release=release,
            ident__in=filename_idents,
        ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug('Release artifact %r not found in database (release_id=%s)',
                         filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next((f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = (headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = (result[0], zlib.decompress(result[1]), result[2], encoding)

    return result
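fetch_release_file looks an artifact up both by its full-URL ident and by a hostless '~/path?query' ident, so the same file resolves regardless of which host it was uploaded against. A standalone sketch of that normalization (using urllib.parse; the original goes through the Python 2 compatible import):

from urllib.parse import urlparse

def hostless_path(url):
    # http://example.com/static/app.js?v=1  ->  ~/static/app.js?v=1
    parsed = urlparse(url)
    path = '~' + parsed.path
    if parsed.query:
        path += '?' + parsed.query
    return path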
Example #47
0
 def _generate_cache_version(self):
     return md5_text("&".join(sorted(f.attname for f in self.model._meta.fields))).hexdigest()[:3]
Example #48
0
def fetch_file(url, project=None, release=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the cache.
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise CannotFetchSource({
            'type': EventError.JS_MISSING_SOURCE,
            'url': expose_url(url),
        })
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release)
    else:
        result = None

    cache_key = 'source:cache:v3:%s' % (
        md5_text(url).hexdigest(),
    )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Previous caches would be a 3-tuple instead of a 4-tuple,
            # so this is being maintained for backwards compatibility
            try:
                encoding = result[3]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = (result[0], zlib.decompress(result[1]), result[2], encoding)

    if result is None:
        # lock down domains that are problematic
        domain = urlparse(url).netloc
        domain_key = 'source:blacklist:v2:%s' % (
            md5_text(domain).hexdigest(),
        )
        domain_result = cache.get(domain_key)
        if domain_result:
            domain_result['url'] = url
            raise CannotFetchSource(domain_result)

        headers = {}
        if project and is_valid_origin(url, project=project):
            token = project.get_option('sentry:token')
            if token:
                token_header = project.get_option(
                    'sentry:token_header',
                    'X-Sentry-Token',
                )
                headers[token_header] = token

        logger.debug('Fetching %r from the internet', url)

        with metrics.timer('sourcemaps.fetch'):
            http_session = http.build_session()
            response = None
            try:
                try:
                    start = time.time()
                    response = http_session.get(
                        url,
                        allow_redirects=True,
                        verify=False,
                        headers=headers,
                        timeout=settings.SENTRY_SOURCE_FETCH_SOCKET_TIMEOUT,
                        stream=True,
                    )

                    try:
                        cl = int(response.headers['content-length'])
                    except (LookupError, ValueError):
                        cl = 0
                    if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                        raise OverflowError()

                    contents = []
                    cl = 0

                    # Only need to even attempt to read the response body if we
                    # got a 200 OK
                    if response.status_code == 200:
                        for chunk in response.iter_content(16 * 1024):
                            if time.time() - start > settings.SENTRY_SOURCE_FETCH_TIMEOUT:
                                raise Timeout()
                            contents.append(chunk)
                            cl += len(chunk)
                            if cl > settings.SENTRY_SOURCE_FETCH_MAX_SIZE:
                                raise OverflowError()

                except Exception as exc:
                    logger.debug('Unable to fetch %r', url, exc_info=True)
                    if isinstance(exc, RestrictedIPAddress):
                        error = {
                            'type': EventError.RESTRICTED_IP,
                            'url': expose_url(url),
                        }
                    elif isinstance(exc, SuspiciousOperation):
                        error = {
                            'type': EventError.SECURITY_VIOLATION,
                            'url': expose_url(url),
                        }
                    elif isinstance(exc, Timeout):
                        error = {
                            'type': EventError.JS_FETCH_TIMEOUT,
                            'url': expose_url(url),
                            'timeout': settings.SENTRY_SOURCE_FETCH_TIMEOUT,
                        }
                    elif isinstance(exc, OverflowError):
                        error = {
                            'type': EventError.JS_TOO_LARGE,
                            'url': expose_url(url),
                            # We want size in megabytes to format nicely
                            'max_size': float(settings.SENTRY_SOURCE_FETCH_MAX_SIZE) / 1024 / 1024,
                        }
                    elif isinstance(exc, (RequestException, ZeroReturnError)):
                        error = {
                            'type': EventError.JS_GENERIC_FETCH_ERROR,
                            'value': six.text_type(type(exc)),
                            'url': expose_url(url),
                        }
                    else:
                        logger.exception(six.text_type(exc))
                        error = {
                            'type': EventError.UNKNOWN_ERROR,
                            'url': expose_url(url),
                        }

                    # TODO(dcramer): we want to be less aggressive on disabling domains
                    cache.set(domain_key, error or '', 300)
                    logger.warning('source.disabled', extra=error)
                    raise CannotFetchSource(error)

                body = b''.join(contents)
                z_body = zlib.compress(body)
                headers = {k.lower(): v for k, v in response.headers.items()}
                encoding = response.encoding

                cache.set(cache_key, (headers, z_body, response.status_code, encoding), 60)
                result = (headers, body, response.status_code, encoding)
            finally:
                if response is not None:
                    response.close()

    if result[2] != 200:
        logger.debug('HTTP %s when fetching %r', result[2], url,
                     exc_info=True)
        error = {
            'type': EventError.JS_INVALID_HTTP_CODE,
            'value': result[2],
            'url': expose_url(url),
        }
        raise CannotFetchSource(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        body_start = result[1][:20].lstrip()  # Discard leading whitespace (often found before doctype)

        if body_start[:1] == u'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise CannotFetchSource(error)

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result[1], six.binary_type):
        try:
            result = (result[0], result[1].encode('utf8'), result[2], 'utf8')
        except UnicodeEncodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                'url': expose_url(url),
            }
            raise CannotFetchSource(error)

    return UrlResult(url, result[0], result[1], result[3])
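The streaming loop above enforces both an overall deadline and a maximum body size while reading in 16 KB chunks. A standalone sketch of that guard (exception types simplified; the original raises Timeout and OverflowError and maps them to event errors in the surrounding handler):

import time

def read_capped(response, max_size, deadline_seconds):
    # Consume a requests streaming response in chunks, bailing out if the body
    # grows past max_size or the wall-clock deadline is exceeded.
    start = time.time()
    chunks, total = [], 0
    for chunk in response.iter_content(16 * 1024):
        if time.time() - start > deadline_seconds:
            raise TimeoutError('fetch deadline exceeded')
        total += len(chunk)
        if total > max_size:
            raise OverflowError('body larger than max_size')
        chunks.append(chunk)
    return b''.join(chunks)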
Example #49
0
    def post_process(self, event, **kwargs):
        token = self.get_option('token', event.project)
        index = self.get_option('index', event.project)
        instance = self.get_option('instance', event.project)
        if not (token and index and instance):
            metrics.incr('integrations.splunk.forward-event.unconfigured', tags={
                'project_id': event.project_id,
                'organization_id': event.project.organization_id,
                'event_type': event.get_event_type(),
            })
            return

        if not instance.endswith('/services/collector'):
            instance = instance.rstrip('/') + '/services/collector'

        source = self.get_option('source', event.project) or 'sentry'

        rl_key = 'splunk:{}'.format(md5_text(token).hexdigest())
        # limit splunk to 1000 requests/second
        if ratelimiter.is_limited(rl_key, limit=1000, window=1):
            metrics.incr('integrations.splunk.forward-event.rate-limited', tags={
                'project_id': event.project_id,
                'organization_id': event.project.organization_id,
                'event_type': event.get_event_type(),
            })
            return

        payload = {
            'time': int(event.datetime.strftime('%s')),
            'source': source,
            'index': index,
            'event': self.get_event_payload(event),
        }
        host = self.get_host_for_splunk(event)
        if host:
            payload['host'] = host

        session = http.build_session()
        try:
            # https://docs.splunk.com/Documentation/Splunk/7.2.3/Data/TroubleshootHTTPEventCollector
            resp = session.post(
                instance,
                json=payload,
                # Splunk cloud instances' certificates don't play nicely
                verify=False,
                headers={
                    'Authorization': 'Splunk {}'.format(token)
                },
                timeout=5,
            )
            if resp.status_code != 200:
                raise SplunkError.from_response(resp)
        except Exception as exc:
            metric = 'integrations.splunk.forward-event.error'
            metrics.incr(metric, tags={
                'project_id': event.project_id,
                'organization_id': event.project.organization_id,
                'event_type': event.get_event_type(),
                'error_code': getattr(exc, 'code', None),
            })
            logger.info(
                metric,
                extra={
                    'instance': instance,
                    'project_id': event.project_id,
                    'organization_id': event.project.organization_id,
                },
            )

            if isinstance(exc, ReadTimeout):
                # If we get a ReadTimeout we don't need to raise an error here.
                # Just log and return.
                return
            raise

        metrics.incr('integrations.splunk.forward-event.success', tags={
            'project_id': event.project_id,
            'organization_id': event.project.organization_id,
            'event_type': event.get_event_type(),
        })
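The rate-limit key above hashes the HEC token, so the secret itself never appears in cache keys or logs. A minimal sketch (prefix copied from the code above):

import hashlib

def splunk_ratelimit_key(token):
    return 'splunk:%s' % hashlib.md5(token.encode('utf-8')).hexdigest()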
Example #50
0
 def get_cache_key(cls, organization_id, version):
     return "release:3:%s:%s" % (organization_id,
                                 md5_text(version).hexdigest())