Beispiel #1
0
def fetch_sourcemap(url,
                    project=None,
                    release=None,
                    dist=None,
                    allow_scraping=True):
    """Fetch and parse the sourcemap at ``url``.

    ``url`` may be a ``data:`` URI carrying an inline base64 payload or
    a regular URL resolved through ``fetch_file``.  Returns the parsed
    sourcemap view; raises ``UnparseableSourcemap`` on any failure.
    """
    if is_data_uri(url):
        try:
            # Re-pad the base64 payload: data URIs frequently omit the
            # trailing '=' padding, which b64decode rejects.
            body = base64.b64decode(url[BASE64_PREAMBLE_LENGTH:] + (
                b'=' * (-(len(url) - BASE64_PREAMBLE_LENGTH) % 4)))
        except (TypeError, ValueError) as e:
            # py2 raises TypeError for bad base64, py3 raises
            # binascii.Error (a ValueError subclass).  ``e.message``
            # does not exist on py3, so render the message portably.
            raise UnparseableSourcemap({
                'url': '<base64>',
                'reason': six.text_type(e),
            })
    else:
        result = fetch_file(url,
                            project=project,
                            release=release,
                            dist=dist,
                            allow_scraping=allow_scraping)
        body = result.body
    try:
        return view_from_json(body)
    except Exception as exc:
        # This is in debug because the product shows an error already.
        logger.debug(six.text_type(exc), exc_info=True)
        raise UnparseableSourcemap({
            'url': http.expose_url(url),
        })
Beispiel #2
0
def fetch_sourcemap(url, project=None, release=None, dist=None,
                    allow_scraping=True):
    """Fetch and parse the sourcemap at ``url``.

    Supports both ``data:`` URIs (inline base64 payloads) and regular
    URLs fetched via ``fetch_file``.  Raises ``UnparseableSourcemap``
    when the map cannot be decoded or parsed.
    """
    if is_data_uri(url):
        try:
            # Restore the '=' padding that data URIs commonly drop,
            # otherwise b64decode rejects the payload.
            body = base64.b64decode(
                url[BASE64_PREAMBLE_LENGTH:] + (b'=' * (-(len(url) - BASE64_PREAMBLE_LENGTH) % 4))
            )
        except (TypeError, ValueError) as e:
            # py2 raises TypeError, py3 raises binascii.Error (a
            # ValueError subclass); ``e.message`` is py2-only, so use
            # six.text_type for a portable error message.
            raise UnparseableSourcemap({
                'url': '<base64>',
                'reason': six.text_type(e),
            })
    else:
        result = fetch_file(url, project=project, release=release,
                            dist=dist,
                            allow_scraping=allow_scraping)
        body = result.body
    try:
        return view_from_json(body)
    except Exception as exc:
        # This is in debug because the product shows an error already.
        logger.debug(six.text_type(exc), exc_info=True)
        raise UnparseableSourcemap({
            'url': http.expose_url(url),
        })
Beispiel #3
0
def fetch_sourcemap(url,
                    project=None,
                    release=None,
                    dist=None,
                    allow_scraping=True):
    """Retrieve the sourcemap at ``url`` and parse it.

    Handles both ``data:`` URIs with an inline base64 payload and
    ordinary URLs resolved through ``fetch_file``.  Raises
    ``UnparseableSourcemap`` if decoding or parsing fails.
    """
    if not is_data_uri(url):
        # look in the database and, if not found, optionally try to scrape the web
        body = fetch_file(url,
                          project=project,
                          release=release,
                          dist=dist,
                          allow_scraping=allow_scraping).body
    else:
        encoded = url[BASE64_PREAMBLE_LENGTH:]
        # data URIs often drop the trailing '=' padding; restore it so
        # that b64decode accepts the payload
        padding = b"=" * (-(len(url) - BASE64_PREAMBLE_LENGTH) % 4)
        try:
            body = base64.b64decode(encoded + padding)
        except TypeError as e:
            raise UnparseableSourcemap({
                "url": "<base64>",
                "reason": six.text_type(e)
            })
    try:
        return SourceMapView.from_json_bytes(body)
    except Exception as exc:
        # This is in debug because the product shows an error already.
        logger.debug(six.text_type(exc), exc_info=True)
        raise UnparseableSourcemap({"url": http.expose_url(url)})
Beispiel #4
0
    def process_frame(self, processable_frame, processing_task):
        """Demangle one minified JavaScript frame via its sourcemap.

        Returns ``None`` when the frame cannot be processed; otherwise a
        3-tuple ``([new_frame], raw_frames, all_errors)`` where
        ``raw_frames`` is ``[raw_frame]`` only if expanding the raw
        (still-minified) frame changed it (else ``None``), and
        ``all_errors`` collects non-fatal EventError dicts produced
        along the way.
        """
        frame = processable_frame.frame
        token = None  # sourcemap token resolved for this frame, if any

        cache = self.cache
        sourcemaps = self.sourcemaps
        all_errors = []
        sourcemap_applied = False

        # can't fetch source if there's no filename present or no line
        if not frame.get('abs_path') or not frame.get('lineno'):
            return

        # can't fetch if this is internal node module as well
        # therefore we only process user-land frames (starting with /)
        # or those created by bundle/webpack internals
        if self.data.get('platform') == 'node' and \
                not frame.get('abs_path').startswith(('/', 'app:', 'webpack:')):
            return

        # surface any fetch errors previously recorded for this source
        errors = cache.get_errors(frame['abs_path'])
        if errors:
            all_errors.extend(errors)

        # This might fail but that's okay, we try with a different path a
        # bit later down the road.
        source = self.get_sourceview(frame['abs_path'])

        in_app = None
        # work on copies: new_frame gets demangled, raw_frame stays minified
        new_frame = dict(frame)
        raw_frame = dict(frame)

        sourcemap_url, sourcemap_view = sourcemaps.get_link(frame['abs_path'])
        self.sourcemaps_touched.add(sourcemap_url)
        # a sourcemap is unusable without a column number to look up
        if sourcemap_view and frame.get('colno') is None:
            all_errors.append(
                {
                    'type': EventError.JS_NO_COLUMN,
                    'url': http.expose_url(frame['abs_path']),
                }
            )
        elif sourcemap_view:
            # label used in error annotations; a data URI has no useful
            # URL of its own, so fall back to the frame's path
            if is_data_uri(sourcemap_url):
                sourcemap_label = frame['abs_path']
            else:
                sourcemap_label = sourcemap_url

            sourcemap_label = http.expose_url(sourcemap_label)

            if frame.get('function'):
                minified_function_name = frame['function']
                minified_source = self.get_sourceview(frame['abs_path'])
            else:
                minified_function_name = minified_source = None

            try:
                # Errors are 1-indexed in the frames, so we need to -1 to get
                # zero-indexed value from tokens.
                assert frame['lineno'] > 0, "line numbers are 1-indexed"
                token = sourcemap_view.lookup(frame['lineno'] - 1,
                                              frame['colno'] - 1,
                                              minified_function_name,
                                              minified_source)
            except Exception:
                token = None
                all_errors.append(
                    {
                        'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                        'column': frame.get('colno'),
                        'row': frame.get('lineno'),
                        'source': frame['abs_path'],
                        'sourcemap': sourcemap_label,
                    }
                )

            # persist the token so that we can find it later
            processable_frame.data['token'] = token

            # Store original data in annotation
            new_frame['data'] = dict(frame.get('data') or {}, sourcemap=sourcemap_label)

            sourcemap_applied = True

            if token is not None:
                abs_path = urljoin(sourcemap_url, token.src)

                logger.debug(
                    'Mapping compressed source %r to mapping in %r', frame['abs_path'], abs_path
                )
                source = self.get_sourceview(abs_path)

            # NOTE(review): if the lookup above failed (token is None),
            # abs_path was never assigned and this branch raises
            # NameError when source is also missing — confirm whether
            # that combination is reachable in practice.
            if source is None:
                errors = cache.get_errors(abs_path)
                if errors:
                    all_errors.extend(errors)
                else:
                    all_errors.append(
                        {
                            'type': EventError.JS_MISSING_SOURCE,
                            'url': http.expose_url(abs_path),
                        }
                    )

            if token is not None:
                # the tokens are zero indexed, so offset correctly
                new_frame['lineno'] = token.src_line + 1
                new_frame['colno'] = token.src_col + 1

                # Try to use the function name we got from symbolic
                original_function_name = token.function_name

                # In the ideal case we can use the function name from the
                # frame and the location to resolve the original name
                # through the heuristics in our sourcemap library.
                if original_function_name is None:
                    last_token = None

                    # Find the previous token for function name handling as a
                    # fallback.
                    if processable_frame.previous_frame and \
                       processable_frame.previous_frame.processor is self:
                        last_token = processable_frame.previous_frame.data.get('token')
                        if last_token:
                            original_function_name = last_token.name

                if original_function_name is not None:
                    new_frame['function'] = original_function_name

                filename = token.src
                # special case webpack support
                # abs_path will always be the full path with webpack:/// prefix.
                # filename will be relative to that
                if abs_path.startswith('webpack:'):
                    filename = abs_path
                    # webpack seems to use ~ to imply "relative to resolver root"
                    # which is generally seen for third party deps
                    # (i.e. node_modules)
                    if '/~/' in filename:
                        filename = '~/' + abs_path.split('/~/', 1)[-1]
                    else:
                        filename = filename.split('webpack:///', 1)[-1]

                    # As noted above:
                    # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
                    # * [node] sames goes for `./node_modules/` and '../node_modules/', which is used when bundling node apps
                    # * [node] and webpack, which includes it's own code to bootstrap all modules and its internals
                    #   eg. webpack:///webpack/bootstrap, webpack:///external
                    if filename.startswith('~/') or \
                            '/node_modules/' in filename or \
                            not filename.startswith('./'):
                        in_app = False
                    # And conversely, local dependencies start with './'
                    elif filename.startswith('./'):
                        in_app = True
                    # We want to explicitly generate a webpack module name
                    new_frame['module'] = generate_module(filename)

                # while you could technically use a subpath of 'node_modules' for your libraries,
                # it would be an extremely complicated decision and we've not seen anyone do it
                # so instead we assume if node_modules is in the path its part of the vendored code
                elif '/node_modules/' in abs_path:
                    in_app = False

                # app: prefixed paths are explicitly application code,
                # unless the mapped filename still points into node_modules
                if abs_path.startswith('app:'):
                    if filename and NODE_MODULES_RE.search(filename):
                        in_app = False
                    else:
                        in_app = True

                new_frame['abs_path'] = abs_path
                new_frame['filename'] = filename
                if not frame.get('module') and abs_path.startswith(
                    ('http:', 'https:', 'webpack:', 'app:')
                ):
                    new_frame['module'] = generate_module(abs_path)

        elif sourcemap_url:
            # no usable sourcemap view, but record the URL for context
            new_frame['data'] = dict(
                new_frame.get('data') or {}, sourcemap=http.expose_url(sourcemap_url)
            )

        # TODO: theoretically a minified source could point to
        # another mapped, minified source
        changed_frame = self.expand_frame(new_frame, source=source)

        # If we did not manage to match but we do have a line or column
        # we want to report an error here.
        if not new_frame.get('context_line') \
           and source and \
           new_frame.get('colno') is not None:
            all_errors.append(
                {
                    'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                    'column': new_frame['colno'],
                    'row': new_frame['lineno'],
                    'source': new_frame['abs_path'],
                }
            )

        changed_raw = sourcemap_applied and self.expand_frame(raw_frame)
        if sourcemap_applied or all_errors or changed_frame or \
           changed_raw:
            if in_app is not None:
                new_frame['in_app'] = in_app
                raw_frame['in_app'] = in_app
            # only hand back the raw frame when expansion changed it
            return [new_frame], [raw_frame] if changed_raw else None, all_errors
Beispiel #5
0
def fetch_file(url, project=None, release=None, dist=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Resolution order: release artifacts first, then the external cache,
    and finally a live HTTP fetch when scraping is allowed.

    :param url: URL of the file to fetch
    :param project: optional project used for origin validation and
        per-project fetch options (token header, SSL verification)
    :param release: optional release whose artifacts are searched first
    :param dist: optional distribution qualifying the release artifact
    :param allow_scraping: when False, never fall back to an HTTP fetch
    :raises http.CannotFetch: when the file cannot be retrieved or its
        content is unusable
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise http.CannotFetch(
            {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
        )
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    cache_key = 'source:cache:v4:%s' % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Older cache entries lack the trailing encoding field, so
            # tolerate short tuples for backwards compatibility
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(
                result[0], result[1], zlib.decompress(result[2]), result[3], encoding
            )

    if result is None:
        headers = {}
        verify_ssl = False
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option('sentry:verify_ssl', False))
            token = project.get_option('sentry:token')
            if token:
                token_header = project.get_option('sentry:token_header') or 'X-Sentry-Token'
                headers[token_header] = token

        with metrics.timer('sourcemaps.fetch'):
            result = http.fetch_file(url, headers=headers, verify_ssl=verify_ssl)
            # store compressed to keep the cache entry small
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url,
                 result.headers,
                 z_body,
                 result.status,
                 result.encoding),
                get_max_age(result.headers))

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch(
            {
                'type': EventError.FETCH_INVALID_HTTP_CODE,
                'value': result.status,
                'url': http.expose_url(url),
            }
        )

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, six.binary_type):
        try:
            result = http.UrlResult(
                result.url, result.headers,
                result.body.encode('utf8'), result.status, result.encoding
            )
        except UnicodeEncodeError:
            error = {
                'type': EventError.FETCH_INVALID_ENCODING,
                'value': 'utf8',
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        # BUG FIX: result.body is six.binary_type here, so compare
        # against a bytes literal; the previous u'<' comparison could
        # never match on Python 3 (bytes != str), silently disabling
        # this HTML-content guard.
        if body_start[:1] == b'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise http.CannotFetch(error)

    return result
Beispiel #6
0
    def process_frame(self, processable_frame, processing_task):
        """Demangle one minified JavaScript frame via its sourcemap.

        Returns ``None`` when the frame cannot be processed; otherwise a
        3-tuple ``([new_frame], raw_frames, all_errors)`` where
        ``raw_frames`` is ``[raw_frame]`` only if expanding the raw
        (still-minified) frame changed it (else ``None``), and
        ``all_errors`` collects non-fatal EventError dicts produced
        along the way.
        """
        frame = processable_frame.frame
        token = None  # sourcemap token resolved for this frame, if any

        cache = self.cache
        sourcemaps = self.sourcemaps
        all_errors = []
        sourcemap_applied = False

        # can't fetch source if there's no filename present or no line
        if not frame.get('abs_path') or not frame.get('lineno'):
            return

        # can't fetch if this is internal node module as well
        # therefore we only process user-land frames (starting with /)
        # or those created by bundle/webpack internals
        if self.data.get('platform') == 'node' and \
                not frame.get('abs_path').startswith(('/', 'app:', 'webpack:')):
            return

        # surface any fetch errors previously recorded for this source
        errors = cache.get_errors(frame['abs_path'])
        if errors:
            all_errors.extend(errors)

        # This might fail but that's okay, we try with a different path a
        # bit later down the road.
        source = self.get_sourceview(frame['abs_path'])

        in_app = None
        # work on copies: new_frame gets demangled, raw_frame stays minified
        new_frame = dict(frame)
        raw_frame = dict(frame)

        sourcemap_url, sourcemap_view = sourcemaps.get_link(frame['abs_path'])
        self.sourcemaps_touched.add(sourcemap_url)
        # a sourcemap is unusable without a column number to look up
        if sourcemap_view and frame.get('colno') is None:
            all_errors.append(
                {
                    'type': EventError.JS_NO_COLUMN,
                    'url': http.expose_url(frame['abs_path']),
                }
            )
        elif sourcemap_view:
            # label used in error annotations; a data URI has no useful
            # URL of its own, so fall back to the frame's path
            if is_data_uri(sourcemap_url):
                sourcemap_label = frame['abs_path']
            else:
                sourcemap_label = sourcemap_url

            sourcemap_label = http.expose_url(sourcemap_label)

            if frame.get('function'):
                minified_function_name = frame['function']
                minified_source = self.get_sourceview(frame['abs_path'])
            else:
                minified_function_name = minified_source = None

            try:
                # Errors are 1-indexed in the frames, so we need to -1 to get
                # zero-indexed value from tokens.
                assert frame['lineno'] > 0, "line numbers are 1-indexed"
                token = sourcemap_view.lookup(frame['lineno'] - 1,
                                              frame['colno'] - 1,
                                              minified_function_name,
                                              minified_source)
            except Exception:
                token = None
                all_errors.append(
                    {
                        'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                        'column': frame.get('colno'),
                        'row': frame.get('lineno'),
                        'source': frame['abs_path'],
                        'sourcemap': sourcemap_label,
                    }
                )

            # persist the token so that we can find it later
            processable_frame.data['token'] = token

            # Store original data in annotation
            new_frame['data'] = dict(frame.get('data') or {}, sourcemap=sourcemap_label)

            sourcemap_applied = True

            if token is not None:
                abs_path = urljoin(sourcemap_url, token.src)

                logger.debug(
                    'Mapping compressed source %r to mapping in %r', frame['abs_path'], abs_path
                )
                source = self.get_sourceview(abs_path)

            # NOTE(review): if the lookup above failed (token is None),
            # abs_path was never assigned and this branch raises
            # NameError when source is also missing — confirm whether
            # that combination is reachable in practice.
            if source is None:
                errors = cache.get_errors(abs_path)
                if errors:
                    all_errors.extend(errors)
                else:
                    all_errors.append(
                        {
                            'type': EventError.JS_MISSING_SOURCE,
                            'url': http.expose_url(abs_path),
                        }
                    )

            if token is not None:
                # the tokens are zero indexed, so offset correctly
                new_frame['lineno'] = token.src_line + 1
                new_frame['colno'] = token.src_col + 1

                # Try to use the function name we got from symbolic
                original_function_name = token.function_name

                # In the ideal case we can use the function name from the
                # frame and the location to resolve the original name
                # through the heuristics in our sourcemap library.
                if original_function_name is None:
                    last_token = None

                    # Find the previous token for function name handling as a
                    # fallback.
                    if processable_frame.previous_frame and \
                       processable_frame.previous_frame.processor is self:
                        last_token = processable_frame.previous_frame.data.get('token')
                        if last_token:
                            original_function_name = last_token.name

                if original_function_name is not None:
                    new_frame['function'] = original_function_name

                filename = token.src
                # special case webpack support
                # abs_path will always be the full path with webpack:/// prefix.
                # filename will be relative to that
                if abs_path.startswith('webpack:'):
                    filename = abs_path
                    # webpack seems to use ~ to imply "relative to resolver root"
                    # which is generally seen for third party deps
                    # (i.e. node_modules)
                    if '/~/' in filename:
                        filename = '~/' + abs_path.split('/~/', 1)[-1]
                    else:
                        filename = filename.split('webpack:///', 1)[-1]

                    # As noted above:
                    # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
                    # * [node] sames goes for `./node_modules/` and '../node_modules/', which is used when bundling node apps
                    # * [node] and webpack, which includes it's own code to bootstrap all modules and its internals
                    #   eg. webpack:///webpack/bootstrap, webpack:///external
                    if filename.startswith('~/') or \
                            '/node_modules/' in filename or \
                            not filename.startswith('./'):
                        in_app = False
                    # And conversely, local dependencies start with './'
                    elif filename.startswith('./'):
                        in_app = True
                    # We want to explicitly generate a webpack module name
                    new_frame['module'] = generate_module(filename)

                # while you could technically use a subpath of 'node_modules' for your libraries,
                # it would be an extremely complicated decision and we've not seen anyone do it
                # so instead we assume if node_modules is in the path its part of the vendored code
                elif '/node_modules/' in abs_path:
                    in_app = False

                # app: prefixed paths are explicitly application code,
                # unless the mapped filename still points into node_modules
                if abs_path.startswith('app:'):
                    if filename and NODE_MODULES_RE.search(filename):
                        in_app = False
                    else:
                        in_app = True

                new_frame['abs_path'] = abs_path
                new_frame['filename'] = filename
                if not frame.get('module') and abs_path.startswith(
                    ('http:', 'https:', 'webpack:', 'app:')
                ):
                    new_frame['module'] = generate_module(abs_path)

        elif sourcemap_url:
            # no usable sourcemap view, but record the URL for context
            new_frame['data'] = dict(
                new_frame.get('data') or {}, sourcemap=http.expose_url(sourcemap_url)
            )

        # TODO: theoretically a minified source could point to
        # another mapped, minified source
        changed_frame = self.expand_frame(new_frame, source=source)

        # If we did not manage to match but we do have a line or column
        # we want to report an error here.
        if not new_frame.get('context_line') \
           and source and \
           new_frame.get('colno') is not None:
            all_errors.append(
                {
                    'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                    'column': new_frame['colno'],
                    'row': new_frame['lineno'],
                    'source': new_frame['abs_path'],
                }
            )

        changed_raw = sourcemap_applied and self.expand_frame(raw_frame)
        if sourcemap_applied or all_errors or changed_frame or \
           changed_raw:
            if in_app is not None:
                new_frame['in_app'] = in_app
                raw_frame['in_app'] = in_app
            # only hand back the raw frame when expansion changed it
            return [new_frame], [raw_frame] if changed_raw else None, all_errors
Beispiel #7
0
def fetch_file(url, project=None, release=None, dist=None, allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Resolution order: release artifacts first, then the external cache,
    and finally a live HTTP fetch when scraping is allowed.

    :param url: URL of the file to fetch
    :param project: optional project used for origin validation and
        per-project fetch options (token header, SSL verification)
    :param release: optional release whose artifacts are searched first
    :param dist: optional distribution qualifying the release artifact
    :param allow_scraping: when False, never fall back to an HTTP fetch
    :raises http.CannotFetch: when the file cannot be retrieved or its
        content is unusable
    """
    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url[-3:] == '...':
        raise http.CannotFetch(
            {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
        )
    if release:
        with metrics.timer('sourcemaps.release_file'):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    cache_key = 'source:cache:v4:%s' % (md5_text(url).hexdigest(), )

    if result is None:
        if not allow_scraping or not url.startswith(('http:', 'https:')):
            error = {
                'type': EventError.JS_MISSING_SOURCE,
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

        logger.debug('Checking cache for url %r', url)
        result = cache.get(cache_key)
        if result is not None:
            # Older cache entries lack the trailing encoding field, so
            # tolerate short tuples for backwards compatibility
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(
                result[0], result[1], zlib.decompress(result[2]), result[3], encoding
            )

    if result is None:
        headers = {}
        verify_ssl = False
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option('sentry:verify_ssl', False))
            token = project.get_option('sentry:token')
            if token:
                token_header = project.get_option('sentry:token_header') or 'X-Sentry-Token'
                headers[token_header] = token

        with metrics.timer('sourcemaps.fetch'):
            result = http.fetch_file(url, headers=headers, verify_ssl=verify_ssl)
            # store compressed to keep the cache entry small
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url,
                 result.headers,
                 z_body,
                 result.status,
                 result.encoding),
                get_max_age(result.headers))

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch(
            {
                'type': EventError.FETCH_INVALID_HTTP_CODE,
                'value': result.status,
                'url': http.expose_url(url),
            }
        )

    # Make sure the file we're getting back is six.binary_type. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, six.binary_type):
        try:
            result = http.UrlResult(
                result.url, result.headers,
                result.body.encode('utf8'), result.status, result.encoding
            )
        except UnicodeEncodeError:
            error = {
                'type': EventError.FETCH_INVALID_ENCODING,
                'value': 'utf8',
                'url': http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if url.endswith('.js'):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        # BUG FIX: result.body is six.binary_type here, so compare
        # against a bytes literal; the previous u'<' comparison could
        # never match on Python 3 (bytes != str), silently disabling
        # this HTML-content guard.
        if body_start[:1] == b'<':
            error = {
                'type': EventError.JS_INVALID_CONTENT,
                'url': url,
            }
            raise http.CannotFetch(error)

    return result
Beispiel #8
0
    def process_frame(self, processable_frame, processing_task):
        """
        Attempt to demangle the given frame by applying the sourcemap
        registered for its ``abs_path``.

        Returns ``None`` when the frame cannot (or need not) be processed,
        otherwise a ``(new_frames, raw_frames, all_errors)`` tuple where
        ``raw_frames`` is ``None`` unless the pre-sourcemap frame was also
        expanded.
        """

        frame = processable_frame.frame
        token = None

        cache = self.cache
        sourcemaps = self.sourcemaps
        all_errors = []
        sourcemap_applied = False

        # can't demangle if there's no filename or line number present
        if not frame.get("abs_path") or not frame.get("lineno"):
            return

        # also can't demangle node's internal modules
        # therefore we only process user-land frames (starting with /)
        # or those created by bundle/webpack internals
        if self.data.get(
                "platform") == "node" and not frame.get("abs_path").startswith(
                    ("/", "app:", "webpack:")):
            return

        errors = cache.get_errors(frame["abs_path"])
        if errors:
            all_errors.extend(errors)

        # This might fail but that's okay, we try with a different path a
        # bit later down the road.
        source = self.get_sourceview(frame["abs_path"])

        in_app = None
        new_frame = dict(frame)
        raw_frame = dict(frame)

        sourcemap_url, sourcemap_view = sourcemaps.get_link(frame["abs_path"])
        self.sourcemaps_touched.add(sourcemap_url)
        if sourcemap_view and frame.get("colno") is None:
            # A sourcemap exists but the frame has no column, so a lookup
            # cannot be performed; record the error and fall through.
            all_errors.append({
                "type": EventError.JS_NO_COLUMN,
                "url": http.expose_url(frame["abs_path"])
            })
        elif sourcemap_view:
            # For inline (data URI) maps, label errors with the file that
            # embedded the map rather than the (huge) data URI itself.
            if is_data_uri(sourcemap_url):
                sourcemap_label = frame["abs_path"]
            else:
                sourcemap_label = sourcemap_url

            sourcemap_label = http.expose_url(sourcemap_label)

            if frame.get("function"):
                minified_function_name = frame["function"]
                minified_source = self.get_sourceview(frame["abs_path"])
            else:
                minified_function_name = minified_source = None

            try:
                # Errors are 1-indexed in the frames, so we need to -1 to get
                # zero-indexed value from tokens.
                assert frame["lineno"] > 0, "line numbers are 1-indexed"
                token = sourcemap_view.lookup(frame["lineno"] - 1,
                                              frame["colno"] - 1,
                                              minified_function_name,
                                              minified_source)
            except Exception:
                token = None
                all_errors.append({
                    "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
                    "column": frame.get("colno"),
                    "row": frame.get("lineno"),
                    "source": frame["abs_path"],
                    "sourcemap": sourcemap_label,
                })

            # persist the token so that we can find it later
            processable_frame.data["token"] = token

            # Store original data in annotation
            new_frame["data"] = dict(frame.get("data") or {},
                                     sourcemap=sourcemap_label)

            sourcemap_applied = True

            if token is not None:
                abs_path = non_standard_url_join(sourcemap_url, token.src)

                logger.debug("Mapping compressed source %r to mapping in %r",
                             frame["abs_path"], abs_path)
                source = self.get_sourceview(abs_path)

            # BUGFIX: ``abs_path`` is only bound when a token was found, so
            # guard on the token as well; previously a failed lookup combined
            # with a missing source raised a NameError here.
            if token is not None and source is None:
                errors = cache.get_errors(abs_path)
                if errors:
                    all_errors.extend(errors)
                else:
                    all_errors.append({
                        "type": EventError.JS_MISSING_SOURCE,
                        "url": http.expose_url(abs_path)
                    })

            if token is not None:
                # the tokens are zero indexed, so offset correctly
                new_frame["lineno"] = token.src_line + 1
                new_frame["colno"] = token.src_col + 1

                # Try to use the function name we got from symbolic
                original_function_name = token.function_name

                # In the ideal case we can use the function name from the
                # frame and the location to resolve the original name
                # through the heuristics in our sourcemap library.
                if original_function_name is None:
                    last_token = None

                    # Find the previous token for function name handling as a
                    # fallback.
                    if (processable_frame.previous_frame
                            and processable_frame.previous_frame.processor is
                            self):
                        last_token = processable_frame.previous_frame.data.get(
                            "token")
                        if last_token:
                            original_function_name = last_token.name

                if original_function_name is not None:
                    new_frame["function"] = original_function_name

                filename = token.src
                # special case webpack support
                # abs_path will always be the full path with webpack:/// prefix.
                # filename will be relative to that
                if abs_path.startswith("webpack:"):
                    filename = abs_path
                    # webpack seems to use ~ to imply "relative to resolver root"
                    # which is generally seen for third party deps
                    # (i.e. node_modules)
                    if "/~/" in filename:
                        filename = "~/" + abs_path.split("/~/", 1)[-1]
                    else:
                        filename = filename.split("webpack:///", 1)[-1]

                    # As noted above:
                    # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
                    # * [node] sames goes for `./node_modules/` and '../node_modules/', which is used when bundling node apps
                    # * [node] and webpack, which includes it's own code to bootstrap all modules and its internals
                    #   eg. webpack:///webpack/bootstrap, webpack:///external
                    if (filename.startswith("~/")
                            or "/node_modules/" in filename
                            or not filename.startswith("./")):
                        in_app = False
                    # And conversely, local dependencies start with './'
                    elif filename.startswith("./"):
                        in_app = True
                    # We want to explicitly generate a webpack module name
                    new_frame["module"] = generate_module(filename)

                # while you could technically use a subpath of 'node_modules' for your libraries,
                # it would be an extremely complicated decision and we've not seen anyone do it
                # so instead we assume if node_modules is in the path its part of the vendored code
                elif "/node_modules/" in abs_path:
                    in_app = False

                if abs_path.startswith("app:"):
                    if filename and NODE_MODULES_RE.search(filename):
                        in_app = False
                    else:
                        in_app = True

                new_frame["abs_path"] = abs_path
                new_frame["filename"] = filename
                if not frame.get("module") and abs_path.startswith(
                    ("http:", "https:", "webpack:", "app:")):
                    new_frame["module"] = generate_module(abs_path)

        elif sourcemap_url:
            # No usable sourcemap view, but remember which map we tried.
            new_frame["data"] = dict(new_frame.get("data") or {},
                                     sourcemap=http.expose_url(sourcemap_url))

        # TODO: theoretically a minified source could point to
        # another mapped, minified source
        changed_frame = self.expand_frame(new_frame, source=source)

        # If we did not manage to match but we do have a line or column
        # we want to report an error here.
        if not new_frame.get("context_line") and source and new_frame.get(
                "colno") is not None:
            all_errors.append({
                "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
                "column": new_frame["colno"],
                "row": new_frame["lineno"],
                "source": new_frame["abs_path"],
            })

        changed_raw = sourcemap_applied and self.expand_frame(raw_frame)

        if sourcemap_applied or all_errors or changed_frame or changed_raw:
            # In case we are done processing, we iterate over all errors that we got
            # and we filter out all `JS_MISSING_SOURCE` errors since we consider if we have
            # a `context_line` we have a symbolicated frame and we don't need to show the error
            has_context_line = bool(new_frame.get("context_line"))
            if has_context_line:
                all_errors[:] = [
                    x for x in all_errors
                    if x.get("type") is not EventError.JS_MISSING_SOURCE
                ]

            if in_app is not None:
                new_frame["in_app"] = in_app
                raw_frame["in_app"] = in_app

            new_frames = [new_frame]
            raw_frames = [raw_frame] if changed_raw else None
            return new_frames, raw_frames, all_errors
Beispiel #9
0
def fetch_file(url,
               project=None,
               release=None,
               dist=None,
               allow_scraping=True):
    """
    Pull down a URL, returning a UrlResult object.

    Attempts to fetch from the database first (assuming there's a release on the
    event), then the internet. Caches the result of each of those two attempts
    separately, whether or not those attempts are successful. Used for both
    source files and source maps.

    Raises ``http.CannotFetch`` when the URL is truncated, scraping is
    disallowed (or the scheme is not http/https), the response status is not
    200, the body cannot be coerced to utf-8 bytes, a ``.js`` URL returns
    HTML, or the body is too large to cache.
    """

    # If our url has been truncated, it'd be impossible to fetch
    # so we check for this early and bail
    if url.endswith("..."):
        raise http.CannotFetch({
            "type": EventError.JS_MISSING_SOURCE,
            "url": http.expose_url(url)
        })

    # if we've got a release to look on, try that first (incl associated cache)
    if release:
        with metrics.timer("sourcemaps.release_file"):
            result = fetch_release_file(url, release, dist)
    else:
        result = None

    # otherwise, try the web-scraping cache and then the web itself

    cache_key = f"source:cache:v4:{md5_text(url).hexdigest()}"

    if result is None:
        if not allow_scraping or not url.startswith(("http:", "https:")):
            error = {
                "type": EventError.JS_MISSING_SOURCE,
                "url": http.expose_url(url)
            }
            raise http.CannotFetch(error)

        logger.debug("Checking cache for url %r", url)
        result = cache.get(cache_key)
        if result is not None:
            # Older cache entries were written as a 4-tuple without the
            # trailing encoding element (current entries are the 5-tuple
            # written below), so this is being maintained for backwards
            # compatibility
            try:
                encoding = result[4]
            except IndexError:
                encoding = None
            # We got a cache hit, but the body is compressed, so we
            # need to decompress it before handing it off
            result = http.UrlResult(result[0], result[1],
                                    zlib.decompress(result[2]), result[3],
                                    encoding)

    if result is None:
        headers = {}
        verify_ssl = False
        if project and is_valid_origin(url, project=project):
            verify_ssl = bool(project.get_option("sentry:verify_ssl", False))
            token = project.get_option("sentry:token")
            if token:
                token_header = project.get_option(
                    "sentry:token_header") or "X-Sentry-Token"
                headers[token_header] = token

        with metrics.timer("sourcemaps.fetch"):
            result = http.fetch_file(url,
                                     headers=headers,
                                     verify_ssl=verify_ssl)
            z_body = zlib.compress(result.body)
            cache.set(
                cache_key,
                (url, result.headers, z_body, result.status, result.encoding),
                get_max_age(result.headers),
            )

            # since the cache.set above can fail we can end up in a situation
            # where the file is too large for the cache. In that case we abort
            # the fetch and cache a failure and lock the domain for future
            # http fetches.
            if cache.get(cache_key) is None:
                error = {
                    "type": EventError.TOO_LARGE_FOR_CACHE,
                    "url": http.expose_url(url),
                }
                http.lock_domain(url, error=error)
                raise http.CannotFetch(error)

    # If we did not get a 200 OK we just raise a cannot fetch here.
    if result.status != 200:
        raise http.CannotFetch({
            "type": EventError.FETCH_INVALID_HTTP_CODE,
            "value": result.status,
            "url": http.expose_url(url),
        })

    # Make sure the file we're getting back is bytes. The only
    # reason it'd not be binary would be from old cached blobs, so
    # for compatibility with current cached files, let's coerce back to
    # binary and say utf8 encoding.
    if not isinstance(result.body, bytes):
        try:
            result = http.UrlResult(
                result.url,
                result.headers,
                result.body.encode("utf8"),
                result.status,
                result.encoding,
            )
        except UnicodeEncodeError:
            error = {
                "type": EventError.FETCH_INVALID_ENCODING,
                "value": "utf8",
                "url": http.expose_url(url),
            }
            raise http.CannotFetch(error)

    # For JavaScript files, check if content is something other than JavaScript/JSON (i.e. HTML)
    # NOTE: possible to have JS files that don't actually end w/ ".js", but
    # this should catch 99% of cases
    if urlsplit(url).path.endswith(".js"):
        # Check if response is HTML by looking if the first non-whitespace character is an open tag ('<').
        # This cannot parse as valid JS/JSON.
        # NOTE: not relying on Content-Type header because apps often don't set this correctly
        # Discard leading whitespace (often found before doctype)
        body_start = result.body[:20].lstrip()

        if body_start[:1] == b"<":
            error = {"type": EventError.JS_INVALID_CONTENT, "url": url}
            raise http.CannotFetch(error)

    return result
Beispiel #10
0
    def process_frame(self, processable_frame, processing_task):
        """
        Attempt to demangle the given frame using any sourcemap registered
        for its ``abs_path``.

        Returns ``None`` when the frame cannot be processed, otherwise a
        ``([new_frame], raw_frames, all_errors)`` tuple where ``raw_frames``
        is ``None`` unless the pre-sourcemap frame was also expanded.
        """
        frame = processable_frame.frame
        token = None

        cache = self.cache
        sourcemaps = self.sourcemaps
        all_errors = []
        sourcemap_applied = False

        # can't fetch source if there's no filename present
        if not frame.get('abs_path'):
            return

        errors = cache.get_errors(frame['abs_path'])
        if errors:
            all_errors.extend(errors)

        # This might fail but that's okay, we try with a different path a
        # bit later down the road.
        source = self.get_source(frame['abs_path'])

        in_app = None
        new_frame = dict(frame)
        raw_frame = dict(frame)

        sourcemap_url, sourcemap_view = sourcemaps.get_link(frame['abs_path'])
        if sourcemap_view and frame.get('colno') is None:
            # A sourcemap exists but the frame has no column, so a lookup
            # cannot be performed; record the error and fall through.
            all_errors.append({
                'type': EventError.JS_NO_COLUMN,
                'url': http.expose_url(frame['abs_path']),
            })
        elif sourcemap_view:
            # For inline (data URI) maps, label errors with the file that
            # embedded the map rather than the (huge) data URI itself.
            if is_data_uri(sourcemap_url):
                sourcemap_label = frame['abs_path']
            else:
                sourcemap_label = sourcemap_url

            sourcemap_label = http.expose_url(sourcemap_label)

            try:
                # Errors are 1-indexed in the frames, so we need to -1 to get
                # zero-indexed value from tokens.
                assert frame['lineno'] > 0, "line numbers are 1-indexed"
                token = sourcemap_view.lookup_token(frame['lineno'] - 1,
                                                    frame['colno'])
            except Exception:
                token = None
                all_errors.append({
                    'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                    'column': frame.get('colno'),
                    'row': frame.get('lineno'),
                    'source': frame['abs_path'],
                    'sourcemap': sourcemap_label,
                })

            # Store original data in annotation
            new_frame['data'] = dict(frame.get('data') or {},
                                     sourcemap=sourcemap_label)

            sourcemap_applied = True

            if token is not None:
                abs_path = urljoin(sourcemap_url, token.src)

                logger.debug('Mapping compressed source %r to mapping in %r',
                             frame['abs_path'], abs_path)
                source = self.get_source(abs_path)

            # BUGFIX: ``abs_path`` is only bound when a token was found, so
            # guard on the token as well; previously a failed lookup combined
            # with a missing source raised a NameError here.
            if token is not None and not source:
                errors = cache.get_errors(abs_path)
                if errors:
                    all_errors.extend(errors)
                else:
                    all_errors.append({
                        'type': EventError.JS_MISSING_SOURCE,
                        'url': http.expose_url(abs_path),
                    })

            if token is not None:
                # Token's return zero-indexed lineno's
                new_frame['lineno'] = token.src_line + 1
                new_frame['colno'] = token.src_col
                # BUGFIX(dead code): a ``last_token`` variable was assigned
                # from ``token`` before the lookup ever ran, so it was always
                # None and its "previous function" branch was unreachable;
                # use the current token's name directly (same behavior).
                new_frame['function'] = token.name or frame.get('function')

                filename = token.src
                # special case webpack support
                # abs_path will always be the full path with webpack:/// prefix.
                # filename will be relative to that
                if abs_path.startswith('webpack:'):
                    filename = abs_path
                    # webpack seems to use ~ to imply "relative to resolver root"
                    # which is generally seen for third party deps
                    # (i.e. node_modules)
                    if '/~/' in filename:
                        filename = '~/' + abs_path.split('/~/', 1)[-1]
                    else:
                        filename = filename.split('webpack:///', 1)[-1]

                    # As noted above, '~/' means they're coming from node_modules,
                    # so these are not app dependencies
                    if filename.startswith('~/'):
                        in_app = False
                    # And conversely, local dependencies start with './'
                    elif filename.startswith('./'):
                        in_app = True

                    # We want to explicitly generate a webpack module name
                    new_frame['module'] = generate_module(filename)

                if abs_path.startswith('app:'):
                    if NODE_MODULES_RE.search(filename):
                        in_app = False
                    else:
                        in_app = True

                new_frame['abs_path'] = abs_path
                new_frame['filename'] = filename
                if not frame.get('module') and abs_path.startswith(
                    ('http:', 'https:', 'webpack:', 'app:')):
                    new_frame['module'] = generate_module(abs_path)

        elif sourcemap_url:
            # No usable sourcemap view, but remember which map we tried.
            new_frame['data'] = dict(new_frame.get('data') or {},
                                     sourcemap=http.expose_url(sourcemap_url))

        # TODO: theoretically a minified source could point to
        # another mapped, minified source
        changed_frame = self.expand_frame(new_frame, source=source)

        if not new_frame.get('context_line') and source:
            all_errors.append({
                'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                # Column might be missing here
                'column': new_frame.get('colno'),
                # Line might be missing here
                'row': new_frame.get('lineno'),
                'source': new_frame['abs_path'],
            })

        changed_raw = sourcemap_applied and self.expand_frame(raw_frame)
        if sourcemap_applied or all_errors or changed_frame or \
           changed_raw:
            if in_app is not None:
                new_frame['in_app'] = in_app
                raw_frame['in_app'] = in_app
            return [new_frame
                    ], [raw_frame] if changed_raw else None, all_errors
Beispiel #11
0
    def process_frame(self, processable_frame, processing_task):
        """
        Attempt to demangle the given frame using any sourcemap registered
        for its ``abs_path``.

        Returns ``None`` when the frame cannot be processed, otherwise a
        ``([new_frame], raw_frames, all_errors)`` tuple where ``raw_frames``
        is ``None`` unless the pre-sourcemap frame was also expanded.
        """
        frame = processable_frame.frame
        token = None

        cache = self.cache
        sourcemaps = self.sourcemaps
        all_errors = []
        sourcemap_applied = False

        # can't fetch source if there's no filename present
        if not frame.get('abs_path'):
            return

        errors = cache.get_errors(frame['abs_path'])
        if errors:
            all_errors.extend(errors)

        # This might fail but that's okay, we try with a different path a
        # bit later down the road.
        source = self.get_source(frame['abs_path'])

        in_app = None
        new_frame = dict(frame)
        raw_frame = dict(frame)

        sourcemap_url, sourcemap_view = sourcemaps.get_link(frame['abs_path'])
        self.sourcemaps_touched.add(sourcemap_url)
        if sourcemap_view and frame.get('colno') is None:
            # A sourcemap exists but the frame has no column, so a lookup
            # cannot be performed; record the error and fall through.
            all_errors.append({
                'type': EventError.JS_NO_COLUMN,
                'url': http.expose_url(frame['abs_path']),
            })
        elif sourcemap_view:
            # For inline (data URI) maps, label errors with the file that
            # embedded the map rather than the (huge) data URI itself.
            if is_data_uri(sourcemap_url):
                sourcemap_label = frame['abs_path']
            else:
                sourcemap_label = sourcemap_url

            sourcemap_label = http.expose_url(sourcemap_label)

            try:
                # Errors are 1-indexed in the frames, so we need to -1 to get
                # zero-indexed value from tokens.
                assert frame['lineno'] > 0, "line numbers are 1-indexed"
                token = sourcemap_view.lookup_token(
                    frame['lineno'] - 1, frame['colno'])
            except Exception:
                token = None
                all_errors.append({
                    'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                    'column': frame.get('colno'),
                    'row': frame.get('lineno'),
                    'source': frame['abs_path'],
                    'sourcemap': sourcemap_label,
                })

            # Store original data in annotation
            new_frame['data'] = dict(frame.get('data') or {},
                                     sourcemap=sourcemap_label)

            sourcemap_applied = True

            if token is not None:
                abs_path = urljoin(sourcemap_url, token.src)

                logger.debug('Mapping compressed source %r to mapping in %r',
                             frame['abs_path'], abs_path)
                source = self.get_source(abs_path)

            # BUGFIX: ``abs_path`` is only bound when a token was found, so
            # guard on the token as well; previously a failed lookup combined
            # with a missing source raised a NameError here.
            if token is not None and not source:
                errors = cache.get_errors(abs_path)
                if errors:
                    all_errors.extend(errors)
                else:
                    all_errors.append({
                        'type': EventError.JS_MISSING_SOURCE,
                        'url': http.expose_url(abs_path),
                    })

            if token is not None:
                # Token's return zero-indexed lineno's
                new_frame['lineno'] = token.src_line + 1
                new_frame['colno'] = token.src_col
                # BUGFIX(dead code): a ``last_token`` variable was assigned
                # from ``token`` before the lookup ever ran, so it was always
                # None and its "previous function" branch was unreachable;
                # use the current token's name directly (same behavior).
                new_frame['function'] = token.name or frame.get('function')

                filename = token.src
                # special case webpack support
                # abs_path will always be the full path with webpack:/// prefix.
                # filename will be relative to that
                if abs_path.startswith('webpack:'):
                    filename = abs_path
                    # webpack seems to use ~ to imply "relative to resolver root"
                    # which is generally seen for third party deps
                    # (i.e. node_modules)
                    if '/~/' in filename:
                        filename = '~/' + abs_path.split('/~/', 1)[-1]
                    else:
                        filename = filename.split('webpack:///', 1)[-1]

                    # As noted above, '~/' means they're coming from node_modules,
                    # so these are not app dependencies
                    if filename.startswith('~/'):
                        in_app = False
                    # And conversely, local dependencies start with './'
                    elif filename.startswith('./'):
                        in_app = True

                    # We want to explicitly generate a webpack module name
                    new_frame['module'] = generate_module(filename)

                if abs_path.startswith('app:'):
                    if NODE_MODULES_RE.search(filename):
                        in_app = False
                    else:
                        in_app = True

                new_frame['abs_path'] = abs_path
                new_frame['filename'] = filename
                if not frame.get('module') and abs_path.startswith(
                        ('http:', 'https:', 'webpack:', 'app:')):
                    new_frame['module'] = generate_module(abs_path)

        elif sourcemap_url:
            # No usable sourcemap view, but remember which map we tried.
            new_frame['data'] = dict(new_frame.get('data') or {},
                                     sourcemap=http.expose_url(sourcemap_url))

        # TODO: theoretically a minified source could point to
        # another mapped, minified source
        changed_frame = self.expand_frame(new_frame, source=source)

        if not new_frame.get('context_line') and source:
            all_errors.append({
                'type': EventError.JS_INVALID_SOURCEMAP_LOCATION,
                # Column might be missing here
                'column': new_frame.get('colno'),
                # Line might be missing here
                'row': new_frame.get('lineno'),
                'source': new_frame['abs_path'],
            })

        changed_raw = sourcemap_applied and self.expand_frame(raw_frame)
        if sourcemap_applied or all_errors or changed_frame or \
           changed_raw:
            if in_app is not None:
                new_frame['in_app'] = in_app
                raw_frame['in_app'] = in_app
            return [new_frame], [raw_frame] if changed_raw else None, all_errors