Example #1
0
    def cache_source(self, filename):
        """
        Fetch ``filename`` and store it, its sourcemap and any sources inlined
        in that sourcemap in the processor's caches.

        Every call increments ``self.fetch_count``; once the count exceeds
        ``self.max_fetches`` a ``JS_TOO_MANY_REMOTE_SOURCES`` error is recorded
        for ``filename`` and nothing is fetched.  Fetch failures
        (``http.BadSource``) are likewise recorded as errors against
        ``filename`` rather than raised.

        :param filename: the path/URL of the (possibly minified) source file.
        :return: always ``None``; the results live in ``self.cache`` and
            ``self.sourcemaps``.
        """
        sourcemaps = self.sourcemaps
        cache = self.cache

        def fetch_options():
            # Evaluated right before each fetch so both requests read the
            # processor's settings at call time.
            return {
                "project": self.project,
                "release": self.release,
                "dist": self.dist,
                "allow_scraping": self.allow_scraping,
            }

        self.fetch_count += 1
        if self.fetch_count > self.max_fetches:
            too_many = {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES}
            cache.add_error(filename, too_many)
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug("Fetching remote source %r", filename)
        try:
            fetched = fetch_file(filename, **fetch_options())
        except http.BadSource as err:
            cache.add_error(filename, err.data)
            return

        # Store the body under the requested name and make the resolved URL
        # point at the same entry.
        cache.add(filename, fetched.body, fetched.encoding)
        cache.alias(fetched.url, filename)

        sourcemap_url = discover_sourcemap(fetched)
        if not sourcemap_url:
            return

        logger.debug("Found sourcemap %r for minified script %r",
                     sourcemap_url[:256], fetched.url)
        sourcemaps.link(filename, sourcemap_url)

        # The sourcemap may already have been loaded for another file.
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            smap_view = fetch_sourcemap(sourcemap_url, **fetch_options())
        except http.BadSource as err:
            cache.add_error(filename, err.data)
            return

        sourcemaps.add(sourcemap_url, smap_view)

        # cache any inlined sources
        for src_id, source_name in smap_view.iter_sources():
            inlined = smap_view.get_sourceview(src_id)
            if inlined is None:
                continue
            inlined_url = non_standard_url_join(sourcemap_url, source_name)
            self.cache.add(inlined_url, inlined)
Example #2
0
def discover_sourcemap(result):
    """
    Work out which sourcemap (if any) a fetched file points at.

    The lookup order is:

    1. the ``sourcemap`` response header, falling back to ``x-sourcemap``
       when the former is absent;
    2. a ``//# sourceMappingURL=`` / ``//@ sourceMappingURL=`` line among the
       first or last five lines of the body (the last match wins);
    3. a match of ``SOURCE_MAPPING_URL_RE`` within the final 300 characters
       of the last candidate line.

    :param result: a UrlResult-like object; ``headers``, ``body`` and ``url``
        are read.
    :return: the sourcemap URL joined onto ``result.url``, or the falsy value
        found (``None`` or an empty string) when nothing was discovered.
    :raises AssertionError: if a trailing ``/*...*/`` comment starts at the
        very first character of the discovered value.
    """
    headers = result.headers
    # When coercing the headers returned by urllib to a dict all keys become
    # lowercase, so only the lowercase spellings are looked up.
    legacy_header = headers.get("x-sourcemap")
    sourcemap = headers.get("sourcemap", legacy_header)

    if not sourcemap:
        body_lines = result.body.split("\n")
        # The directive is only expected near the top or the bottom of the
        # document, so long bodies are narrowed to their first and last five
        # lines; short bodies are scanned in full.
        possibilities = (
            body_lines[:5] + body_lines[-5:]
            if len(body_lines) > 10
            else body_lines
        )

        # Scan sequentially and let the last hit win.  This is undocumented
        # but matches what Chrome and Firefox do.
        directives = ("//# sourceMappingURL=", "//@ sourceMappingURL=")
        for candidate in possibilities:
            if candidate.startswith(directives):
                # Both directives are 21 characters long; keep what follows.
                sourcemap = candidate[21:].rstrip()

        # Still nothing: browsers also accept the directive glued to the end
        # of the last line of code, e.g. {code}//# sourceMappingURL={url}
        if not sourcemap:
            # Minified JS on a single line can be huge, so only the trailing
            # 300 characters are searched (an arbitrary best-guess limit).
            tail = possibilities[-1][-300:].rstrip()
            found = SOURCE_MAPPING_URL_RE.search(tail)
            if found:
                sourcemap = found.group(1)

    if not sourcemap:
        return sourcemap

    # react-native appends a comment to the sourceMappingURL line, e.g.
    #   sourceMappingURL=app.js.map/*ascii:...*/
    # That is out of spec and no browser supports it, but it is stripped here
    # so such bundles still resolve.
    comment_start = sourcemap.find("/*")
    if comment_start != -1 and sourcemap.endswith("*/"):
        # The comment must follow an actual URL, never open the value.
        if comment_start == 0:
            raise AssertionError(
                "react-native comment found at bad location: %d, %r" % (comment_start, sourcemap)
            )
        sourcemap = sourcemap[:comment_start]

    # fix url so its absolute
    return non_standard_url_join(result.url, sourcemap)
Example #3
0
    def cache_source(self, filename):
        """
        Try to load a source file and, when it advertises one, its source map,
        storing whatever is found in the processor's caches.

        Each call counts against ``self.max_fetches``; once that budget is
        exceeded a ``JS_TOO_MANY_REMOTE_SOURCES`` error is recorded for
        ``filename`` and nothing is fetched.  ``http.BadSource`` failures are
        recorded as errors against ``filename`` instead of being raised, with
        one exception: a missing source whose name contains ``node_modules``
        is silently ignored.

        Returns ``None`` in every case.
        """

        sourcemaps = self.sourcemaps
        cache = self.cache

        self.fetch_count += 1
        if self.fetch_count > self.max_fetches:
            budget_error = {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES}
            cache.add_error(filename, budget_error)
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug("Attempting to cache source %r", filename)
        try:
            # this both looks in the database and tries to scrape the internet
            file_span = sentry_sdk.start_span(
                op="JavaScriptStacktraceProcessor.cache_source.fetch_file")
            with file_span as span:
                span.set_data("filename", filename)
                fetched = fetch_file(
                    filename,
                    project=self.project,
                    release=self.release,
                    dist=self.dist,
                    allow_scraping=self.allow_scraping,
                )
        except http.BadSource as err:
            # most people don't upload release artifacts for their third-party
            # libraries, so a missing node_modules file is not worth reporting
            ignorable = (err.data["type"] == EventError.JS_MISSING_SOURCE
                         and "node_modules" in filename)
            if not ignorable:
                cache.add_error(filename, err.data)

            # reported or not, there is no valid file to cache, so stop here
            return

        cache.add(filename, fetched.body, fetched.encoding)
        cache.alias(fetched.url, filename)

        sourcemap_url = discover_sourcemap(fetched)
        if not sourcemap_url:
            return

        logger.debug("Found sourcemap URL %r for minified script %r",
                     sourcemap_url[:256], fetched.url)
        sourcemaps.link(filename, sourcemap_url)

        # nothing more to do when this sourcemap has been loaded already
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            map_span = sentry_sdk.start_span(
                op="JavaScriptStacktraceProcessor.cache_source.fetch_sourcemap")
            with map_span as span:
                span.set_data("sourcemap_url", sourcemap_url)
                smap_view = fetch_sourcemap(
                    sourcemap_url,
                    project=self.project,
                    release=self.release,
                    dist=self.dist,
                    allow_scraping=self.allow_scraping,
                )
        except http.BadSource as err:
            # No node_modules exemption here: a file that was uploaded and
            # carries a sourceMappingURL is presumably meant to be mapped, so
            # the user should learn why that failed.  Anyone who does not want
            # it mapped should not upload the source file in the first place.
            cache.add_error(filename, err.data)
            return

        sourcemaps.add(sourcemap_url, smap_view)

        # cache any inlined sources
        for src_id, source_name in smap_view.iter_sources():
            inlined = smap_view.get_sourceview(src_id)
            if inlined is None:
                continue
            target = non_standard_url_join(sourcemap_url, source_name)
            self.cache.add(target, inlined)
Example #4
0
    def process_frame(self, processable_frame, processing_task):
        """
        Attempt to demangle the given frame.

        The frame's ``abs_path`` is looked up in the sourcemap registry; when
        a sourcemap view is linked, the frame's line/column are translated to
        the original location and a rewritten copy of the frame is produced.

        :param processable_frame: wrapper whose ``frame`` dict is processed;
            the resolved token is stored in its ``data["token"]`` so that the
            following frame can use it as a function-name fallback.
        :param processing_task: not used by this method.
        :return: ``None`` when the frame cannot be processed (no ``abs_path``
            or ``lineno``, or a node frame whose path does not start with
            ``/``, ``app:`` or ``webpack:``) or when nothing changed.
            Otherwise a tuple ``(new_frames, raw_frames, all_errors)`` where
            ``new_frames`` is a one-element list with the rewritten frame,
            ``raw_frames`` is a one-element list with the expanded original
            frame (or ``None`` if it did not change) and ``all_errors`` is the
            list of error dicts collected along the way.
        """

        frame = processable_frame.frame
        token = None

        cache = self.cache
        sourcemaps = self.sourcemaps
        all_errors = []
        sourcemap_applied = False

        # can't demangle if there's no filename or line number present
        if not frame.get("abs_path") or not frame.get("lineno"):
            return

        # also can't demangle node's internal modules
        # therefore we only process user-land frames (starting with /)
        # or those created by bundle/webpack internals
        if self.data.get(
                "platform") == "node" and not frame.get("abs_path").startswith(
                    ("/", "app:", "webpack:")):
            return

        errors = cache.get_errors(frame["abs_path"])
        if errors:
            all_errors.extend(errors)

        # This might fail but that's okay, we try with a different path a
        # bit later down the road.
        source = self.get_sourceview(frame["abs_path"])

        in_app = None
        new_frame = dict(frame)
        raw_frame = dict(frame)

        sourcemap_url, sourcemap_view = sourcemaps.get_link(frame["abs_path"])
        self.sourcemaps_touched.add(sourcemap_url)
        if sourcemap_view and frame.get("colno") is None:
            # a sourcemap lookup needs a column as well as a line
            all_errors.append({
                "type": EventError.JS_NO_COLUMN,
                "url": http.expose_url(frame["abs_path"])
            })
        elif sourcemap_view:
            # data URIs make poor labels, so fall back to the frame's path
            if is_data_uri(sourcemap_url):
                sourcemap_label = frame["abs_path"]
            else:
                sourcemap_label = sourcemap_url

            sourcemap_label = http.expose_url(sourcemap_label)

            if frame.get("function"):
                minified_function_name = frame["function"]
                minified_source = self.get_sourceview(frame["abs_path"])
            else:
                minified_function_name = minified_source = None

            try:
                # Errors are 1-indexed in the frames, so we need to -1 to get
                # zero-indexed value from tokens.
                assert frame["lineno"] > 0, "line numbers are 1-indexed"
                token = sourcemap_view.lookup(frame["lineno"] - 1,
                                              frame["colno"] - 1,
                                              minified_function_name,
                                              minified_source)
            except Exception:
                # any failure of the lookup is reported as an invalid location
                token = None
                all_errors.append({
                    "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
                    "column": frame.get("colno"),
                    "row": frame.get("lineno"),
                    "source": frame["abs_path"],
                    "sourcemap": sourcemap_label,
                })

            # persist the token so that we can find it later
            processable_frame.data["token"] = token

            # Store original data in annotation
            new_frame["data"] = dict(frame.get("data") or {},
                                     sourcemap=sourcemap_label)

            sourcemap_applied = True

            if token is not None:
                abs_path = non_standard_url_join(sourcemap_url, token.src)

                logger.debug("Mapping compressed source %r to mapping in %r",
                             frame["abs_path"], abs_path)
                source = self.get_sourceview(abs_path)
            else:
                # No mapped location was found.  Keep `abs_path` bound to the
                # frame's own path so the missing-source report below has
                # something to point at instead of raising UnboundLocalError.
                abs_path = frame["abs_path"]

            if source is None:
                if token is not None:
                    # the mapped source could not be loaded; surface why
                    errors = cache.get_errors(abs_path)
                    if errors:
                        all_errors.extend(errors)
                # Without a token, `errors` still holds what the cache
                # recorded for the frame's own path at the top of this method
                # (already part of `all_errors`), so it is not added again.
                if not errors:
                    all_errors.append({
                        "type": EventError.JS_MISSING_SOURCE,
                        "url": http.expose_url(abs_path)
                    })

            if token is not None:
                # the tokens are zero indexed, so offset correctly
                new_frame["lineno"] = token.src_line + 1
                new_frame["colno"] = token.src_col + 1

                # Try to use the function name we got from symbolic
                original_function_name = token.function_name

                # In the ideal case we can use the function name from the
                # frame and the location to resolve the original name
                # through the heuristics in our sourcemap library.
                if original_function_name is None:
                    last_token = None

                    # Find the previous token for function name handling as a
                    # fallback.
                    if (processable_frame.previous_frame
                            and processable_frame.previous_frame.processor is
                            self):
                        last_token = processable_frame.previous_frame.data.get(
                            "token")
                        if last_token:
                            original_function_name = last_token.name

                if original_function_name is not None:
                    new_frame["function"] = original_function_name

                filename = token.src
                # special case webpack support
                # abs_path will always be the full path with webpack:/// prefix.
                # filename will be relative to that
                if abs_path.startswith("webpack:"):
                    filename = abs_path
                    # webpack seems to use ~ to imply "relative to resolver root"
                    # which is generally seen for third party deps
                    # (i.e. node_modules)
                    if "/~/" in filename:
                        filename = "~/" + abs_path.split("/~/", 1)[-1]
                    else:
                        filename = filename.split("webpack:///", 1)[-1]

                    # As noted above:
                    # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
                    # * [node] same goes for './node_modules/' and '../node_modules/', which are used when bundling node apps
                    # * [node] and webpack, which includes its own code to bootstrap all modules and its internals
                    #   eg. webpack:///webpack/bootstrap, webpack:///external
                    if (filename.startswith("~/")
                            or "/node_modules/" in filename
                            or not filename.startswith("./")):
                        in_app = False
                    # And conversely, local dependencies start with './'
                    elif filename.startswith("./"):
                        in_app = True
                    # We want to explicitly generate a webpack module name
                    new_frame["module"] = generate_module(filename)

                # while you could technically use a subpath of 'node_modules' for your libraries,
                # it would be an extremely complicated decision and we've not seen anyone do it
                # so instead we assume if node_modules is in the path its part of the vendored code
                elif "/node_modules/" in abs_path:
                    in_app = False

                if abs_path.startswith("app:"):
                    if filename and NODE_MODULES_RE.search(filename):
                        in_app = False
                    else:
                        in_app = True

                new_frame["abs_path"] = abs_path
                new_frame["filename"] = filename
                if not frame.get("module") and abs_path.startswith(
                    ("http:", "https:", "webpack:", "app:")):
                    new_frame["module"] = generate_module(abs_path)

        elif sourcemap_url:
            # a sourcemap is linked but no view is available: only annotate
            new_frame["data"] = dict(new_frame.get("data") or {},
                                     sourcemap=http.expose_url(sourcemap_url))

        # TODO: theoretically a minified source could point to
        # another mapped, minified source
        changed_frame = self.expand_frame(new_frame, source=source)

        # If we did not manage to match but we do have a line or column
        # we want to report an error here.
        if not new_frame.get("context_line") and source and new_frame.get(
                "colno") is not None:
            all_errors.append({
                "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
                "column": new_frame["colno"],
                "row": new_frame["lineno"],
                "source": new_frame["abs_path"],
            })

        # the raw frame is only expanded when a sourcemap was applied
        changed_raw = sourcemap_applied and self.expand_frame(raw_frame)

        if sourcemap_applied or all_errors or changed_frame or changed_raw:
            # In case we are done processing, we iterate over all errors that we got
            # and we filter out all `JS_MISSING_SOURCE` errors since we consider if we have
            # a `context_line` we have a symbolicated frame and we don't need to show the error
            has_context_line = bool(new_frame.get("context_line"))
            if has_context_line:
                # Compare by value rather than identity: an error dict is not
                # guaranteed to hold the very same object as the constant.
                all_errors[:] = [
                    x for x in all_errors
                    if x.get("type") != EventError.JS_MISSING_SOURCE
                ]

            if in_app is not None:
                new_frame["in_app"] = in_app
                raw_frame["in_app"] = in_app

            new_frames = [new_frame]
            raw_frames = [raw_frame] if changed_raw else None
            return new_frames, raw_frames, all_errors
Example #5
0
def test_non_standard_url_join(base, to_join, expected):
    """Joining ``to_join`` onto ``base`` must produce ``expected``."""
    joined = non_standard_url_join(base, to_join)
    assert joined == expected