def discover_sourcemap(result):
    """
    Given a UrlResult object, attempt to discover a sourcemap URL.
    """
    # When coercing the headers returned by urllib to a dict
    # all keys become lowercase so they're normalized
    sourcemap = result.headers.get("sourcemap", result.headers.get("x-sourcemap"))

    if not sourcemap:
        parsed_body = result.body.split("\n")
        # Source maps are only going to exist at either the top or bottom of the document.
        # Technically, there isn't anything indicating *where* it should exist, so we
        # are generous and assume it's somewhere either in the first or last 5 lines.
        # If it's somewhere else in the document, you're probably doing it wrong.
        if len(parsed_body) > 10:
            possibilities = parsed_body[:5] + parsed_body[-5:]
        else:
            possibilities = parsed_body

        # We want to scan each line sequentially, and the last one found wins.
        # This behavior is undocumented, but matches what Chrome and Firefox do.
        for line in possibilities:
            if line[:21] in ("//# sourceMappingURL=", "//@ sourceMappingURL="):
                # We want everything AFTER the indicator, which is 21 chars long
                sourcemap = line[21:].rstrip()

        # If we still haven't found anything, check the end of the last line AFTER the
        # source code. This is not the literal interpretation of the spec, but browsers
        # support it, e.g. {code}//# sourceMappingURL={url}
        if not sourcemap:
            # Only look at the last 300 characters to keep the search space reasonable
            # (minified JS on a single line could be tens of thousands of chars). This is
            # a totally arbitrary number / best guess; most sourceMappingURLs are
            # relative and not very long.
            search_space = possibilities[-1][-300:].rstrip()
            match = SOURCE_MAPPING_URL_RE.search(search_space)
            if match:
                sourcemap = match.group(1)

    if sourcemap:
        # react-native shoves a comment at the end of the sourceMappingURL line.
        # For example:
        #   sourceMappingURL=app.js.map/*ascii:...*/
        # This comment is completely out of spec and no browser would support it,
        # but we need to strip it to make people happy.
        if "/*" in sourcemap and sourcemap[-2:] == "*/":
            index = sourcemap.index("/*")
            # The comment definitely shouldn't be the first character,
            # so let's just make sure of that.
            if index == 0:
                raise AssertionError(
                    "react-native comment found at bad location: %d, %r" % (index, sourcemap)
                )
            sourcemap = sourcemap[:index]

        # Fix the URL so it's absolute
        sourcemap = non_standard_url_join(result.url, sourcemap)

    return sourcemap

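# Illustrative only: a minimal sketch of how discover_sourcemap might be exercised,
# assuming a UrlResult-like object that exposes `url`, `headers`, and `body`, and
# assuming non_standard_url_join behaves like urllib's urljoin for plain http(s) URLs.
# The FakeUrlResult stub and the example URLs below are hypothetical.
from collections import namedtuple

FakeUrlResult = namedtuple("FakeUrlResult", ["url", "headers", "body"])

def _sketch_discover_sourcemap():
    result = FakeUrlResult(
        url="http://example.com/static/app.min.js",
        headers={},  # no `sourcemap` / `x-sourcemap` header, so the body gets scanned
        body="!function(){}();\n//# sourceMappingURL=app.min.js.map",
    )
    # Under the assumptions above, the relative URL on the last line is resolved
    # against result.url, yielding "http://example.com/static/app.min.js.map".
    return discover_sourcemap(result)
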
def cache_source(self, filename):
    """
    Look for and (if found) cache a source file and its associated source
    map (if any).
    """

    sourcemaps = self.sourcemaps
    cache = self.cache

    self.fetch_count += 1

    if self.fetch_count > self.max_fetches:
        cache.add_error(filename, {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES})
        return

    # TODO: respect cache-control/max-age headers to some extent
    logger.debug("Attempting to cache source %r", filename)
    try:
        # this both looks in the database and tries to scrape the internet
        with sentry_sdk.start_span(
            op="JavaScriptStacktraceProcessor.cache_source.fetch_file"
        ) as span:
            span.set_data("filename", filename)
            result = fetch_file(
                filename,
                project=self.project,
                release=self.release,
                dist=self.dist,
                allow_scraping=self.allow_scraping,
            )
    except http.BadSource as exc:
        # most people don't upload release artifacts for their third-party libraries,
        # so ignore missing node_modules files
        if exc.data["type"] == EventError.JS_MISSING_SOURCE and "node_modules" in filename:
            pass
        else:
            cache.add_error(filename, exc.data)

        # either way, there's no more for us to do here, since we don't have
        # a valid file to cache
        return

    cache.add(filename, result.body, result.encoding)
    cache.alias(result.url, filename)

    sourcemap_url = discover_sourcemap(result)
    if not sourcemap_url:
        return

    logger.debug(
        "Found sourcemap URL %r for minified script %r", sourcemap_url[:256], result.url
    )
    sourcemaps.link(filename, sourcemap_url)
    if sourcemap_url in sourcemaps:
        return

    # pull down sourcemap
    try:
        with sentry_sdk.start_span(
            op="JavaScriptStacktraceProcessor.cache_source.fetch_sourcemap"
        ) as span:
            span.set_data("sourcemap_url", sourcemap_url)
            sourcemap_view = fetch_sourcemap(
                sourcemap_url,
                project=self.project,
                release=self.release,
                dist=self.dist,
                allow_scraping=self.allow_scraping,
            )
    except http.BadSource as exc:
        # we don't perform the same check here as above, because if someone has
        # uploaded a node_modules file, which has a sourceMappingURL, they
        # presumably would like it mapped (and would like to know why it's not
        # working, if that's the case). If they're not looking for it to be
        # mapped, then they shouldn't be uploading the source file in the
        # first place.
        cache.add_error(filename, exc.data)
        return

    sourcemaps.add(sourcemap_url, sourcemap_view)

    # cache any inlined sources
    for src_id, source_name in sourcemap_view.iter_sources():
        source_view = sourcemap_view.get_sourceview(src_id)
        if source_view is not None:
            self.cache.add(non_standard_url_join(sourcemap_url, source_name), source_view)

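# A rough sketch (not the real Sentry classes) of the cache interfaces that
# cache_source relies on above: add/alias/add_error on the source cache, and
# link/add/__contains__ on the sourcemap cache. Names and behavior here are
# simplified assumptions for illustration only.
class SketchSourceCache:
    def __init__(self):
        self._files = {}
        self._errors = {}

    def add(self, filename, body, encoding=None):
        self._files[filename] = (body, encoding)

    def alias(self, alias_name, filename):
        # let the fetched URL resolve to the same cached entry as the filename
        self._files[alias_name] = self._files.get(filename)

    def add_error(self, filename, error):
        self._errors.setdefault(filename, []).append(error)

    def get_errors(self, filename):
        return self._errors.get(filename, [])


class SketchSourceMapCache:
    def __init__(self):
        self._links = {}  # minified filename -> sourcemap URL
        self._views = {}  # sourcemap URL -> parsed sourcemap view

    def link(self, filename, sourcemap_url):
        self._links[filename] = sourcemap_url

    def add(self, sourcemap_url, sourcemap_view):
        self._views[sourcemap_url] = sourcemap_view

    def __contains__(self, sourcemap_url):
        return sourcemap_url in self._views
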
def process_frame(self, processable_frame, processing_task):
    """
    Attempt to demangle the given frame.
    """
    frame = processable_frame.frame
    token = None

    cache = self.cache
    sourcemaps = self.sourcemaps
    all_errors = []
    sourcemap_applied = False

    # can't demangle if there's no filename or line number present
    if not frame.get("abs_path") or not frame.get("lineno"):
        return

    # also can't demangle node's internal modules
    # therefore we only process user-land frames (starting with /)
    # or those created by bundle/webpack internals
    if self.data.get("platform") == "node" and not frame.get("abs_path").startswith(
        ("/", "app:", "webpack:")
    ):
        return

    errors = cache.get_errors(frame["abs_path"])
    if errors:
        all_errors.extend(errors)

    # This might fail but that's okay, we try with a different path a
    # bit later down the road.
    source = self.get_sourceview(frame["abs_path"])

    in_app = None
    new_frame = dict(frame)
    raw_frame = dict(frame)

    sourcemap_url, sourcemap_view = sourcemaps.get_link(frame["abs_path"])
    self.sourcemaps_touched.add(sourcemap_url)

    if sourcemap_view and frame.get("colno") is None:
        all_errors.append(
            {"type": EventError.JS_NO_COLUMN, "url": http.expose_url(frame["abs_path"])}
        )
    elif sourcemap_view:
        if is_data_uri(sourcemap_url):
            sourcemap_label = frame["abs_path"]
        else:
            sourcemap_label = sourcemap_url

        sourcemap_label = http.expose_url(sourcemap_label)

        if frame.get("function"):
            minified_function_name = frame["function"]
            minified_source = self.get_sourceview(frame["abs_path"])
        else:
            minified_function_name = minified_source = None

        try:
            # Line/column numbers in the frames are 1-indexed, so we need to subtract 1
            # to get the zero-indexed values expected by the token lookup.
            assert frame["lineno"] > 0, "line numbers are 1-indexed"
            token = sourcemap_view.lookup(
                frame["lineno"] - 1, frame["colno"] - 1, minified_function_name, minified_source
            )
        except Exception:
            token = None
            all_errors.append(
                {
                    "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
                    "column": frame.get("colno"),
                    "row": frame.get("lineno"),
                    "source": frame["abs_path"],
                    "sourcemap": sourcemap_label,
                }
            )

        # persist the token so that we can find it later
        processable_frame.data["token"] = token

        # Store original data in annotation
        new_frame["data"] = dict(frame.get("data") or {}, sourcemap=sourcemap_label)

        sourcemap_applied = True

        if token is not None:
            abs_path = non_standard_url_join(sourcemap_url, token.src)

            logger.debug(
                "Mapping compressed source %r to mapping in %r", frame["abs_path"], abs_path
            )
            source = self.get_sourceview(abs_path)

        if source is None:
            errors = cache.get_errors(abs_path)
            if errors:
                all_errors.extend(errors)
            else:
                all_errors.append(
                    {"type": EventError.JS_MISSING_SOURCE, "url": http.expose_url(abs_path)}
                )

        if token is not None:
            # the tokens are zero indexed, so offset correctly
            new_frame["lineno"] = token.src_line + 1
            new_frame["colno"] = token.src_col + 1

            # Try to use the function name we got from symbolic
            original_function_name = token.function_name

            # In the ideal case we can use the function name from the
            # frame and the location to resolve the original name
            # through the heuristics in our sourcemap library.
            if original_function_name is None:
                last_token = None

                # Find the previous token for function name handling as a
                # fallback.
                if (
                    processable_frame.previous_frame
                    and processable_frame.previous_frame.processor is self
                ):
                    last_token = processable_frame.previous_frame.data.get("token")
                    if last_token:
                        original_function_name = last_token.name

            if original_function_name is not None:
                new_frame["function"] = original_function_name

            filename = token.src
            # special case webpack support
            # abs_path will always be the full path with webpack:/// prefix.
            # filename will be relative to that
            if abs_path.startswith("webpack:"):
                filename = abs_path
                # webpack seems to use ~ to imply "relative to resolver root"
                # which is generally seen for third-party deps
                # (i.e. node_modules)
                if "/~/" in filename:
                    filename = "~/" + abs_path.split("/~/", 1)[-1]
                else:
                    filename = filename.split("webpack:///", 1)[-1]

                # As noted above:
                # * [js/node] '~/' means they're coming from node_modules, so these are not app dependencies
                # * [node] same goes for './node_modules/' and '../node_modules/', which is used when bundling node apps
                # * [node] and for webpack, which includes its own code to bootstrap all modules and its internals,
                #   e.g. webpack:///webpack/bootstrap, webpack:///external
                if (
                    filename.startswith("~/")
                    or "/node_modules/" in filename
                    or not filename.startswith("./")
                ):
                    in_app = False
                # And conversely, local dependencies start with './'
                elif filename.startswith("./"):
                    in_app = True
                # We want to explicitly generate a webpack module name
                new_frame["module"] = generate_module(filename)

            # While you could technically use a subpath of 'node_modules' for your libraries,
            # it would be an extremely complicated decision and we've not seen anyone do it,
            # so instead we assume that if node_modules is in the path, it's part of the
            # vendored code.
            elif "/node_modules/" in abs_path:
                in_app = False

            if abs_path.startswith("app:"):
                if filename and NODE_MODULES_RE.search(filename):
                    in_app = False
                else:
                    in_app = True

            new_frame["abs_path"] = abs_path
            new_frame["filename"] = filename
            if not frame.get("module") and abs_path.startswith(
                ("http:", "https:", "webpack:", "app:")
            ):
                new_frame["module"] = generate_module(abs_path)

    elif sourcemap_url:
        new_frame["data"] = dict(
            new_frame.get("data") or {}, sourcemap=http.expose_url(sourcemap_url)
        )

    # TODO: theoretically a minified source could point to
    # another mapped, minified source
    changed_frame = self.expand_frame(new_frame, source=source)

    # If we did not manage to match but we do have a line or column
    # we want to report an error here.
    if not new_frame.get("context_line") and source and new_frame.get("colno") is not None:
        all_errors.append(
            {
                "type": EventError.JS_INVALID_SOURCEMAP_LOCATION,
                "column": new_frame["colno"],
                "row": new_frame["lineno"],
                "source": new_frame["abs_path"],
            }
        )

    changed_raw = sourcemap_applied and self.expand_frame(raw_frame)

    if sourcemap_applied or all_errors or changed_frame or changed_raw:
        # Once we're done processing, filter out all `JS_MISSING_SOURCE` errors:
        # if we have a `context_line`, the frame is symbolicated and we don't
        # need to show the error.
        has_context_line = bool(new_frame.get("context_line"))
        if has_context_line:
            all_errors[:] = [
                x for x in all_errors if x.get("type") is not EventError.JS_MISSING_SOURCE
            ]

        if in_app is not None:
            new_frame["in_app"] = in_app
            raw_frame["in_app"] = in_app

        new_frames = [new_frame]
        raw_frames = [raw_frame] if changed_raw else None
        return new_frames, raw_frames, all_errors

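# For illustration, the webpack path handling in process_frame above boils down to
# roughly the following standalone helper. This is a simplified restatement for
# readability, not the exact production logic; edge cases may differ.
def sketch_classify_webpack_path(abs_path):
    """Return (filename, in_app) for a webpack:///-style abs_path."""
    filename = abs_path
    if "/~/" in filename:
        # '~/' means "relative to resolver root", i.e. third-party deps in node_modules
        filename = "~/" + abs_path.split("/~/", 1)[-1]
    else:
        filename = filename.split("webpack:///", 1)[-1]

    if filename.startswith("~/") or "/node_modules/" in filename or not filename.startswith("./"):
        # vendored deps and webpack's own bootstrap/internal modules
        in_app = False
    else:
        # local sources bundled from './...'
        in_app = True
    return filename, in_app

# e.g. sketch_classify_webpack_path("webpack:///./src/components/App.jsx")
#      -> ("./src/components/App.jsx", True)
#      sketch_classify_webpack_path("webpack:///../node_modules/react/index.js")
#      -> ("../node_modules/react/index.js", False)
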
def test_non_standard_url_join(base, to_join, expected):
    assert non_standard_url_join(base, to_join) == expected
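
# The test above expects parametrized (base, to_join, expected) triples. One possible
# parametrization is sketched below; the expected values assume non_standard_url_join
# matches urljoin semantics for ordinary http(s) URLs, which may not cover the
# non-standard schemes (e.g. webpack://) the helper exists for.
import pytest

@pytest.mark.parametrize(
    "base,to_join,expected",
    [
        ("http://example.com/foo/bar.js", "bar.js.map", "http://example.com/foo/bar.js.map"),
        ("http://example.com/foo/bar.js", "/baz.js.map", "http://example.com/baz.js.map"),
    ],
)
def test_non_standard_url_join_sketch(base, to_join, expected):
    assert non_standard_url_join(base, to_join) == expected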