def cache_source(self, filename): sourcemaps = self.sourcemaps cache = self.cache self.fetch_count += 1 if self.fetch_count > self.max_fetches: cache.add_error(filename, {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES}) return # TODO: respect cache-control/max-age headers to some extent logger.debug("Fetching remote source %r", filename) try: result = fetch_file( filename, project=self.project, release=self.release, dist=self.dist, allow_scraping=self.allow_scraping, ) except http.BadSource as exc: cache.add_error(filename, exc.data) return cache.add(filename, result.body, result.encoding) cache.alias(result.url, filename) sourcemap_url = discover_sourcemap(result) if not sourcemap_url: return logger.debug("Found sourcemap %r for minified script %r", sourcemap_url[:256], result.url) sourcemaps.link(filename, sourcemap_url) if sourcemap_url in sourcemaps: return # pull down sourcemap try: sourcemap_view = fetch_sourcemap( sourcemap_url, project=self.project, release=self.release, dist=self.dist, allow_scraping=self.allow_scraping, ) except http.BadSource as exc: cache.add_error(filename, exc.data) return sourcemaps.add(sourcemap_url, sourcemap_view) # cache any inlined sources for src_id, source_name in sourcemap_view.iter_sources(): source_view = sourcemap_view.get_sourceview(src_id) if source_view is not None: self.cache.add( non_standard_url_join(sourcemap_url, source_name), source_view)
def cache_source(self, filename):
    sourcemaps = self.sourcemaps
    cache = self.cache

    self.fetch_count += 1

    if self.fetch_count > self.max_fetches:
        cache.add_error(filename, {
            'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
        })
        return

    # TODO: respect cache-control/max-age headers to some extent
    logger.debug('Fetching remote source %r', filename)
    try:
        result = fetch_file(filename, project=self.project, release=self.release,
                            allow_scraping=self.allow_scraping)
    except BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    cache.add(filename, result.body, result.encoding)
    cache.alias(result.url, filename)

    sourcemap_url = discover_sourcemap(result)
    if not sourcemap_url:
        return

    logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)
    sourcemaps.link(filename, sourcemap_url)
    if sourcemap_url in sourcemaps:
        return

    # pull down sourcemap
    try:
        sourcemap_view = fetch_sourcemap(
            sourcemap_url,
            project=self.project,
            release=self.release,
            allow_scraping=self.allow_scraping,
        )
    except BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    sourcemaps.add(sourcemap_url, sourcemap_view)

    # cache any inlined sources
    for src_id, source in sourcemap_view.iter_sources():
        if sourcemap_view.has_source_contents(src_id):
            self.cache.add(
                urljoin(sourcemap_url, source),
                lambda view=sourcemap_view, id=src_id: view.get_source_contents(id),
                None,
            )

def cache_source(self, filename, release):
    sourcemaps = self.sourcemaps
    cache = self.cache

    self.fetch_count += 1

    if self.fetch_count > self.max_fetches:
        cache.add_error(filename, {
            'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
        })
        return

    # TODO: respect cache-control/max-age headers to some extent
    logger.debug('Fetching remote source %r', filename)
    try:
        result = fetch_file(filename, project=self.project, release=release,
                            allow_scraping=self.allow_scraping)
    except BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    cache.add(filename, result.body.split('\n'))
    cache.alias(result.url, filename)

    sourcemap_url = discover_sourcemap(result)
    if not sourcemap_url:
        return

    logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)
    sourcemaps.link(filename, sourcemap_url)
    if sourcemap_url in sourcemaps:
        return

    # pull down sourcemap
    try:
        sourcemap_view = fetch_sourcemap(
            sourcemap_url,
            project=self.project,
            release=release,
            allow_scraping=self.allow_scraping,
        )
    except BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    sourcemaps.add(sourcemap_url, sourcemap_view)

    # cache any inlined sources
    # inline_sources = sourcemap_view.get_inline_content_sources(sourcemap_url)
    for src_id, source in sourcemap_view.iter_sources():
        # TODO(mattrobenolt): This is slightly less than ideal,
        # but it's the simplest path for now.
        # Ideally, we would do this lazily.
        content = sourcemap_view.get_source_contents(src_id)
        if content is not None:
            self.cache.add(urljoin(sourcemap_url, source), content)

def cache_source(self, filename):
    sourcemaps = self.sourcemaps
    cache = self.cache

    self.fetch_count += 1

    if self.fetch_count > self.max_fetches:
        cache.add_error(filename, {
            'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
        })
        return

    # TODO: respect cache-control/max-age headers to some extent
    logger.debug('Fetching remote source %r', filename)
    try:
        result = fetch_file(filename, project=self.project, release=self.release,
                            dist=self.dist, allow_scraping=self.allow_scraping)
    except http.BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    cache.add(filename, result.body, result.encoding)
    cache.alias(result.url, filename)

    sourcemap_url = discover_sourcemap(result)
    if not sourcemap_url:
        return

    logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)
    sourcemaps.link(filename, sourcemap_url)
    if sourcemap_url in sourcemaps:
        return

    # pull down sourcemap
    try:
        sourcemap_view = fetch_sourcemap(
            sourcemap_url,
            project=self.project,
            release=self.release,
            dist=self.dist,
            allow_scraping=self.allow_scraping,
        )
    except http.BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    sourcemaps.add(sourcemap_url, sourcemap_view)

    # cache any inlined sources
    for src_id, source in sourcemap_view.iter_sources():
        if sourcemap_view.has_source_contents(src_id):
            self.cache.add(
                urljoin(sourcemap_url, source),
                lambda view=sourcemap_view, id=src_id: view.get_source_contents(id),
                None,
            )

def cache_source(self, filename, release):
    sourcemaps = self.sourcemaps
    cache = self.cache

    self.fetch_count += 1

    if self.fetch_count > self.max_fetches:
        cache.add_error(filename, {
            'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
        })
        return

    # TODO: respect cache-control/max-age headers to some extent
    logger.debug('Fetching remote source %r', filename)
    try:
        result = fetch_file(filename, project=self.project, release=release,
                            allow_scraping=self.allow_scraping)
    except BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    cache.add(filename, result.body.split('\n'))
    cache.alias(result.url, filename)

    sourcemap_url = discover_sourcemap(result)
    if not sourcemap_url:
        return

    logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)
    sourcemaps.link(filename, sourcemap_url)
    if sourcemap_url in sourcemaps:
        return

    # pull down sourcemap
    try:
        sourcemap_idx = fetch_sourcemap(
            sourcemap_url,
            project=self.project,
            release=release,
            allow_scraping=self.allow_scraping,
        )
    except BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    sourcemaps.add(sourcemap_url, sourcemap_idx)

    # cache any inlined sources
    inline_sources = get_inline_content_sources(sourcemap_idx, sourcemap_url)
    for source in inline_sources:
        self.cache.add(*source)

def cache_source(self, filename, release):
    sourcemaps = self.sourcemaps
    cache = self.cache

    self.fetch_count += 1

    if self.fetch_count > self.max_fetches:
        cache.add_error(filename, {
            'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
        })
        return

    # TODO: respect cache-control/max-age headers to some extent
    logger.debug('Fetching remote source %r', filename)
    try:
        result = fetch_file(filename, project=self.project, release=release,
                            allow_scraping=self.allow_scraping)
    except BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    cache.add(filename, result.body.split('\n'))
    cache.alias(result.url, filename)

    sourcemap_url = discover_sourcemap(result)
    if not sourcemap_url:
        return

    logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)
    sourcemaps.link(filename, sourcemap_url)
    if sourcemap_url in sourcemaps:
        return

    # pull down sourcemap
    try:
        sourcemap_idx = fetch_sourcemap(
            sourcemap_url,
            project=self.project,
            release=release,
            allow_scraping=self.allow_scraping,
        )
    except BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    sourcemaps.add(sourcemap_url, sourcemap_idx)

    # cache any inlined sources
    for source in sourcemap_idx.sources:
        next_filename = urljoin(sourcemap_url, source)
        if source in sourcemap_idx.content:
            cache.add(next_filename, sourcemap_idx.content[source])

def populate_source_cache(self, project, frames, release):
    pending_file_list = set()
    done_file_list = set()
    sourcemap_capable = set()

    cache = self.cache
    sourcemaps = self.sourcemaps

    for f in frames:
        # We can't even attempt to fetch source if abs_path is None
        if f.abs_path is None:
            continue
        # tbh not entirely sure how this happens, but raven-js allows this
        # to be caught. I think this comes from dev consoles and whatnot
        # where there is no page. This just bails early instead of exposing
        # a fetch error that may be confusing.
        if f.abs_path == '<anonymous>':
            continue
        pending_file_list.add(f.abs_path)
        if f.colno is not None:
            sourcemap_capable.add(f.abs_path)

    idx = 0
    while pending_file_list:
        filename = pending_file_list.pop()
        done_file_list.add(filename)

        if idx > self.max_fetches:
            cache.add_error(filename, {
                'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
            })
            continue

        idx += 1

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug('Fetching remote source %r', filename)
        try:
            result = fetch_file(filename, project=project, release=release,
                                allow_scraping=self.allow_scraping)
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            continue

        cache.add(filename, result.body.split('\n'))
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            continue

        # If we didn't have a colno, a sourcemap wont do us any good
        if filename not in sourcemap_capable:
            cache.add_error(filename, {
                'type': EventError.JS_NO_COLUMN,
                'url': filename,
            })
            continue

        logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)

        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            continue

        # pull down sourcemap
        try:
            sourcemap_idx = fetch_sourcemap(
                sourcemap_url,
                project=project,
                release=release,
                allow_scraping=self.allow_scraping,
            )
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            continue

        sourcemaps.add(sourcemap_url, sourcemap_idx)

        # queue up additional source files for download
        for source in sourcemap_idx.sources:
            next_filename = urljoin(sourcemap_url, source)
            if next_filename not in done_file_list:
                if source in sourcemap_idx.content:
                    cache.add(next_filename, sourcemap_idx.content[source])
                    done_file_list.add(next_filename)
                else:
                    pending_file_list.add(next_filename)

def populate_source_cache(self, project, frames, release):
    pending_file_list = set()
    done_file_list = set()
    sourcemap_capable = set()

    cache = self.cache
    sourcemaps = self.sourcemaps

    for f in frames:
        pending_file_list.add(f.abs_path)
        if f.colno is not None:
            sourcemap_capable.add(f.abs_path)

    idx = 0
    while pending_file_list:
        idx += 1
        filename = pending_file_list.pop()
        done_file_list.add(filename)

        if idx > self.max_fetches:
            cache.add_error(filename, ERR_TOO_MANY_REMOTE_SOURCES)
            continue

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug('Fetching remote source %r', filename)
        try:
            result = fetch_url(filename, project=project, release=release)
        except BadSource as exc:
            cache.add_error(filename, unicode(exc))
            continue

        cache.add(filename, result.body.splitlines())
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            continue

        # If we didn't have a colno, a sourcemap wont do us any good
        if filename not in sourcemap_capable:
            cache.add_error(filename, ERR_NO_COLUMN)
            continue

        logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)

        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            continue

        # pull down sourcemap
        try:
            sourcemap_idx = fetch_sourcemap(
                sourcemap_url,
                project=project,
                release=release,
            )
        except BadSource as exc:
            cache.add_error(filename, unicode(exc))
            continue

        sourcemaps.add(sourcemap_url, sourcemap_idx)

        # queue up additional source files for download
        for source in sourcemap_idx.sources:
            next_filename = urljoin(sourcemap_url, source)
            if next_filename not in done_file_list:
                if sourcemap_idx.content:
                    cache.add(next_filename, sourcemap_idx.content[source])
                    done_file_list.add(next_filename)
                else:
                    pending_file_list.add(next_filename)

def cache_source(self, filename): """ Look for and (if found) cache a source file and its associated source map (if any). """ sourcemaps = self.sourcemaps cache = self.cache self.fetch_count += 1 if self.fetch_count > self.max_fetches: cache.add_error(filename, {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES}) return # TODO: respect cache-control/max-age headers to some extent logger.debug("Attempting to cache source %r", filename) try: # this both looks in the database and tries to scrape the internet with sentry_sdk.start_span( op="JavaScriptStacktraceProcessor.cache_source.fetch_file" ) as span: span.set_data("filename", filename) result = fetch_file( filename, project=self.project, release=self.release, dist=self.dist, allow_scraping=self.allow_scraping, ) except http.BadSource as exc: # most people don't upload release artifacts for their third-party libraries, # so ignore missing node_modules files if exc.data[ "type"] == EventError.JS_MISSING_SOURCE and "node_modules" in filename: pass else: cache.add_error(filename, exc.data) # either way, there's no more for us to do here, since we don't have # a valid file to cache return cache.add(filename, result.body, result.encoding) cache.alias(result.url, filename) sourcemap_url = discover_sourcemap(result) if not sourcemap_url: return logger.debug("Found sourcemap URL %r for minified script %r", sourcemap_url[:256], result.url) sourcemaps.link(filename, sourcemap_url) if sourcemap_url in sourcemaps: return # pull down sourcemap try: with sentry_sdk.start_span( op= "JavaScriptStacktraceProcessor.cache_source.fetch_sourcemap" ) as span: span.set_data("sourcemap_url", sourcemap_url) sourcemap_view = fetch_sourcemap( sourcemap_url, project=self.project, release=self.release, dist=self.dist, allow_scraping=self.allow_scraping, ) except http.BadSource as exc: # we don't perform the same check here as above, because if someone has # uploaded a node_modules file, which has a sourceMappingURL, they # presumably would like it mapped (and would like to know why it's not # working, if that's the case). If they're not looking for it to be # mapped, then they shouldn't be uploading the source file in the # first place. cache.add_error(filename, exc.data) return sourcemaps.add(sourcemap_url, sourcemap_view) # cache any inlined sources for src_id, source_name in sourcemap_view.iter_sources(): source_view = sourcemap_view.get_sourceview(src_id) if source_view is not None: self.cache.add( non_standard_url_join(sourcemap_url, source_name), source_view)
def cache_source(self, filename, release):
    sourcemaps = self.sourcemaps
    cache = self.cache

    self.fetch_count += 1

    if self.fetch_count > self.max_fetches:
        cache.add_error(filename, {
            'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
        })
        return

    # TODO: respect cache-control/max-age headers to some extent
    logger.debug('Fetching remote source %r', filename)
    try:
        result = fetch_file(filename, project=self.project, release=release,
                            allow_scraping=self.allow_scraping)
    except BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    cache.add(filename, result.body.split('\n'))
    cache.alias(result.url, filename)

    sourcemap_url = discover_sourcemap(result)
    if not sourcemap_url:
        return

    logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)
    sourcemaps.link(filename, sourcemap_url)
    if sourcemap_url in sourcemaps:
        return

    # pull down sourcemap
    try:
        sourcemap_view = fetch_sourcemap(
            sourcemap_url,
            project=self.project,
            release=release,
            allow_scraping=self.allow_scraping,
        )
    except BadSource as exc:
        cache.add_error(filename, exc.data)
        return

    sourcemaps.add(sourcemap_url, sourcemap_view)

    # cache any inlined sources
    # inline_sources = sourcemap_view.get_inline_content_sources(sourcemap_url)
    for src_id, source in sourcemap_view.iter_sources():
        # TODO(mattrobenolt): This is slightly less than ideal,
        # but it's the simplest path for now.
        # Ideally, we would do this lazily.
        content = sourcemap_view.get_source_contents(src_id)
        if content is not None:
            # TODO(mattrobenolt): This is gross. libsourcemap returns back
            # bytes, and our internal stuff assumed unicode. So everything else in
            # the pipeline assumes unicode and working with bytes is harder.
            # So let's coerce here to unicodes just to conform to API for both,
            # but remove this and handle bytes down the line when done.
            if isinstance(content, six.binary_type):
                content = content.decode('utf-8', errors='replace')
            self.cache.add(urljoin(sourcemap_url, source), content.split(u'\n'))

def populate_source_cache(self, project, frames):
    pending_file_list = set()
    done_file_list = set()
    sourcemap_capable = set()

    cache = self.cache
    sourcemaps = self.sourcemaps

    for f in frames:
        pending_file_list.add(f.abs_path)
        if f.colno is not None:
            sourcemap_capable.add(f.abs_path)

    idx = 0
    while pending_file_list:
        idx += 1
        filename = pending_file_list.pop()
        done_file_list.add(filename)

        if idx > self.max_fetches:
            cache.add_error(filename, 'Not fetching context due to too many remote sources')
            continue

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug('Fetching remote source %r', filename)
        result = fetch_url(filename, project=project)
        if result == BAD_SOURCE:
            # TODO(dcramer): we want better errors here
            cache.add_error(filename, 'File was unreachable or invalid')
            continue

        cache.add(filename, result.body.splitlines())
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            continue

        # If we didn't have a colno, a sourcemap wont do us any good
        if filename not in sourcemap_capable:
            cache.add_error(filename, 'No column information available (cant expand sourcemap)')
            continue

        logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)

        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            continue

        # pull down sourcemap
        sourcemap_idx = fetch_sourcemap(sourcemap_url, project=project)
        if not sourcemap_idx:
            cache.add_error(filename, 'Sourcemap was not parseable')
            continue

        sourcemaps.add(sourcemap_url, sourcemap_idx)

        # queue up additional source files for download
        for source in sourcemap_idx.sources:
            next_filename = urljoin(sourcemap_url, source)
            if next_filename not in done_file_list:
                if sourcemap_idx.content:
                    cache.add(next_filename, sourcemap_idx.content[source])
                    done_file_list.add(next_filename)
                else:
                    pending_file_list.add(next_filename)