Example #1
    def cache_source(self, filename):
        sourcemaps = self.sourcemaps
        cache = self.cache

        self.fetch_count += 1

        if self.fetch_count > self.max_fetches:
            cache.add_error(filename,
                            {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES})
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug("Fetching remote source %r", filename)
        try:
            result = fetch_file(
                filename,
                project=self.project,
                release=self.release,
                dist=self.dist,
                allow_scraping=self.allow_scraping,
            )
        except http.BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        cache.add(filename, result.body, result.encoding)
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            return

        logger.debug("Found sourcemap %r for minified script %r",
                     sourcemap_url[:256], result.url)
        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            sourcemap_view = fetch_sourcemap(
                sourcemap_url,
                project=self.project,
                release=self.release,
                dist=self.dist,
                allow_scraping=self.allow_scraping,
            )
        except http.BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        sourcemaps.add(sourcemap_url, sourcemap_view)

        # cache any inlined sources
        for src_id, source_name in sourcemap_view.iter_sources():
            source_view = sourcemap_view.get_sourceview(src_id)
            if source_view is not None:
                self.cache.add(
                    non_standard_url_join(sourcemap_url, source_name),
                    source_view)
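
Every variant below leans on discover_sourcemap to locate the map for a fetched file. The usual convention is a SourceMap (or legacy X-SourceMap) response header, with a trailing //# sourceMappingURL= comment in the file body as the fallback. A minimal sketch of that discovery step, assuming a lower-cased headers dict and a text body (Sentry's real implementation does more normalization and validation):

    import re

    # Hypothetical stand-in for discover_sourcemap; matches the standard
    # //# sourceMappingURL=... comment (and the legacy //@ spelling).
    SOURCEMAP_RE = re.compile(r'//[#@]\s*sourceMappingURL=(\S+)\s*$', re.MULTILINE)

    def discover_sourcemap_sketch(headers, body):
        # Response headers take precedence over the inline comment.
        url = headers.get('sourcemap') or headers.get('x-sourcemap')
        if not url:
            match = SOURCEMAP_RE.search(body)
            if match:
                url = match.group(1)
        return url
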
Example #2
    def cache_source(self, filename):
        sourcemaps = self.sourcemaps
        cache = self.cache

        self.fetch_count += 1

        if self.fetch_count > self.max_fetches:
            cache.add_error(filename, {
                'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
            })
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug('Fetching remote source %r', filename)
        try:
            result = fetch_file(filename,
                                project=self.project,
                                release=self.release,
                                allow_scraping=self.allow_scraping)
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        cache.add(filename, result.body, result.encoding)
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            return

        logger.debug('Found sourcemap %r for minified script %r',
                     sourcemap_url[:256], result.url)
        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            sourcemap_view = fetch_sourcemap(
                sourcemap_url,
                project=self.project,
                release=self.release,
                allow_scraping=self.allow_scraping,
            )
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        sourcemaps.add(sourcemap_url, sourcemap_view)

        # cache any inlined sources
        for src_id, source in sourcemap_view.iter_sources():
            if sourcemap_view.has_source_contents(src_id):
                self.cache.add(
                    urljoin(sourcemap_url, source),
                    lambda view=sourcemap_view, id=src_id: view.get_source_contents(id),
                    None,
                )
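
The lambda handed to cache.add above defers reading the inlined contents until the cache entry is actually used. Binding view and id as default arguments is what makes that safe inside a loop: Python closures capture variables by reference, so without the defaults every lambda created here would see only the final src_id. A standalone illustration of the pitfall:

    # Closures capture the loop variable itself, not its value at creation:
    late = [lambda: i for i in range(3)]
    print([f() for f in late])     # [2, 2, 2] -- all three see the final i

    # Default arguments are evaluated once, at definition time, so each
    # lambda keeps its own copy:
    early = [lambda i=i: i for i in range(3)]
    print([f() for f in early])    # [0, 1, 2]
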
Example #3
    def cache_source(self, filename, release):
        sourcemaps = self.sourcemaps
        cache = self.cache

        self.fetch_count += 1

        if self.fetch_count > self.max_fetches:
            cache.add_error(filename, {
                'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
            })
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug('Fetching remote source %r', filename)
        try:
            result = fetch_file(filename,
                                project=self.project,
                                release=release,
                                allow_scraping=self.allow_scraping)
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        cache.add(filename, result.body.split('\n'))
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            return

        logger.debug('Found sourcemap %r for minified script %r',
                     sourcemap_url[:256], result.url)
        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            sourcemap_view = fetch_sourcemap(
                sourcemap_url,
                project=self.project,
                release=release,
                allow_scraping=self.allow_scraping,
            )
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        sourcemaps.add(sourcemap_url, sourcemap_view)

        # cache any inlined sources
        # inline_sources = sourcemap_view.get_inline_content_sources(sourcemap_url)
        for src_id, source in sourcemap_view.iter_sources():
            # TODO(mattrobenolt): This is slightly less than ideal,
            # but it's the simplest path for now.
            # Ideally, we would do this lazily.
            content = sourcemap_view.get_source_contents(src_id)
            if content is not None:
                self.cache.add(urljoin(sourcemap_url, source), content)
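
Source names recorded in a map are usually paths relative to the map itself, so each one is resolved against sourcemap_url before being used as a cache key. The standard-library urljoin covers the common case, but names that carry their own scheme (for example webpack:///...) pass through urljoin unchanged, which is presumably why the newer variants (Examples #1 and #10) use a custom non_standard_url_join instead:

    from urllib.parse import urljoin  # six.moves.urllib.parse on Python 2

    sourcemap_url = 'https://example.com/static/js/app.min.js.map'

    # Relative names resolve against the map's URL:
    print(urljoin(sourcemap_url, '../src/util.js'))
    # -> https://example.com/static/src/util.js

    # Names with a non-HTTP scheme come back untouched:
    print(urljoin(sourcemap_url, 'webpack:///./src/app.js'))
    # -> webpack:///./src/app.js
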
Example #4
    def cache_source(self, filename):
        sourcemaps = self.sourcemaps
        cache = self.cache

        self.fetch_count += 1

        if self.fetch_count > self.max_fetches:
            cache.add_error(filename, {
                'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
            })
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug('Fetching remote source %r', filename)
        try:
            result = fetch_file(filename, project=self.project,
                                release=self.release,
                                dist=self.dist,
                                allow_scraping=self.allow_scraping)
        except http.BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        cache.add(filename, result.body, result.encoding)
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            return

        logger.debug('Found sourcemap %r for minified script %r',
                     sourcemap_url[:256], result.url)
        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            sourcemap_view = fetch_sourcemap(
                sourcemap_url,
                project=self.project,
                release=self.release,
                dist=self.dist,
                allow_scraping=self.allow_scraping,
            )
        except http.BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        sourcemaps.add(sourcemap_url, sourcemap_view)

        # cache any inlined sources
        for src_id, source in sourcemap_view.iter_sources():
            if sourcemap_view.has_source_contents(src_id):
                self.cache.add(
                    urljoin(sourcemap_url, source),
                    lambda view=sourcemap_view, id=src_id: view.get_source_contents(id),
                    None,
                )
Example #5
    def cache_source(self, filename, release):
        sourcemaps = self.sourcemaps
        cache = self.cache

        self.fetch_count += 1

        if self.fetch_count > self.max_fetches:
            cache.add_error(filename, {
                'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
            })
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug('Fetching remote source %r', filename)
        try:
            result = fetch_file(filename,
                                project=self.project,
                                release=release,
                                allow_scraping=self.allow_scraping)
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        cache.add(filename, result.body.split('\n'))
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            return

        logger.debug('Found sourcemap %r for minified script %r',
                     sourcemap_url[:256], result.url)
        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            sourcemap_idx = fetch_sourcemap(
                sourcemap_url,
                project=self.project,
                release=release,
                allow_scraping=self.allow_scraping,
            )
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        sourcemaps.add(sourcemap_url, sourcemap_idx)

        # cache any inlined sources
        inline_sources = get_inline_content_sources(sourcemap_idx,
                                                    sourcemap_url)
        for source in inline_sources:
            self.cache.add(*source)
Example #6
    def cache_source(self, filename, release):
        sourcemaps = self.sourcemaps
        cache = self.cache

        self.fetch_count += 1

        if self.fetch_count > self.max_fetches:
            cache.add_error(filename, {
                'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
            })
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug('Fetching remote source %r', filename)
        try:
            result = fetch_file(filename, project=self.project, release=release,
                                allow_scraping=self.allow_scraping)
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        cache.add(filename, result.body.split('\n'))
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            return

        logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)
        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            sourcemap_idx = fetch_sourcemap(
                sourcemap_url,
                project=self.project,
                release=release,
                allow_scraping=self.allow_scraping,
            )
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        sourcemaps.add(sourcemap_url, sourcemap_idx)

        # cache any inlined sources
        for source in sourcemap_idx.sources:
            next_filename = urljoin(sourcemap_url, source)
            if source in sourcemap_idx.content:
                cache.add(next_filename, sourcemap_idx.content[source])
Example #7
    def populate_source_cache(self, project, frames, release):
        pending_file_list = set()
        done_file_list = set()
        sourcemap_capable = set()

        cache = self.cache
        sourcemaps = self.sourcemaps

        for f in frames:
            # We can't even attempt to fetch source if abs_path is None
            if f.abs_path is None:
                continue
            # tbh not entirely sure how this happens, but raven-js allows this
            # to be caught. I think this comes from dev consoles and whatnot
            # where there is no page. This just bails early instead of exposing
            # a fetch error that may be confusing.
            if f.abs_path == '<anonymous>':
                continue
            pending_file_list.add(f.abs_path)
            if f.colno is not None:
                sourcemap_capable.add(f.abs_path)

        idx = 0
        while pending_file_list:
            filename = pending_file_list.pop()
            done_file_list.add(filename)

            if idx > self.max_fetches:
                cache.add_error(filename, {
                    'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
                })
                continue

            idx += 1

            # TODO: respect cache-control/max-age headers to some extent
            logger.debug('Fetching remote source %r', filename)
            try:
                result = fetch_file(filename, project=project, release=release,
                                    allow_scraping=self.allow_scraping)
            except BadSource as exc:
                cache.add_error(filename, exc.data)
                continue

            cache.add(filename, result.body.split('\n'))
            cache.alias(result.url, filename)

            sourcemap_url = discover_sourcemap(result)
            if not sourcemap_url:
                continue

            # If we didn't have a colno, a sourcemap won't do us any good
            if filename not in sourcemap_capable:
                cache.add_error(filename, {
                    'type': EventError.JS_NO_COLUMN,
                    'url': filename,
                })
                continue

            logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)

            sourcemaps.link(filename, sourcemap_url)
            if sourcemap_url in sourcemaps:
                continue

            # pull down sourcemap
            try:
                sourcemap_idx = fetch_sourcemap(
                    sourcemap_url,
                    project=project,
                    release=release,
                    allow_scraping=self.allow_scraping,
                )
            except BadSource as exc:
                cache.add_error(filename, exc.data)
                continue

            sourcemaps.add(sourcemap_url, sourcemap_idx)

            # queue up additional source files for download
            for source in sourcemap_idx.sources:
                next_filename = urljoin(sourcemap_url, source)
                if next_filename not in done_file_list:
                    if source in sourcemap_idx.content:
                        cache.add(next_filename, sourcemap_idx.content[source])
                        done_file_list.add(next_filename)
                    else:
                        pending_file_list.add(next_filename)
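
This populate_source_cache shape (also Examples #8, #9, #12, and #13) is a plain worklist: pop a file from a pending set, mark it done, and push any sources discovered in its map back onto the queue, with a hard cap on total fetches. Stripped of the Sentry specifics, a sketch of the pattern, where fetch and discover are placeholder callables:

    def crawl(seeds, fetch, discover, max_fetches=100):
        """Generic fetch worklist; returns {url: result} for every url reached."""
        pending, done, results = set(seeds), set(), {}
        fetches = 0
        while pending:
            url = pending.pop()
            done.add(url)
            if fetches >= max_fetches:
                continue  # keep draining the queue, but fetch nothing more
            fetches += 1
            result = fetch(url)
            if result is None:
                continue
            results[url] = result
            for nxt in discover(result):
                if nxt not in done:
                    pending.add(nxt)
        return results
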
Example #8
    def populate_source_cache(self, project, frames, release):
        pending_file_list = set()
        done_file_list = set()
        sourcemap_capable = set()

        cache = self.cache
        sourcemaps = self.sourcemaps

        for f in frames:
            pending_file_list.add(f.abs_path)
            if f.colno is not None:
                sourcemap_capable.add(f.abs_path)

        idx = 0
        while pending_file_list:
            idx += 1
            filename = pending_file_list.pop()
            done_file_list.add(filename)

            if idx > self.max_fetches:
                cache.add_error(filename, ERR_TOO_MANY_REMOTE_SOURCES)
                continue

            # TODO: respect cache-control/max-age headers to some extent
            logger.debug('Fetching remote source %r', filename)
            try:
                result = fetch_url(filename, project=project, release=release)
            except BadSource as exc:
                cache.add_error(filename, unicode(exc))
                continue

            cache.add(filename, result.body.splitlines())
            cache.alias(result.url, filename)

            sourcemap_url = discover_sourcemap(result)
            if not sourcemap_url:
                continue

            # If we didn't have a colno, a sourcemap won't do us any good
            if filename not in sourcemap_capable:
                cache.add_error(filename, ERR_NO_COLUMN)
                continue

            logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)

            sourcemaps.link(filename, sourcemap_url)
            if sourcemap_url in sourcemaps:
                continue

            # pull down sourcemap
            try:
                sourcemap_idx = fetch_sourcemap(
                    sourcemap_url,
                    project=project,
                    release=release,
                )
            except BadSource as exc:
                cache.add_error(filename, unicode(exc))
                continue

            sourcemaps.add(sourcemap_url, sourcemap_idx)

            # queue up additional source files for download
            for source in sourcemap_idx.sources:
                next_filename = urljoin(sourcemap_url, source)
                if next_filename not in done_file_list:
                    if sourcemap_idx.content:
                        cache.add(next_filename, sourcemap_idx.content[source])
                        done_file_list.add(next_filename)
                    else:
                        pending_file_list.add(next_filename)
Example #9
    def populate_source_cache(self, project, frames, release):
        pending_file_list = set()
        done_file_list = set()
        sourcemap_capable = set()

        cache = self.cache
        sourcemaps = self.sourcemaps

        for f in frames:
            pending_file_list.add(f.abs_path)
            if f.colno is not None:
                sourcemap_capable.add(f.abs_path)

        idx = 0
        while pending_file_list:
            idx += 1
            filename = pending_file_list.pop()
            done_file_list.add(filename)

            if idx > self.max_fetches:
                cache.add_error(filename, ERR_TOO_MANY_REMOTE_SOURCES)
                continue

            # TODO: respect cache-control/max-age headers to some extent
            logger.debug('Fetching remote source %r', filename)
            try:
                result = fetch_url(filename, project=project, release=release)
            except BadSource as exc:
                cache.add_error(filename, unicode(exc))
                continue

            cache.add(filename, result.body.splitlines())
            cache.alias(result.url, filename)

            sourcemap_url = discover_sourcemap(result)
            if not sourcemap_url:
                continue

            # If we didn't have a colno, a sourcemap won't do us any good
            if filename not in sourcemap_capable:
                cache.add_error(filename, ERR_NO_COLUMN)
                continue

            logger.debug('Found sourcemap %r for minified script %r',
                         sourcemap_url[:256], result.url)

            sourcemaps.link(filename, sourcemap_url)
            if sourcemap_url in sourcemaps:
                continue

            # pull down sourcemap
            try:
                sourcemap_idx = fetch_sourcemap(
                    sourcemap_url,
                    project=project,
                    release=release,
                )
            except BadSource as exc:
                cache.add_error(filename, unicode(exc))
                continue

            sourcemaps.add(sourcemap_url, sourcemap_idx)

            # queue up additional source files for download
            for source in sourcemap_idx.sources:
                next_filename = urljoin(sourcemap_url, source)
                if next_filename not in done_file_list:
                    if sourcemap_idx.content:
                        cache.add(next_filename, sourcemap_idx.content[source])
                        done_file_list.add(next_filename)
                    else:
                        pending_file_list.add(next_filename)
Example #10
    def cache_source(self, filename):
        """
        Look for and (if found) cache a source file and its associated source
        map (if any).
        """

        sourcemaps = self.sourcemaps
        cache = self.cache

        self.fetch_count += 1

        if self.fetch_count > self.max_fetches:
            cache.add_error(filename,
                            {"type": EventError.JS_TOO_MANY_REMOTE_SOURCES})
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug("Attempting to cache source %r", filename)
        try:
            # this both looks in the database and tries to scrape the internet
            with sentry_sdk.start_span(
                    op="JavaScriptStacktraceProcessor.cache_source.fetch_file"
            ) as span:
                span.set_data("filename", filename)
                result = fetch_file(
                    filename,
                    project=self.project,
                    release=self.release,
                    dist=self.dist,
                    allow_scraping=self.allow_scraping,
                )
        except http.BadSource as exc:
            # most people don't upload release artifacts for their third-party libraries,
            # so ignore missing node_modules files
            if exc.data["type"] == EventError.JS_MISSING_SOURCE and "node_modules" in filename:
                pass
            else:
                cache.add_error(filename, exc.data)

            # either way, there's no more for us to do here, since we don't have
            # a valid file to cache
            return
        cache.add(filename, result.body, result.encoding)
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            return

        logger.debug("Found sourcemap URL %r for minified script %r",
                     sourcemap_url[:256], result.url)
        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            with sentry_sdk.start_span(
                    op="JavaScriptStacktraceProcessor.cache_source.fetch_sourcemap"
            ) as span:
                span.set_data("sourcemap_url", sourcemap_url)
                sourcemap_view = fetch_sourcemap(
                    sourcemap_url,
                    project=self.project,
                    release=self.release,
                    dist=self.dist,
                    allow_scraping=self.allow_scraping,
                )
        except http.BadSource as exc:
            # we don't perform the same check here as above, because if someone has
            # uploaded a node_modules file, which has a sourceMappingURL, they
            # presumably would like it mapped (and would like to know why it's not
            # working, if that's the case). If they're not looking for it to be
            # mapped, then they shouldn't be uploading the source file in the
            # first place.
            cache.add_error(filename, exc.data)
            return

        sourcemaps.add(sourcemap_url, sourcemap_view)

        # cache any inlined sources
        for src_id, source_name in sourcemap_view.iter_sources():
            source_view = sourcemap_view.get_sourceview(src_id)
            if source_view is not None:
                self.cache.add(
                    non_standard_url_join(sourcemap_url, source_name),
                    source_view)
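
Example #10 additionally wraps each network step in a sentry_sdk span so fetch latency shows up in performance traces. The same two calls work for any expensive step; a minimal sketch, where the op string and data key are illustrative and fetch_file stands in for the helper used throughout these examples:

    import sentry_sdk

    def traced_fetch(url):
        # Opens a child span under whatever transaction is currently active.
        with sentry_sdk.start_span(op="http.fetch") as span:
            span.set_data("url", url)
            return fetch_file(url)
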
Example #11
    def cache_source(self, filename, release):
        sourcemaps = self.sourcemaps
        cache = self.cache

        self.fetch_count += 1

        if self.fetch_count > self.max_fetches:
            cache.add_error(filename, {
                'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
            })
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug('Fetching remote source %r', filename)
        try:
            result = fetch_file(filename, project=self.project, release=release,
                                allow_scraping=self.allow_scraping)
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        cache.add(filename, result.body.split('\n'))
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            return

        logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)
        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            sourcemap_view = fetch_sourcemap(
                sourcemap_url,
                project=self.project,
                release=release,
                allow_scraping=self.allow_scraping,
            )
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        sourcemaps.add(sourcemap_url, sourcemap_view)

        # cache any inlined sources
        # inline_sources = sourcemap_view.get_inline_content_sources(sourcemap_url)
        for src_id, source in sourcemap_view.iter_sources():
            # TODO(mattrobenolt): This is slightly less than ideal,
            # but it's the simplest path for now.
            # Ideally, we would do this lazily.
            content = sourcemap_view.get_source_contents(src_id)
            if content is not None:
                # TODO(mattrobenolt): This is gross. libsourcemap returns back
                # bytes, and our internal stuff assumed unicode. So everything else in
                # the pipeline assumes unicode and working with bytes is harder.
                # So let's coerce here to unicodes just to conform to API for both,
                # but remove this and handle bytes down the line when done.
                if isinstance(content, six.binary_type):
                    content = content.decode('utf-8', errors='replace')
                self.cache.add(urljoin(sourcemap_url, source), content.split(u'\n'))
Example #12
    def populate_source_cache(self, project, frames, release):
        pending_file_list = set()
        done_file_list = set()
        sourcemap_capable = set()

        cache = self.cache
        sourcemaps = self.sourcemaps

        for f in frames:
            # We can't even attempt to fetch source if abs_path is None
            if f.abs_path is None:
                continue
            # tbh not entirely sure how this happens, but raven-js allows this
            # to be caught. I think this comes from dev consoles and whatnot
            # where there is no page. This just bails early instead of exposing
            # a fetch error that may be confusing.
            if f.abs_path == '<anonymous>':
                continue
            pending_file_list.add(f.abs_path)
            if f.colno is not None:
                sourcemap_capable.add(f.abs_path)

        idx = 0
        while pending_file_list:
            filename = pending_file_list.pop()
            done_file_list.add(filename)

            if idx > self.max_fetches:
                cache.add_error(filename, {
                    'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
                })
                continue

            idx += 1

            # TODO: respect cache-control/max-age headers to some extent
            logger.debug('Fetching remote source %r', filename)
            try:
                result = fetch_file(filename,
                                    project=project,
                                    release=release,
                                    allow_scraping=self.allow_scraping)
            except BadSource as exc:
                cache.add_error(filename, exc.data)
                continue

            cache.add(filename, result.body.split('\n'))
            cache.alias(result.url, filename)

            sourcemap_url = discover_sourcemap(result)
            if not sourcemap_url:
                continue

            # If we didn't have a colno, a sourcemap won't do us any good
            if filename not in sourcemap_capable:
                cache.add_error(filename, {
                    'type': EventError.JS_NO_COLUMN,
                    'url': filename,
                })
                continue

            logger.debug('Found sourcemap %r for minified script %r',
                         sourcemap_url[:256], result.url)

            sourcemaps.link(filename, sourcemap_url)
            if sourcemap_url in sourcemaps:
                continue

            # pull down sourcemap
            try:
                sourcemap_idx = fetch_sourcemap(
                    sourcemap_url,
                    project=project,
                    release=release,
                    allow_scraping=self.allow_scraping,
                )
            except BadSource as exc:
                cache.add_error(filename, exc.data)
                continue

            sourcemaps.add(sourcemap_url, sourcemap_idx)

            # queue up additional source files for download
            for source in sourcemap_idx.sources:
                next_filename = urljoin(sourcemap_url, source)
                if next_filename not in done_file_list:
                    if source in sourcemap_idx.content:
                        cache.add(next_filename, sourcemap_idx.content[source])
                        done_file_list.add(next_filename)
                    else:
                        pending_file_list.add(next_filename)
Example #13
    def populate_source_cache(self, project, frames):
        pending_file_list = set()
        done_file_list = set()
        sourcemap_capable = set()

        cache = self.cache
        sourcemaps = self.sourcemaps

        for f in frames:
            pending_file_list.add(f.abs_path)
            if f.colno is not None:
                sourcemap_capable.add(f.abs_path)

        idx = 0
        while pending_file_list:
            idx += 1
            filename = pending_file_list.pop()
            done_file_list.add(filename)

            if idx > self.max_fetches:
                cache.add_error(filename, 'Not fetching context due to too many remote sources')
                continue

            # TODO: respect cache-control/max-age headers to some extent
            logger.debug('Fetching remote source %r', filename)
            result = fetch_url(filename, project=project)

            if result == BAD_SOURCE:
                # TODO(dcramer): we want better errors here
                cache.add_error(filename, 'File was unreachable or invalid')
                continue

            cache.add(filename, result.body.splitlines())
            cache.alias(result.url, filename)

            sourcemap_url = discover_sourcemap(result)
            if not sourcemap_url:
                continue

            # If we didn't have a colno, a sourcemap won't do us any good
            if filename not in sourcemap_capable:
                cache.add_error(filename, 'No column information available (cant expand sourcemap)')
                continue

            logger.debug('Found sourcemap %r for minified script %r', sourcemap_url[:256], result.url)

            sourcemaps.link(filename, sourcemap_url)
            if sourcemap_url in sourcemaps:
                continue

            # pull down sourcemap
            sourcemap_idx = fetch_sourcemap(sourcemap_url, project=project)
            if not sourcemap_idx:
                cache.add_error(filename, 'Sourcemap was not parseable')
                continue

            sourcemaps.add(sourcemap_url, sourcemap_idx)

            # queue up additional source files for download
            for source in sourcemap_idx.sources:
                next_filename = urljoin(sourcemap_url, source)
                if next_filename not in done_file_list:
                    if sourcemap_idx.content:
                        cache.add(next_filename, sourcemap_idx.content[source])
                        done_file_list.add(next_filename)
                    else:
                        pending_file_list.add(next_filename)
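
Example #13 is the oldest shape here: fetch_url returns a BAD_SOURCE sentinel that every caller must remember to compare against, while the later variants raise a BadSource exception carrying structured error data. The exception contract keeps the failure check in one place; a sketch of the difference, with fetch_raw as a hypothetical low-level fetch:

    class BadSource(Exception):
        # Sketch of the exception-based contract used in the newer examples.
        def __init__(self, data):
            super(BadSource, self).__init__(data)
            self.data = data

    def fetch_strict(url):
        result = fetch_raw(url)
        if result is None:  # the sentinel-style check happens once, here
            raise BadSource({'type': 'fetch_failed', 'url': url})
        return result
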
Example #14
    def cache_source(self, filename, release):
        sourcemaps = self.sourcemaps
        cache = self.cache

        self.fetch_count += 1

        if self.fetch_count > self.max_fetches:
            cache.add_error(filename, {
                'type': EventError.JS_TOO_MANY_REMOTE_SOURCES,
            })
            return

        # TODO: respect cache-control/max-age headers to some extent
        logger.debug('Fetching remote source %r', filename)
        try:
            result = fetch_file(filename,
                                project=self.project,
                                release=release,
                                allow_scraping=self.allow_scraping)
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        cache.add(filename, result.body.split('\n'))
        cache.alias(result.url, filename)

        sourcemap_url = discover_sourcemap(result)
        if not sourcemap_url:
            return

        logger.debug('Found sourcemap %r for minified script %r',
                     sourcemap_url[:256], result.url)
        sourcemaps.link(filename, sourcemap_url)
        if sourcemap_url in sourcemaps:
            return

        # pull down sourcemap
        try:
            sourcemap_view = fetch_sourcemap(
                sourcemap_url,
                project=self.project,
                release=release,
                allow_scraping=self.allow_scraping,
            )
        except BadSource as exc:
            cache.add_error(filename, exc.data)
            return

        sourcemaps.add(sourcemap_url, sourcemap_view)

        # cache any inlined sources
        # inline_sources = sourcemap_view.get_inline_content_sources(sourcemap_url)
        for src_id, source in sourcemap_view.iter_sources():
            # TODO(mattrobenolt): This is slightly less than ideal,
            # but it's the simplest path for now.
            # Ideally, we would do this lazily.
            content = sourcemap_view.get_source_contents(src_id)
            if content is not None:
                # TODO(mattrobenolt): This is gross. libsourcemap returns back
                # bytes, and our internal stuff assumed unicode. So everything else in
                # the pipeline assumes unicode and working with bytes is harder.
                # So let's coerce here to unicodes just to conform to API for both,
                # but remove this and handle bytes down the line when done.
                if isinstance(content, six.binary_type):
                    content = content.decode('utf-8', errors='replace')
                self.cache.add(urljoin(sourcemap_url, source),
                               content.split(u'\n'))