Example #1
def _merge_archives(release_file: ReleaseFile, new_file: File,
                    new_archive: ReleaseArchive):
    max_attempts = RELEASE_ARCHIVE_MAX_MERGE_ATTEMPTS
    success = False
    for attempt in range(max_attempts):
        old_file = release_file.file
        with ReleaseArchive(old_file.getfile().file) as old_archive:
            buffer = BytesIO()
            merge_release_archives(old_archive, new_archive, buffer)

            replacement = File.objects.create(name=old_file.name,
                                              type=old_file.type)
            buffer.seek(0)
            replacement.putfile(buffer)

            with transaction.atomic():
                release_file.refresh_from_db()
                if release_file.file == old_file:
                    # Nothing has changed. It is safe to update
                    release_file.update(file=replacement)
                    success = True
                    break
                else:
                    metrics.incr("tasks.assemble.merge_archives_retry",
                                 instance=str(attempt))
    else:
        logger.error("Failed to merge archive in %s attempts, giving up.",
                     max_attempts)

    if success:
        old_file.delete()

    new_file.delete()
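
Example #1 leans on Python's for/else: the else block runs only when the loop
finishes without hitting break. A minimal, self-contained illustration of that
control flow (a hypothetical helper, not Sentry code):

def first_even(numbers):
    """Return the first even number, or None if the loop never breaks."""
    for n in numbers:
        if n % 2 == 0:
            result = n
            break
    else:
        # Reached only when the for loop completed without a break,
        # mirroring the "giving up" branch in _merge_archives above.
        result = None
    return result

assert first_even([1, 3, 4]) == 4
assert first_even([1, 3, 5]) is None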
Example #2
def _merge_archives(release_file: ReleaseFile, new_file: File,
                    new_archive: ReleaseArchive):

    lock_key = f"assemble:merge_archives:{release_file.id}"
    lock = app.locks.get(lock_key, duration=60)

    try:
        with lock.blocking_acquire(RELEASE_ARCHIVE_MERGE_INITIAL_DELAY,
                                   RELEASE_ARCHIVE_MERGE_TIMEOUT):
            old_file = release_file.file
            old_file_contents = ReleaseFile.cache.getfile(release_file)
            buffer = BytesIO()

            with metrics.timer("tasks.assemble.merge_archives_pure"):
                did_merge = merge_release_archives(old_file_contents,
                                                   new_archive, buffer)

            if did_merge:
                replacement = File.objects.create(name=old_file.name,
                                                  type=old_file.type)
                buffer.seek(0)
                replacement.putfile(buffer)
                release_file.update(file=replacement)
                old_file.delete()

    except UnableToAcquireLock as error:
        logger.error("merge_archives.fail", extra={"error": error})

    new_file.delete()
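
The cleanup-always-runs shape of Example #2, sketched with a local
threading.Lock standing in for the distributed app.locks (an assumption made
for illustration; the real lock spans processes):

import threading

_lock = threading.Lock()  # local stand-in for app.locks.get(...)

def merge_with_lock(do_merge, cleanup, timeout=60):
    acquired = _lock.acquire(timeout=timeout)
    try:
        if acquired:
            do_merge()
    finally:
        if acquired:
            _lock.release()
        # Mirrors new_file.delete() above: runs whether or not the lock
        # was acquired and even if the merge raised.
        cleanup()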
Example #3
def _simple_update(release_file: ReleaseFile, new_file: File,
                   new_archive: ReleaseArchive,
                   additional_fields: dict) -> bool:
    """Update function used in _upsert_release_file"""
    old_file = release_file.file
    release_file.update(file=new_file, **additional_fields)
    old_file.delete()

    return True
Example #4
    def test_simple(self):
        project = self.create_project(name="foo")

        release = Release.objects.create(organization_id=project.organization_id, version="1")
        release.add_project(project)

        url = reverse(
            "sentry-api-0-organization-release-files",
            kwargs={"organization_slug": project.organization.slug, "version": release.version},
        )

        self.login_as(user=self.user)

        response = self.client.post(
            url,
            {
                "name": "http://example.com/application.js",
                "header": "X-SourceMap: http://example.com",
                "file": SimpleUploadedFile(
                    "application.js", b"function() { }", content_type="application/javascript"
                ),
            },
            format="multipart",
        )

        assert response.status_code == 201, response.content

        releasefile = ReleaseFile.objects.get(release=release)
        assert releasefile.name == "http://example.com/application.js"
        assert releasefile.ident == ReleaseFile.get_ident("http://example.com/application.js")
        assert releasefile.file.headers == {
            "Content-Type": "application/javascript",
            "X-SourceMap": "http://example.com",
        }
Example #5
def fetch_release_file(filename, release):
    cache_key = "releasefile:%s:%s" % (release.id, md5(filename).hexdigest())
    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug("Checking database for release artifact %r (release_id=%s)", filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = (
                ReleaseFile.objects.filter(release=release, ident=ident).select_related("file", "file__blob").get()
            )
        except ReleaseFile.DoesNotExist:
            logger.debug("Release artifact %r not found in database (release_id=%s)", filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        logger.debug("Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                body = fp.read()
        except Exception as e:
            logger.exception(unicode(e))
            result = -1
        else:
            result = (releasefile.file.headers, body, 200)
        cache.set(cache_key, result, 3600)

    if result == -1:
        result = None

    return result
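
This and the later fetch_release_file variants share a negative-caching idiom:
a database miss is cached as -1 so repeated lookups stay cheap, and -1 is
normalized back to None before returning. A minimal sketch with a plain dict
(illustrative names, not Sentry APIs):

_cache = {}

def lookup(key, load):
    result = _cache.get(key)
    if result is None:
        # Never checked (or expired): hit the expensive backend once.
        result = load(key)
        # Store -1 as a "known missing" sentinel so later misses are cheap.
        _cache[key] = -1 if result is None else result
    return None if result == -1 else result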
Example #6
    def test_simple(self):
        self.login_as(user=self.user)

        project = self.create_project(name="foo")

        release = Release.objects.create(
            organization_id=project.organization_id, version="1")
        release.add_project(project)

        releasefile = ReleaseFile.objects.create(
            organization_id=project.organization_id,
            release=release,
            file=File.objects.create(name="application.js",
                                     type="release.file"),
            name="http://example.com/application.js",
        )

        url = reverse(
            "sentry-api-0-organization-release-file-details",
            kwargs={
                "organization_slug": project.organization.slug,
                "version": release.version,
                "file_id": releasefile.id,
            },
        )

        response = self.client.put(url, {"name": "foobar"})

        assert response.status_code == 200, response.content
        assert response.data["id"] == six.text_type(releasefile.id)

        releasefile = ReleaseFile.objects.get(id=releasefile.id)
        assert releasefile.name == "foobar"
        assert releasefile.ident == ReleaseFile.get_ident("foobar")
Example #7
    def test_simple(self):
        project = self.create_project(name='foo')

        release = Release.objects.create(
            organization_id=project.organization_id,
            version='1',
        )
        release.add_project(project)

        url = reverse('sentry-api-0-organization-release-files', kwargs={
            'organization_slug': project.organization.slug,
            'version': release.version,
        })

        self.login_as(user=self.user)

        response = self.client.post(url, {
            'name': 'http://example.com/application.js',
            'header': 'X-SourceMap: http://example.com',
            'file': SimpleUploadedFile('application.js', b'function() { }',
                                       content_type='application/javascript'),
        }, format='multipart')

        assert response.status_code == 201, response.content

        releasefile = ReleaseFile.objects.get(release=release)
        assert releasefile.name == 'http://example.com/application.js'
        assert releasefile.ident == ReleaseFile.get_ident('http://example.com/application.js')
        assert releasefile.file.headers == {
            'Content-Type': 'application/javascript',
            'X-SourceMap': 'http://example.com',
        }
Example #8
    def test_simple(self):
        project = self.create_project(name='foo')

        release = Release.objects.create(
            organization_id=project.organization_id,
            version='1',
        )
        release.add_project(project)

        url = reverse('sentry-api-0-release-files', kwargs={
            'organization_slug': project.organization.slug,
            'project_slug': project.slug,
            'version': release.version,
        })

        self.login_as(user=self.user)

        response = self.client.post(url, {
            'name': 'http://example.com/application.js',
            'header': 'X-SourceMap: http://example.com',
            'file': SimpleUploadedFile('application.js', b'function() { }',
                                       content_type='application/javascript'),
        }, format='multipart')

        assert response.status_code == 201, response.content

        releasefile = ReleaseFile.objects.get(release=release)
        assert releasefile.name == 'http://example.com/application.js'
        assert releasefile.ident == ReleaseFile.get_ident('http://example.com/application.js')
        assert releasefile.file.headers == {
            'Content-Type': 'application/javascript',
            'X-SourceMap': 'http://example.com',
        }
Example #9
def fetch_release_file(filename, release):
    cache_key = 'release:%s:%s' % (
        release.id,
        hashlib.sha1(filename.encode('utf-8')).hexdigest(),
    )
    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)',
                     filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file').get()
        except ReleaseFile.DoesNotExist:
            logger.debug('Release artifact %r not found in database (release_id=%s)',
                         filename, release.id)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        with releasefile.file.getfile() as fp:
            body = fp.read()
        result = (releasefile.file.headers, body, 200)
        cache.set(cache_key, result, 60)

    return result
Example #10
def fetch_release_file(filename, release):
    cache_key = 'release:%s:%s' % (
        release.version,
        hashlib.sha1(filename.encode('utf-8')).hexdigest(),
    )
    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file').get()
        except ReleaseFile.DoesNotExist:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        with releasefile.file.getfile() as fp:
            body = fp.read()
        result = (releasefile.file.headers, body, 200)
        cache.set(cache_key, result, 60)

    return result
Example #11
def get_from_archive(url: str, archive: ReleaseArchive) -> Tuple[bytes, dict]:
    candidates = ReleaseFile.normalize(url)
    for candidate in candidates:
        try:
            return archive.get_file_by_url(candidate)
        except KeyError:
            pass

    # None of the filenames matched
    raise KeyError(f"Not found in archive: '{url}'")
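
The candidate loop in get_from_archive is plain EAFP and works against any
mapping. A self-contained equivalent, with a dict standing in for the archive:

def get_first_match(store, candidates):
    for candidate in candidates:
        try:
            return store[candidate]
        except KeyError:
            pass
    # None of the candidate keys matched.
    raise KeyError(f"none of {candidates!r} found")

assert get_first_match({"~/app.js": b"..."}, ["http://x/app.js", "~/app.js"]) == b"..."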
Example #12
def pseudo_releasefile(url, info, dist):
    """Create a pseudo-ReleaseFile from an ArtifactIndex entry"""
    return ReleaseFile(
        name=url,
        file=File(
            headers=info.get("headers", {}),
            size=info["size"],
            timestamp=info["date_created"],
            checksum=info["sha1"],
        ),
        dist_id=dist.id if dist else dist,
    )
Example #13
def get_index_entry(release, dist, url) -> Optional[dict]:
    try:
        index = get_artifact_index(release, dist)
    except Exception as exc:
        logger.error("sourcemaps.index_read_failed", exc_info=exc)
        return None

    if index:
        for candidate in ReleaseFile.normalize(url):
            entry = index.get("files", {}).get(candidate)
            if entry:
                return entry

    return None
Example #14
def get_cache_keys(filename, release, dist):
    dist_name = dist and dist.name or None
    releasefile_ident = ReleaseFile.get_ident(filename, dist_name)
    cache_key = get_release_file_cache_key(release_id=release.id,
                                           releasefile_ident=releasefile_ident)

    # Cache key to store file metadata, currently only the size of the
    # compressed version of the file. We cannot use cache_key because large
    # payloads (silently) fail to cache due to e.g. the memcached payload
    # size limitation, and we use the metadata to avoid compressing such files.
    cache_key_meta = get_release_file_cache_key_meta(
        release_id=release.id, releasefile_ident=releasefile_ident)

    return cache_key, cache_key_meta
Example #15
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5(filename).hexdigest(),
    )
    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file', 'file__blob').get()
        except ReleaseFile.DoesNotExist:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(unicode(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            # Write the compressed version to cache, but return the deflated version
            cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)
            result = (releasefile.file.headers, body, 200)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        result = (result[0], body, result[2])

    return result
Example #16
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5(filename).hexdigest(),
    )
    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)',
                     filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file').get()
        except ReleaseFile.DoesNotExist:
            logger.debug('Release artifact %r not found in database (release_id=%s)',
                         filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(unicode(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            # Write the compressed version to cache, but return the deflated version
            cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)
            result = (releasefile.file.headers, body, 200)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        result = (result[0], body, result[2])

    return result
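
Examples #15 and #16 cache a compressed body and call zlib.decompress on
cache hits, which implies compress_file yields a zlib stream alongside the
raw bytes. The round trip in isolation:

import zlib

body = b"function() { }"
z_body = zlib.compress(body)            # what gets written to the cache
assert zlib.decompress(z_body) == body  # what a cache hit hands back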
Example #17
def get_artifact_index(release, dist):
    dist_name = dist and dist.name or None

    ident = ReleaseFile.get_ident(ARTIFACT_INDEX_FILENAME, dist_name)
    cache_key = f"artifact-index:v1:{release.id}:{ident}"
    result = cache.get(cache_key)
    if result == -1:
        index = None
    elif result:
        index = json.loads(result)
    else:
        index = read_artifact_index(release, dist, use_cache=True)
        cache_value = -1 if index is None else json.dumps(index)
        # Only cache for a short time to keep the manifest up-to-date
        cache.set(cache_key, cache_value, timeout=60)

    return index
Example #18
def fetch_release_archive(release, dist) -> Optional[IO]:
    """Fetch release archive and cache if possible.

    If the return value is not None, the caller is responsible for closing the stream.
    """
    dist_name = dist and dist.name or None
    releasefile_ident = ReleaseFile.get_ident(RELEASE_ARCHIVE_FILENAME,
                                              dist_name)
    cache_key = get_release_file_cache_key(release_id=release.id,
                                           releasefile_ident=releasefile_ident)

    result = cache.get(cache_key)

    if result == -1:
        return None
    elif result:
        return BytesIO(result)
    else:
        qs = ReleaseFile.objects.filter(
            release=release, dist=dist,
            ident=releasefile_ident).select_related("file")
        try:
            releasefile = qs[0]
        except IndexError:
            # Cache as nonexistent:
            cache.set(cache_key, -1, 60)
            return None
        else:
            try:
                file_ = fetch_retry_policy(
                    lambda: ReleaseFile.cache.getfile(releasefile))
            except Exception:
                logger.error("sourcemaps.read_archive_failed",
                             exc_info=sys.exc_info())

                return None

            # This will implicitly skip too large payloads.
            cache.set(cache_key, file_.read(), 3600)
            file_.seek(0)

            return file_
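
Per the docstring of fetch_release_archive, a non-None return value must be
closed by the caller. A small wrapper honoring that contract (release and
dist are assumed to come from the calling scope):

def read_archive_bytes(release, dist):
    fp = fetch_release_archive(release, dist)
    if fp is None:
        return None
    try:
        return fp.read()
    finally:
        fp.close()  # the caller owns the stream, so always close it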
Example #19
    def test_simple(self):
        self.login_as(user=self.user)

        project = self.create_project(name='foo')

        release = Release.objects.create(
            organization_id=project.organization_id,
            version='1',
        )
        release.add_project(project)

        releasefile = ReleaseFile.objects.create(
            organization_id=project.organization_id,
            release=release,
            file=File.objects.create(
                name='application.js',
                type='release.file',
            ),
            name='http://example.com/application.js'
        )

        url = reverse(
            'sentry-api-0-project-release-file-details',
            kwargs={
                'organization_slug': project.organization.slug,
                'project_slug': project.slug,
                'version': release.version,
                'file_id': releasefile.id,
            }
        )

        response = self.client.put(url, {
            'name': 'foobar',
        })

        assert response.status_code == 200, response.content
        assert response.data['id'] == six.text_type(releasefile.id)

        releasefile = ReleaseFile.objects.get(id=releasefile.id)
        assert releasefile.name == 'foobar'
        assert releasefile.ident == ReleaseFile.get_ident('foobar')
Example #20
def fetch_release_file(filename, release):
    cache_key = 'releasefile:%s:%s' % (
        release.id,
        md5(filename).hexdigest(),
    )
    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file').get()
        except ReleaseFile.DoesNotExist:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                body = fp.read()
        except Exception as e:
            logger.exception(unicode(e))
            result = -1
        else:
            result = (releasefile.file.headers, body, 200)
        cache.set(cache_key, result, 300)

    if result == -1:
        result = None

    return result
Example #21
    def test_simple(self):
        project = self.create_project(name="foo")

        release = Release.objects.create(project=project, organization_id=project.organization_id, version="1")
        release.add_project(project)

        url = reverse(
            "sentry-api-0-release-files",
            kwargs={
                "organization_slug": project.organization.slug,
                "project_slug": project.slug,
                "version": release.version,
            },
        )

        self.login_as(user=self.user)

        response = self.client.post(
            url,
            {
                "name": "http://example.com/application.js",
                "header": "X-SourceMap: http://example.com",
                "file": SimpleUploadedFile("application.js", b"function() { }", content_type="application/javascript"),
            },
            format="multipart",
        )

        assert response.status_code == 201, response.content

        releasefile = ReleaseFile.objects.get(release=release)
        assert releasefile.name == "http://example.com/application.js"
        assert releasefile.ident == ReleaseFile.get_ident("http://example.com/application.js")
        assert releasefile.file.headers == {
            "Content-Type": "application/javascript",
            "X-SourceMap": "http://example.com",
        }
Example #22
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (release.id, md5_text(filename).hexdigest(), )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf
                for ident in filename_idents
                for rf in possible_files
                if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
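
The priority pick in Example #22 iterates idents in priority order first, so
the first ident that any row matches wins. With plain dicts standing in for
ReleaseFile rows:

filename_idents = ["ident-with-host", "ident-hostless"]  # priority order
possible_files = [{"ident": "ident-hostless"}, {"ident": "ident-with-host"}]

releasefile = next(
    rf for ident in filename_idents for rf in possible_files
    if rf["ident"] == ident
)
assert releasefile["ident"] == "ident-with-host"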
Example #23
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                ident__in=filename_idents,
            ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next(
                (f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {
                k.lower(): v
                for k, v in releasefile.file.headers.items()
            }
            encoding = get_encoding_from_headers(headers)
            result = (headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = (result[0], zlib.decompress(result[1]), result[2], encoding)

    return result
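
The protocol-and-host stripping from Example #23 as a standalone helper
(Python 3 urllib; assumed equivalent to the snippet's urlparse import):

from urllib.parse import urlparse

def hostless(url):
    # http://example.com/foo?bar => ~/foo?bar
    parsed = urlparse(url)
    path = "~" + parsed.path
    if parsed.query:
        path += "?" + parsed.query
    return path

assert hostless("http://example.com/foo?bar") == "~/foo?bar"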
Example #24
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (release.id, md5_text(filename).hexdigest(), )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf
                for ident in filename_idents
                for rf in possible_files
                if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error('sourcemap.compress_read_failed', exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
Example #25
def fetch_release_file(filename, release, dist=None):
    """
    Attempt to retrieve a release artifact from the database.

    Caches the result of that attempt (whether successful or not).
    """
    dist_name = dist and dist.name or None
    cache_key, cache_key_meta = get_cache_keys(filename, release, dist)

    logger.debug("Checking cache for release artifact %r (release_id=%s)",
                 filename, release.id)
    result = cache.get(cache_key)

    # not in the cache (meaning we haven't checked the database recently), so check the database
    if result is None:
        with metrics.timer("sourcemaps.release_artifact_from_file"):
            filename_choices = ReleaseFile.normalize(filename)
            filename_idents = [
                ReleaseFile.get_ident(f, dist_name) for f in filename_choices
            ]

            logger.debug(
                "Checking database for release artifact %r (release_id=%s)",
                filename, release.id)

            possible_files = list(
                ReleaseFile.objects.filter(
                    release_id=release.id,
                    dist_id=dist.id if dist else dist,
                    ident__in=filename_idents,
                ).select_related("file"))

            if len(possible_files) == 0:
                logger.debug(
                    "Release artifact %r not found in database (release_id=%s)",
                    filename,
                    release.id,
                )
                cache.set(cache_key, -1, 60)
                return None

            elif len(possible_files) == 1:
                releasefile = possible_files[0]

            else:
                # Pick first one that matches in priority order.
                # This is O(N*M) but there are only ever at most 4 things here
                # so not really worth optimizing.
                releasefile = next(rf for ident in filename_idents
                                   for rf in possible_files
                                   if rf.ident == ident)

            logger.debug(
                "Found release artifact %r (id=%s, release_id=%s)",
                filename,
                releasefile.id,
                release.id,
            )

            result = fetch_and_cache_artifact(
                filename,
                lambda: ReleaseFile.cache.getfile(releasefile),
                cache_key,
                cache_key_meta,
                releasefile.file.headers,
                compress_file,
            )

    # in the cache as an unsuccessful attempt
    elif result == -1:
        result = None

    # in the cache as a successful attempt, including the zipped contents of the file
    else:
        result = result_from_cache(filename, result)

    return result
Example #26
def _simple_update(release_file: ReleaseFile, new_file: File,
                   new_archive: ReleaseArchive):
    """ Update function used in _upsert_release_file """
    old_file = release_file.file
    release_file.update(file=new_file)
    old_file.delete()
Example #27
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug(
            'Checking database for release artifact %r (release_id=%s)',
            filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                ident__in=filename_idents,
            ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)',
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next(
                (f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            try:
                result = (releasefile.file.headers, body.decode('utf-8'), 200)
            except UnicodeDecodeError:
                error = {
                    'type': EventError.JS_INVALID_SOURCE_ENCODING,
                    'value': 'utf8',
                    'url': expose_url(releasefile.name),
                }
                raise CannotFetchSource(error)
            else:
                # Write the compressed version to cache, but return the deflated version
                cache.set(cache_key, (releasefile.file.headers, z_body, 200),
                          3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        try:
            result = (result[0], body.decode('utf-8'), result[2])
        except UnicodeDecodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                'url': expose_url(releasefile.name),
            }
            raise CannotFetchSource(error)

    return result
Example #28
    def test_compression(self, mock_compress_file):
        """
        For files larger than max memcached payload size we want to avoid
        pointless compression and  caching attempt since it fails silently.

        Tests scenarios:

        - happy path where compressed file is successfully cached
        - compressed payload is too large to cache and we will avoid
          compression and caching while the metadata cache exists

        """
        project = self.project
        release = Release.objects.create(
            organization_id=project.organization_id, version="abc")
        release.add_project(project)

        filename = "file.min.js"
        file = File.objects.create(
            name=filename,
            type="release.file",
            headers={"Content-Type": "application/json; charset=utf-8"},
        )

        binary_body = unicode_body.encode("utf-8")
        file.putfile(BytesIO(binary_body))

        ReleaseFile.objects.create(name="file.min.js",
                                   release=release,
                                   organization_id=project.organization_id,
                                   file=file)

        mock_compress_file.return_value = (binary_body, binary_body)

        releasefile_ident = ReleaseFile.get_ident(filename, None)
        cache_key = get_release_file_cache_key(
            release_id=release.id, releasefile_ident=releasefile_ident)
        cache_key_meta = get_release_file_cache_key_meta(
            release_id=release.id, releasefile_ident=releasefile_ident)

        fetch_release_file(filename, release)

        # Here the ANY is File() retrieved from cache/db
        assert mock_compress_file.mock_calls == [call(ANY)]
        assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
        assert cache.get(cache_key)

        # Remove cache and check that calling fetch_release_file will do the
        # compression and caching again

        cache.set(cache_key, None)
        mock_compress_file.reset_mock()

        fetch_release_file(filename, release)

        assert mock_compress_file.mock_calls == [call(ANY)]
        assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
        assert cache.get(cache_key)

        # If the file is bigger than the max cache value threshold, avoid
        # compression and caching
        cache.set(cache_key, None)
        mock_compress_file.reset_mock()
        with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE",
                   len(binary_body) - 1):
            result = fetch_release_file(filename, release)

        assert result == http.UrlResult(
            filename,
            {"content-type": "application/json; charset=utf-8"},
            binary_body,
            200,
            "utf-8",
        )

        assert mock_compress_file.mock_calls == []
        assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
        assert cache.get(cache_key) is None

        # If the file is bigger than the max cache value threshold, but the
        # metadata cache is empty as well, compress and attempt to cache anyway
        cache.set(cache_key, None)
        cache.set(cache_key_meta, None)
        mock_compress_file.reset_mock()
        with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE",
                   len(binary_body) - 1):
            result = fetch_release_file(filename, release)

        assert result == http.UrlResult(
            filename,
            {"content-type": "application/json; charset=utf-8"},
            binary_body,
            200,
            "utf-8",
        )

        assert mock_compress_file.mock_calls == [call(ANY)]
        assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
        assert cache.get(cache_key)

        # If the file is smaller than the max cache value threshold, but the
        # cache is empty, compress and cache
        cache.set(cache_key, None)
        mock_compress_file.reset_mock()
        with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE",
                   len(binary_body) + 1):
            result = fetch_release_file(filename, release)

        assert result == http.UrlResult(
            filename,
            {"content-type": "application/json; charset=utf-8"},
            binary_body,
            200,
            "utf-8",
        )

        assert mock_compress_file.mock_calls == [call(ANY)]
        assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
        assert cache.get(cache_key)
Example #29
def fetch_release_file(filename, release, dist=None):
    """
    Attempt to retrieve a release artifact from the database.

    Caches the result of that attempt (whether successful or not).
    """

    dist_name = dist and dist.name or None
    releasefile_ident = ReleaseFile.get_ident(filename, dist_name)
    cache_key = get_release_file_cache_key(release_id=release.id,
                                           releasefile_ident=releasefile_ident)
    # Cache key to store file metadata, currently only the size of the
    # compressed version of the file. We cannot use the cache_key because large
    # payloads (silently) fail to cache due to e.g. the memcached payload
    # size limitation, and we use the metadata to avoid compressing such files.
    cache_key_meta = get_release_file_cache_key_meta(
        release_id=release.id, releasefile_ident=releasefile_ident)

    logger.debug("Checking cache for release artifact %r (release_id=%s)",
                 filename, release.id)
    result = cache.get(cache_key)

    # not in the cache (meaning we haven't checked the database recently), so check the database
    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [
            ReleaseFile.get_ident(f, dist_name) for f in filename_choices
        ]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)",
            filename, release.id)

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist,
                ident__in=filename_idents).select_related("file"))

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)",
                filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        elif len(possible_files) == 1:
            releasefile = possible_files[0]

        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(rf for ident in filename_idents
                               for rf in possible_files if rf.ident == ident)

        logger.debug("Found release artifact %r (id=%s, release_id=%s)",
                     filename, releasefile.id, release.id)

        # If the release file is not in cache, check if we can retrieve at
        # least the size metadata from cache and prevent compression and
        # caching if payload exceeds the backend limit.
        z_body_size = None

        if CACHE_MAX_VALUE_SIZE:
            cache_meta = cache.get(cache_key_meta)
            if cache_meta:
                z_body_size = int(cache_meta.get("compressed_size"))

        def fetch_release_body():
            with ReleaseFile.cache.getfile(releasefile) as fp:
                if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE:
                    return None, fp.read()
                else:
                    return compress_file(fp)

        try:
            with metrics.timer("sourcemaps.release_file_read"):
                z_body, body = fetch_retry_policy(fetch_release_body)
        except Exception:
            logger.error("sourcemap.compress_read_failed",
                         exc_info=sys.exc_info())
            result = None
        else:
            headers = {
                k.lower(): v
                for k, v in releasefile.file.headers.items()
            }
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)

            # If we don't have the compressed body for caching because the
            # cached metadata said it is too large payload for the cache
            # backend, do not attempt to cache.
            if z_body:
                # This will implicitly skip too large payloads. Those will be cached
                # on the file system by `ReleaseFile.cache`, instead.
                cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

                # In case the previous call to cache implicitly fails, we use
                # the meta data to avoid pointless compression which is done
                # only for caching.
                cache.set(cache_key_meta, {"compressed_size": len(z_body)},
                          3600)

    # in the cache as an unsuccessful attempt
    elif result == -1:
        result = None

    # in the cache as a successful attempt, including the zipped contents of the file
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(filename, result[0],
                                zlib.decompress(result[1]), result[2],
                                encoding)

    return result
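
The metadata gate in Example #29, reduced to a predicate: compression is
skipped only when a cache limit is configured and the recorded compressed
size already exceeds it (shapes taken from the snippet; the helper name is
illustrative):

def should_compress(cache_meta, max_value_size):
    if not max_value_size or not cache_meta:
        return True  # no limit configured, or no metadata yet: compress and try
    return int(cache_meta["compressed_size"]) <= max_value_size

assert should_compress({"compressed_size": "10"}, 100) is True
assert should_compress({"compressed_size": "200"}, 100) is False
assert should_compress(None, 100) is True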
Example #30
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)',
                     filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(ReleaseFile.objects.filter(
            release=release,
            ident__in=filename_idents,
        ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug('Release artifact %r not found in database (release_id=%s)',
                         filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next((f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            try:
                result = (releasefile.file.headers, body.decode('utf-8'), 200)
            except UnicodeDecodeError:
                error = {
                    'type': EventError.JS_INVALID_SOURCE_ENCODING,
                    'value': 'utf8',
                    'url': expose_url(releasefile.name),
                }
                raise CannotFetchSource(error)
            else:
                # Write the compressed version to cache, but return the deflated version
                cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        try:
            result = (result[0], body.decode('utf-8'), result[2])
        except UnicodeDecodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                'url': expose_url(releasefile.name),
            }
            raise CannotFetchSource(error)

    return result
Example #31
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)',
                 filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)',
                     filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(ReleaseFile.objects.filter(
            release=release,
            ident__in=filename_idents,
        ).select_related('file'))

        if len(possible_files) == 0:
            logger.debug('Release artifact %r not found in database (release_id=%s)',
                         filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next((f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)',
                     filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = (headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = (result[0], zlib.decompress(result[1]), result[2], encoding)

    return result
Example #32
def fetch_release_file(filename, release, dist=None):
    dist_name = dist and dist.name or None
    cache_key = "releasefile:v1:%s:%s" % (release.id, ReleaseFile.get_ident(filename, dist_name))

    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)", filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist, ident__in=filename_idents
            ).select_related("file")
        )

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)", filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                (rf for ident in filename_idents for rf in possible_files if rf.ident == ident)
            )

        logger.debug(
            "Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id
        )
        try:
            with metrics.timer("sourcemaps.release_file_read"):
                with ReleaseFile.cache.getfile(releasefile) as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            # This will implicitly skip too large payloads. Those will be cached
            # on the file system by `ReleaseFile.cache`, instead.
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result