def _merge_archives(release_file: ReleaseFile, new_file: File, new_archive: ReleaseArchive):
    max_attempts = RELEASE_ARCHIVE_MAX_MERGE_ATTEMPTS
    success = False
    for attempt in range(max_attempts):
        old_file = release_file.file
        with ReleaseArchive(old_file.getfile().file) as old_archive:
            buffer = BytesIO()
            merge_release_archives(old_archive, new_archive, buffer)

            replacement = File.objects.create(name=old_file.name, type=old_file.type)
            buffer.seek(0)
            replacement.putfile(buffer)

        with transaction.atomic():
            release_file.refresh_from_db()
            if release_file.file == old_file:
                # Nothing has changed. It is safe to update.
                release_file.update(file=replacement)
                success = True
                break
            else:
                metrics.incr("tasks.assemble.merge_archives_retry", instance=str(attempt))
    else:
        logger.error("Failed to merge archive in %s attempts, giving up.", max_attempts)

    if success:
        old_file.delete()

    new_file.delete()
def _merge_archives(release_file: ReleaseFile, new_file: File, new_archive: ReleaseArchive):
    lock_key = f"assemble:merge_archives:{release_file.id}"
    lock = app.locks.get(lock_key, duration=60)
    try:
        with lock.blocking_acquire(
            RELEASE_ARCHIVE_MERGE_INITIAL_DELAY, RELEASE_ARCHIVE_MERGE_TIMEOUT
        ):
            old_file = release_file.file
            old_file_contents = ReleaseFile.cache.getfile(release_file)
            buffer = BytesIO()
            with metrics.timer("tasks.assemble.merge_archives_pure"):
                did_merge = merge_release_archives(old_file_contents, new_archive, buffer)
            if did_merge:
                replacement = File.objects.create(name=old_file.name, type=old_file.type)
                buffer.seek(0)
                replacement.putfile(buffer)
                release_file.update(file=replacement)
                old_file.delete()
    except UnableToAcquireLock as error:
        logger.error("merge_archives.fail", extra={"error": error})

    new_file.delete()
def _simple_update(
    release_file: ReleaseFile, new_file: File, new_archive: ReleaseArchive, additional_fields: dict
) -> bool:
    """Update function used in _upsert_release_file"""
    old_file = release_file.file
    release_file.update(file=new_file, **additional_fields)
    old_file.delete()

    return True
def test_simple(self): project = self.create_project(name="foo") release = Release.objects.create(organization_id=project.organization_id, version="1") release.add_project(project) url = reverse( "sentry-api-0-organization-release-files", kwargs={"organization_slug": project.organization.slug, "version": release.version}, ) self.login_as(user=self.user) response = self.client.post( url, { "name": "http://example.com/application.js", "header": "X-SourceMap: http://example.com", "file": SimpleUploadedFile( "application.js", b"function() { }", content_type="application/javascript" ), }, format="multipart", ) assert response.status_code == 201, response.content releasefile = ReleaseFile.objects.get(release=release) assert releasefile.name == "http://example.com/application.js" assert releasefile.ident == ReleaseFile.get_ident("http://example.com/application.js") assert releasefile.file.headers == { "Content-Type": "application/javascript", "X-SourceMap": "http://example.com", }
def fetch_release_file(filename, release): cache_key = "releasefile:%s:%s" % (release.id, md5(filename).hexdigest()) logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id) result = cache.get(cache_key) if result is None: logger.debug("Checking database for release artifact %r (release_id=%s)", filename, release.id) ident = ReleaseFile.get_ident(filename) try: releasefile = ( ReleaseFile.objects.filter(release=release, ident=ident).select_related("file", "file__blob").get() ) except ReleaseFile.DoesNotExist: logger.debug("Release artifact %r not found in database (release_id=%s)", filename, release.id) cache.set(cache_key, -1, 60) return None logger.debug("Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id) try: with releasefile.file.getfile() as fp: body = fp.read() except Exception as e: logger.exception(unicode(e)) result = -1 else: result = (releasefile.file.headers, body, 200) cache.set(cache_key, result, 3600) if result == -1: result = None return result
def test_simple(self):
    self.login_as(user=self.user)

    project = self.create_project(name="foo")

    release = Release.objects.create(organization_id=project.organization_id, version="1")
    release.add_project(project)

    releasefile = ReleaseFile.objects.create(
        organization_id=project.organization_id,
        release=release,
        file=File.objects.create(name="application.js", type="release.file"),
        name="http://example.com/application.js",
    )

    url = reverse(
        "sentry-api-0-organization-release-file-details",
        kwargs={
            "organization_slug": project.organization.slug,
            "version": release.version,
            "file_id": releasefile.id,
        },
    )

    response = self.client.put(url, {"name": "foobar"})

    assert response.status_code == 200, response.content
    assert response.data["id"] == six.text_type(releasefile.id)

    releasefile = ReleaseFile.objects.get(id=releasefile.id)
    assert releasefile.name == "foobar"
    assert releasefile.ident == ReleaseFile.get_ident("foobar")
def test_simple(self):
    project = self.create_project(name='foo')

    release = Release.objects.create(
        organization_id=project.organization_id,
        version='1',
    )
    release.add_project(project)

    url = reverse('sentry-api-0-organization-release-files', kwargs={
        'organization_slug': project.organization.slug,
        'version': release.version,
    })

    self.login_as(user=self.user)

    response = self.client.post(url, {
        'name': 'http://example.com/application.js',
        'header': 'X-SourceMap: http://example.com',
        'file': SimpleUploadedFile('application.js', b'function() { }',
                                   content_type='application/javascript'),
    }, format='multipart')

    assert response.status_code == 201, response.content

    releasefile = ReleaseFile.objects.get(release=release)
    assert releasefile.name == 'http://example.com/application.js'
    assert releasefile.ident == ReleaseFile.get_ident('http://example.com/application.js')
    assert releasefile.file.headers == {
        'Content-Type': 'application/javascript',
        'X-SourceMap': 'http://example.com',
    }
def test_simple(self):
    project = self.create_project(name='foo')

    release = Release.objects.create(
        organization_id=project.organization_id,
        version='1',
    )
    release.add_project(project)

    url = reverse('sentry-api-0-release-files', kwargs={
        'organization_slug': project.organization.slug,
        'project_slug': project.slug,
        'version': release.version,
    })

    self.login_as(user=self.user)

    response = self.client.post(url, {
        'name': 'http://example.com/application.js',
        'header': 'X-SourceMap: http://example.com',
        'file': SimpleUploadedFile('application.js', b'function() { }',
                                   content_type='application/javascript'),
    }, format='multipart')

    assert response.status_code == 201, response.content

    releasefile = ReleaseFile.objects.get(release=release)
    assert releasefile.name == 'http://example.com/application.js'
    assert releasefile.ident == ReleaseFile.get_ident('http://example.com/application.js')
    assert releasefile.file.headers == {
        'Content-Type': 'application/javascript',
        'X-SourceMap': 'http://example.com',
    }
def fetch_release_file(filename, release):
    cache_key = 'release:%s:%s' % (
        release.id,
        hashlib.sha1(filename.encode('utf-8')).hexdigest(),
    )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)', filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file').get()
        except ReleaseFile.DoesNotExist:
            logger.debug('Release artifact %r not found in database (release_id=%s)', filename, release.id)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id)
        with releasefile.file.getfile() as fp:
            body = fp.read()
        result = (releasefile.file.headers, body, 200)
        cache.set(cache_key, result, 60)

    return result
def fetch_release_file(filename, release):
    cache_key = 'release:%s:%s' % (
        release.version,
        hashlib.sha1(filename.encode('utf-8')).hexdigest(),
    )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)', filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file').get()
        except ReleaseFile.DoesNotExist:
            logger.debug('Release artifact %r not found in database (release_id=%s)', filename, release.id)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id)
        with releasefile.file.getfile() as fp:
            body = fp.read()
        result = (releasefile.file.headers, body, 200)
        cache.set(cache_key, result, 60)

    return result
def get_from_archive(url: str, archive: ReleaseArchive) -> Tuple[bytes, dict]:
    candidates = ReleaseFile.normalize(url)
    for candidate in candidates:
        try:
            return archive.get_file_by_url(candidate)
        except KeyError:
            pass

    # None of the filenames matched
    raise KeyError(f"Not found in archive: '{url}'")
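A hypothetical usage sketch for get_from_archive; the archive path and URL below are invented for illustration:

# Hypothetical usage: ReleaseFile.normalize expands the requested URL into
# fallback candidates, and the first one present in the archive wins.
with ReleaseArchive(open("release-artifacts.zip", "rb")) as archive:  # invented path
    try:
        body, headers = get_from_archive("http://example.com/application.js", archive)
    except KeyError:
        body, headers = None, {}  # none of the normalized candidates matched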
def pseudo_releasefile(url, info, dist):
    """Create a pseudo-ReleaseFile from an ArtifactIndex entry"""
    return ReleaseFile(
        name=url,
        file=File(
            headers=info.get("headers", {}),
            size=info["size"],
            timestamp=info["date_created"],
            checksum=info["sha1"],
        ),
        dist_id=dist.id if dist else dist,
    )
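A minimal sketch of calling pseudo_releasefile; the entry dict mirrors exactly the keys the function reads, with invented values:

# Invented ArtifactIndex entry containing the keys pseudo_releasefile reads.
entry = {
    "headers": {"Content-Type": "application/javascript"},
    "size": 1024,
    "date_created": "2021-01-01T00:00:00Z",
    "sha1": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
}
release_file = pseudo_releasefile("~/application.js", entry, dist=None)
assert release_file.file.size == 1024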
def get_index_entry(release, dist, url) -> Optional[dict]:
    try:
        index = get_artifact_index(release, dist)
    except Exception as exc:
        logger.error("sourcemaps.index_read_failed", exc_info=exc)
        return None

    if index:
        for candidate in ReleaseFile.normalize(url):
            entry = index.get("files", {}).get(candidate)
            if entry:
                return entry

    return None
def get_cache_keys(filename, release, dist):
    dist_name = dist and dist.name or None
    releasefile_ident = ReleaseFile.get_ident(filename, dist_name)
    cache_key = get_release_file_cache_key(
        release_id=release.id, releasefile_ident=releasefile_ident
    )

    # Cache key to store file metadata, currently only the size of the
    # compressed version of the file. We cannot use the cache_key because
    # large payloads (silently) fail to cache due to e.g. memcached payload
    # size limitations, and we use the metadata to avoid compression of such
    # files.
    cache_key_meta = get_release_file_cache_key_meta(
        release_id=release.id, releasefile_ident=releasefile_ident
    )

    return cache_key, cache_key_meta
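For orientation, a sketch of what the two key builders might produce; the real implementations live elsewhere in the codebase, so the formats below are assumptions:

# Assumed key formats, for illustration only; the actual builders may differ.
def get_release_file_cache_key(release_id, releasefile_ident):
    return f"releasefile:v1:{release_id}:{releasefile_ident}"

def get_release_file_cache_key_meta(release_id, releasefile_ident):
    # Derived from the primary key so the two entries always pair up.
    return "meta:" + get_release_file_cache_key(release_id, releasefile_ident)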
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5(filename).hexdigest(),
    )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)', filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file', 'file__blob').get()
        except ReleaseFile.DoesNotExist:
            logger.debug('Release artifact %r not found in database (release_id=%s)', filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(unicode(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            # Write the compressed version to cache, but return the uncompressed version
            cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)
            result = (releasefile.file.headers, body, 200)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        result = (result[0], body, result[2])

    return result
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5(filename).hexdigest(),
    )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)', filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file').get()
        except ReleaseFile.DoesNotExist:
            logger.debug('Release artifact %r not found in database (release_id=%s)', filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(unicode(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            # Write the compressed version to cache, but return the uncompressed version
            cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)
            result = (releasefile.file.headers, body, 200)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        result = (result[0], body, result[2])

    return result
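A minimal sketch of the compress-then-cache round trip used above, assuming compress_file returns a (zlib-compressed bytes, raw bytes) pair, which is what the cache-hit branch's zlib.decompress implies:

import zlib
from io import BytesIO

def compress_file(fp):  # assumed behavior of the real helper
    body = fp.read()
    return zlib.compress(body), body

z_body, body = compress_file(BytesIO(b"function() { }"))
assert zlib.decompress(z_body) == body  # what the cache-hit branch relies on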
def get_artifact_index(release, dist):
    dist_name = dist and dist.name or None

    ident = ReleaseFile.get_ident(ARTIFACT_INDEX_FILENAME, dist_name)
    cache_key = f"artifact-index:v1:{release.id}:{ident}"
    result = cache.get(cache_key)
    if result == -1:
        index = None
    elif result:
        index = json.loads(result)
    else:
        index = read_artifact_index(release, dist, use_cache=True)
        cache_value = -1 if index is None else json.dumps(index)
        # Only cache for a short time to keep the manifest up-to-date
        cache.set(cache_key, cache_value, timeout=60)

    return index
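The -1 sentinel here and in the fetchers above implements negative caching: a recent miss is remembered briefly so the database is not re-queried on every event. A generic sketch of the pattern, with a hypothetical load callback:

def cached_lookup(cache, key, load, missing_ttl=60, hit_ttl=3600):
    result = cache.get(key)
    if result is None:  # never checked, or the entry expired: do the real lookup
        result = load()
        if result is None:
            cache.set(key, -1, missing_ttl)  # negative-cache the miss, briefly
        else:
            cache.set(key, result, hit_ttl)
    elif result == -1:  # cached miss: normalize back to None for callers
        result = None
    return result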
def fetch_release_archive(release, dist) -> Optional[IO]:
    """Fetch release archive and cache if possible.

    If the return value is not empty, the caller is responsible for closing the stream.
    """
    dist_name = dist and dist.name or None
    releasefile_ident = ReleaseFile.get_ident(RELEASE_ARCHIVE_FILENAME, dist_name)
    cache_key = get_release_file_cache_key(
        release_id=release.id, releasefile_ident=releasefile_ident
    )

    result = cache.get(cache_key)

    if result == -1:
        return None
    elif result:
        return BytesIO(result)
    else:
        qs = ReleaseFile.objects.filter(
            release=release, dist=dist, ident=releasefile_ident
        ).select_related("file")
        try:
            releasefile = qs[0]
        except IndexError:
            # Cache as nonexistent:
            cache.set(cache_key, -1, 60)
            return None
        else:
            try:
                file_ = fetch_retry_policy(lambda: ReleaseFile.cache.getfile(releasefile))
            except Exception:
                logger.error("sourcemaps.read_archive_failed", exc_info=sys.exc_info())
                return None

            # This will implicitly skip too large payloads.
            cache.set(cache_key, file_.read(), 3600)
            file_.seek(0)

            return file_
def test_simple(self):
    self.login_as(user=self.user)

    project = self.create_project(name='foo')

    release = Release.objects.create(
        organization_id=project.organization_id,
        version='1',
    )
    release.add_project(project)

    releasefile = ReleaseFile.objects.create(
        organization_id=project.organization_id,
        release=release,
        file=File.objects.create(
            name='application.js',
            type='release.file',
        ),
        name='http://example.com/application.js'
    )

    url = reverse(
        'sentry-api-0-project-release-file-details',
        kwargs={
            'organization_slug': project.organization.slug,
            'project_slug': project.slug,
            'version': release.version,
            'file_id': releasefile.id,
        }
    )

    response = self.client.put(url, {
        'name': 'foobar',
    })

    assert response.status_code == 200, response.content
    assert response.data['id'] == six.text_type(releasefile.id)

    releasefile = ReleaseFile.objects.get(id=releasefile.id)
    assert releasefile.name == 'foobar'
    assert releasefile.ident == ReleaseFile.get_ident('foobar')
def fetch_release_file(filename, release):
    cache_key = 'releasefile:%s:%s' % (
        release.id,
        md5(filename).hexdigest(),
    )

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)
    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)', filename, release.id)
        ident = ReleaseFile.get_ident(filename)
        try:
            releasefile = ReleaseFile.objects.filter(
                release=release,
                ident=ident,
            ).select_related('file').get()
        except ReleaseFile.DoesNotExist:
            logger.debug('Release artifact %r not found in database (release_id=%s)', filename, release.id)
            cache.set(cache_key, -1, 60)
            return None

        logger.debug('Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                body = fp.read()
        except Exception as e:
            logger.exception(unicode(e))
            result = -1
        else:
            result = (releasefile.file.headers, body, 200)
        cache.set(cache_key, result, 300)

    if result == -1:
        result = None

    return result
def test_simple(self): project = self.create_project(name="foo") release = Release.objects.create(project=project, organization_id=project.organization_id, version="1") release.add_project(project) url = reverse( "sentry-api-0-release-files", kwargs={ "organization_slug": project.organization.slug, "project_slug": project.slug, "version": release.version, }, ) self.login_as(user=self.user) response = self.client.post( url, { "name": "http://example.com/application.js", "header": "X-SourceMap: http://example.com", "file": SimpleUploadedFile("application.js", b"function() { }", content_type="application/javascript"), }, format="multipart", ) assert response.status_code == 201, response.content releasefile = ReleaseFile.objects.get(release=release) assert releasefile.name == "http://example.com/application.js" assert releasefile.ident == ReleaseFile.get_ident("http://example.com/application.js") assert releasefile.file.headers == { "Content-Type": "application/javascript", "X-SourceMap": "http://example.com", }
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (release.id, md5_text(filename).hexdigest())

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf
                for ident in filename_idents
                for rf in possible_files
                if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)', filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug('Release artifact %r not found in database (release_id=%s)', filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next((f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = (headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = (result[0], zlib.decompress(result[1]), result[2], encoding)

    return result
def fetch_release_file(filename, release, dist=None):
    cache_key = 'releasefile:v1:%s:%s' % (release.id, md5_text(filename).hexdigest())

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    dist_name = dist and dist.name or None

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            'Checking database for release artifact %r (release_id=%s)', filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                dist=dist,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug(
                'Release artifact %r not found in database (release_id=%s)', filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next((
                rf
                for ident in filename_idents
                for rf in possible_files
                if rf.ident == ident
            ))

        logger.debug(
            'Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id
        )
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error('sourcemap.compress_read_failed', exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result
def fetch_release_file(filename, release, dist=None): """ Attempt to retrieve a release artifact from the database. Caches the result of that attempt (whether successful or not). """ dist_name = dist and dist.name or None cache_key, cache_key_meta = get_cache_keys(filename, release, dist) logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id) result = cache.get(cache_key) # not in the cache (meaning we haven't checked the database recently), so check the database if result is None: with metrics.timer("sourcemaps.release_artifact_from_file"): filename_choices = ReleaseFile.normalize(filename) filename_idents = [ ReleaseFile.get_ident(f, dist_name) for f in filename_choices ] logger.debug( "Checking database for release artifact %r (release_id=%s)", filename, release.id) possible_files = list( ReleaseFile.objects.filter( release_id=release.id, dist_id=dist.id if dist else dist, ident__in=filename_idents, ).select_related("file")) if len(possible_files) == 0: logger.debug( "Release artifact %r not found in database (release_id=%s)", filename, release.id, ) cache.set(cache_key, -1, 60) return None elif len(possible_files) == 1: releasefile = possible_files[0] else: # Pick first one that matches in priority order. # This is O(N*M) but there are only ever at most 4 things here # so not really worth optimizing. releasefile = next(rf for ident in filename_idents for rf in possible_files if rf.ident == ident) logger.debug( "Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id, ) result = fetch_and_cache_artifact( filename, lambda: ReleaseFile.cache.getfile(releasefile), cache_key, cache_key_meta, releasefile.file.headers, compress_file, ) # in the cache as an unsuccessful attempt elif result == -1: result = None # in the cache as a successful attempt, including the zipped contents of the file else: result = result_from_cache(filename, result) return result
def _simple_update(release_file: ReleaseFile, new_file: File, new_archive: ReleaseArchive): """ Update function used in _upsert_release_file """ old_file = release_file.file release_file.update(file=new_file) old_file.delete()
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)', filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug('Release artifact %r not found in database (release_id=%s)', filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next((f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id)
        try:
            with metrics.timer('sourcemaps.release_file_read'):
                with releasefile.file.getfile() as fp:
                    z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            try:
                result = (releasefile.file.headers, body.decode('utf-8'), 200)
            except UnicodeDecodeError:
                error = {
                    'type': EventError.JS_INVALID_SOURCE_ENCODING,
                    'value': 'utf8',
                    'url': expose_url(releasefile.name),
                }
                raise CannotFetchSource(error)
            else:
                # Write the compressed version to cache, but return the uncompressed version
                cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        try:
            result = (result[0], body.decode('utf-8'), result[2])
        except UnicodeDecodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                # No releasefile is loaded on a cache hit, so use the filename
                'url': expose_url(filename),
            }
            raise CannotFetchSource(error)

    return result
def test_compression(self, mock_compress_file):
    """
    For files larger than the max memcached payload size, we want to avoid a
    pointless compression and caching attempt, since it fails silently.

    Tested scenarios:
    - happy path, where the compressed file is successfully cached
    - the compressed payload is too large to cache, and we avoid compression
      and caching while the metadata cache entry exists
    """
    project = self.project
    release = Release.objects.create(organization_id=project.organization_id, version="abc")
    release.add_project(project)

    filename = "file.min.js"
    file = File.objects.create(
        name=filename,
        type="release.file",
        headers={"Content-Type": "application/json; charset=utf-8"},
    )

    binary_body = unicode_body.encode("utf-8")
    file.putfile(BytesIO(binary_body))

    ReleaseFile.objects.create(
        name="file.min.js",
        release=release,
        organization_id=project.organization_id,
        file=file,
    )

    mock_compress_file.return_value = (binary_body, binary_body)

    releasefile_ident = ReleaseFile.get_ident(filename, None)
    cache_key = get_release_file_cache_key(
        release_id=release.id, releasefile_ident=releasefile_ident
    )
    cache_key_meta = get_release_file_cache_key_meta(
        release_id=release.id, releasefile_ident=releasefile_ident
    )

    fetch_release_file(filename, release)

    # Here the ANY is the File() retrieved from cache/db
    assert mock_compress_file.mock_calls == [call(ANY)]
    assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
    assert cache.get(cache_key)

    # Remove the cache entry and check that calling fetch_release_file will do
    # the compression and caching again
    cache.set(cache_key, None)
    mock_compress_file.reset_mock()

    fetch_release_file(filename, release)

    assert mock_compress_file.mock_calls == [call(ANY)]
    assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
    assert cache.get(cache_key)

    # If the file is bigger than the max cache value threshold, avoid
    # compression and caching
    cache.set(cache_key, None)
    mock_compress_file.reset_mock()
    with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE", len(binary_body) - 1):
        result = fetch_release_file(filename, release)

    assert result == http.UrlResult(
        filename,
        {"content-type": "application/json; charset=utf-8"},
        binary_body,
        200,
        "utf-8",
    )
    assert mock_compress_file.mock_calls == []
    assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
    assert cache.get(cache_key) is None

    # If the file is bigger than the max cache value threshold, but the
    # metadata cache is empty as well, compress and attempt to cache anyway
    cache.set(cache_key, None)
    cache.set(cache_key_meta, None)
    mock_compress_file.reset_mock()
    with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE", len(binary_body) - 1):
        result = fetch_release_file(filename, release)

    assert result == http.UrlResult(
        filename,
        {"content-type": "application/json; charset=utf-8"},
        binary_body,
        200,
        "utf-8",
    )
    assert mock_compress_file.mock_calls == [call(ANY)]
    assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
    assert cache.get(cache_key)

    # If the file is smaller than the max cache value threshold, but the
    # cache is empty, compress and cache
    cache.set(cache_key, None)
    mock_compress_file.reset_mock()
    with patch("sentry.lang.javascript.processor.CACHE_MAX_VALUE_SIZE", len(binary_body) + 1):
        result = fetch_release_file(filename, release)

    assert result == http.UrlResult(
        filename,
        {"content-type": "application/json; charset=utf-8"},
        binary_body,
        200,
        "utf-8",
    )
    assert mock_compress_file.mock_calls == [call(ANY)]
    assert cache.get(cache_key_meta)["compressed_size"] == len(binary_body)
    assert cache.get(cache_key)
def fetch_release_file(filename, release, dist=None): """ Attempt to retrieve a release artifact from the database. Caches the result of that attempt (whether successful or not). """ dist_name = dist and dist.name or None releasefile_ident = ReleaseFile.get_ident(filename, dist_name) cache_key = get_release_file_cache_key(release_id=release.id, releasefile_ident=releasefile_ident) # Cache key to store file metadata, currently only the size of the # compressed version of file. We cannot use the cache_key because large # payloads (silently) fail to cache due to e.g. memcached payload size # limitation and we use the meta data to avoid compression of such a files. cache_key_meta = get_release_file_cache_key_meta( release_id=release.id, releasefile_ident=releasefile_ident) logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id) result = cache.get(cache_key) # not in the cache (meaning we haven't checked the database recently), so check the database if result is None: filename_choices = ReleaseFile.normalize(filename) filename_idents = [ ReleaseFile.get_ident(f, dist_name) for f in filename_choices ] logger.debug( "Checking database for release artifact %r (release_id=%s)", filename, release.id) possible_files = list( ReleaseFile.objects.filter( release=release, dist=dist, ident__in=filename_idents).select_related("file")) if len(possible_files) == 0: logger.debug( "Release artifact %r not found in database (release_id=%s)", filename, release.id) cache.set(cache_key, -1, 60) return None elif len(possible_files) == 1: releasefile = possible_files[0] else: # Pick first one that matches in priority order. # This is O(N*M) but there are only ever at most 4 things here # so not really worth optimizing. releasefile = next(rf for ident in filename_idents for rf in possible_files if rf.ident == ident) logger.debug("Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id) # If the release file is not in cache, check if we can retrieve at # least the size metadata from cache and prevent compression and # caching if payload exceeds the backend limit. z_body_size = None if CACHE_MAX_VALUE_SIZE: cache_meta = cache.get(cache_key_meta) if cache_meta: z_body_size = int(cache_meta.get("compressed_size")) def fetch_release_body(): with ReleaseFile.cache.getfile(releasefile) as fp: if z_body_size and z_body_size > CACHE_MAX_VALUE_SIZE: return None, fp.read() else: return compress_file(fp) try: with metrics.timer("sourcemaps.release_file_read"): z_body, body = fetch_retry_policy(fetch_release_body) except Exception: logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info()) result = None else: headers = { k.lower(): v for k, v in releasefile.file.headers.items() } encoding = get_encoding_from_headers(headers) result = http.UrlResult(filename, headers, body, 200, encoding) # If we don't have the compressed body for caching because the # cached metadata said it is too large payload for the cache # backend, do not attempt to cache. if z_body: # This will implicitly skip too large payloads. Those will be cached # on the file system by `ReleaseFile.cache`, instead. cache.set(cache_key, (headers, z_body, 200, encoding), 3600) # In case the previous call to cache implicitly fails, we use # the meta data to avoid pointless compression which is done # only for caching. 
cache.set(cache_key_meta, {"compressed_size": len(z_body)}, 3600) # in the cache as an unsuccessful attempt elif result == -1: result = None # in the cache as a successful attempt, including the zipped contents of the file else: # Previous caches would be a 3-tuple instead of a 4-tuple, # so this is being maintained for backwards compatibility try: encoding = result[3] except IndexError: encoding = None result = http.UrlResult(filename, result[0], zlib.decompress(result[1]), result[2], encoding) return result
def fetch_release_file(filename, release):
    cache_key = 'releasefile:v1:%s:%s' % (
        release.id,
        md5_text(filename).hexdigest(),
    )

    filename_path = None
    if filename is not None:
        # Reconstruct url without protocol + host
        # e.g. http://example.com/foo?bar => ~/foo?bar
        parsed_url = urlparse(filename)
        filename_path = '~' + parsed_url.path
        if parsed_url.query:
            filename_path += '?' + parsed_url.query

    logger.debug('Checking cache for release artifact %r (release_id=%s)', filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        logger.debug('Checking database for release artifact %r (release_id=%s)', filename, release.id)

        filename_idents = [ReleaseFile.get_ident(filename)]
        if filename_path is not None and filename_path != filename:
            filename_idents.append(ReleaseFile.get_ident(filename_path))

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release,
                ident__in=filename_idents,
            ).select_related('file')
        )

        if len(possible_files) == 0:
            logger.debug('Release artifact %r not found in database (release_id=%s)', filename, release.id)
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Prioritize releasefile that matches full url (w/ host)
            # over hostless releasefile
            target_ident = filename_idents[0]
            releasefile = next((f for f in possible_files if f.ident == target_ident))

        logger.debug('Found release artifact %r (id=%s, release_id=%s)', filename, releasefile.id, release.id)
        try:
            with releasefile.file.getfile() as fp:
                z_body, body = compress_file(fp)
        except Exception as e:
            logger.exception(six.text_type(e))
            cache.set(cache_key, -1, 3600)
            result = None
        else:
            try:
                result = (releasefile.file.headers, body.decode('utf-8'), 200)
            except UnicodeDecodeError:
                error = {
                    'type': EventError.JS_INVALID_SOURCE_ENCODING,
                    'value': 'utf8',
                    'url': expose_url(releasefile.name),
                }
                raise CannotFetchSource(error)
            else:
                # Write the compressed version to cache, but return the uncompressed version
                cache.set(cache_key, (releasefile.file.headers, z_body, 200), 3600)
    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # We got a cache hit, but the body is compressed, so we
        # need to decompress it before handing it off
        body = zlib.decompress(result[1])
        try:
            result = (result[0], body.decode('utf-8'), result[2])
        except UnicodeDecodeError:
            error = {
                'type': EventError.JS_INVALID_SOURCE_ENCODING,
                'value': 'utf8',
                # No releasefile is loaded on a cache hit, so use the filename
                'url': expose_url(filename),
            }
            raise CannotFetchSource(error)

    return result
def fetch_release_file(filename, release, dist=None):
    dist_name = dist and dist.name or None
    cache_key = "releasefile:v1:%s:%s" % (release.id, ReleaseFile.get_ident(filename, dist_name))

    logger.debug("Checking cache for release artifact %r (release_id=%s)", filename, release.id)
    result = cache.get(cache_key)

    if result is None:
        filename_choices = ReleaseFile.normalize(filename)
        filename_idents = [ReleaseFile.get_ident(f, dist_name) for f in filename_choices]

        logger.debug(
            "Checking database for release artifact %r (release_id=%s)", filename, release.id
        )

        possible_files = list(
            ReleaseFile.objects.filter(
                release=release, dist=dist, ident__in=filename_idents
            ).select_related("file")
        )

        if len(possible_files) == 0:
            logger.debug(
                "Release artifact %r not found in database (release_id=%s)", filename, release.id
            )
            cache.set(cache_key, -1, 60)
            return None
        elif len(possible_files) == 1:
            releasefile = possible_files[0]
        else:
            # Pick first one that matches in priority order.
            # This is O(N*M) but there are only ever at most 4 things here
            # so not really worth optimizing.
            releasefile = next(
                (rf for ident in filename_idents for rf in possible_files if rf.ident == ident)
            )

        logger.debug(
            "Found release artifact %r (id=%s, release_id=%s)", filename, releasefile.id, release.id
        )

        try:
            with metrics.timer("sourcemaps.release_file_read"):
                with ReleaseFile.cache.getfile(releasefile) as fp:
                    z_body, body = compress_file(fp)
        except Exception:
            logger.error("sourcemap.compress_read_failed", exc_info=sys.exc_info())
            result = None
        else:
            headers = {k.lower(): v for k, v in releasefile.file.headers.items()}
            encoding = get_encoding_from_headers(headers)
            result = http.UrlResult(filename, headers, body, 200, encoding)
            # This will implicitly skip too large payloads. Those will be cached
            # on the file system by `ReleaseFile.cache`, instead.
            cache.set(cache_key, (headers, z_body, 200, encoding), 3600)

    elif result == -1:
        # We cached an error, so normalize
        # it down to None
        result = None
    else:
        # Previous caches would be a 3-tuple instead of a 4-tuple,
        # so this is being maintained for backwards compatibility
        try:
            encoding = result[3]
        except IndexError:
            encoding = None
        result = http.UrlResult(
            filename, result[0], zlib.decompress(result[1]), result[2], encoding
        )

    return result