def _store_single_files(archive: ReleaseArchive, meta: dict, count_as_artifacts: bool):
    try:
        temp_dir = archive.extract()
    except Exception:
        raise AssembleArtifactsError("failed to extract bundle")

    with temp_dir:
        artifacts = archive.manifest.get("files", {})
        for rel_path, artifact in artifacts.items():
            artifact_url = artifact.get("url", rel_path)
            artifact_basename = get_artifact_basename(artifact_url)
            file = File.objects.create(
                name=artifact_basename,
                type="release.file",
                headers=artifact.get("headers", {}),
            )

            full_path = path.join(temp_dir.name, rel_path)
            with open(full_path, "rb") as fp:
                file.putfile(fp, logger=logger)

            kwargs = dict(meta, name=artifact_url)
            extra_fields = {"artifact_count": 1 if count_as_artifacts else 0}
            _upsert_release_file(file, None, _simple_update, kwargs, extra_fields)
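# For reference, a minimal sketch of what `get_artifact_basename` is assumed
# to do (its definition is not shown in this section): take the last path
# segment of the artifact URL, mirroring the inline `rsplit` used by the
# earlier version of `_store_single_files` further below.
def get_artifact_basename(url: str) -> str:
    return url.rsplit("/", 1)[-1]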
def _merge_archives(release_file: ReleaseFile, new_file: File, new_archive: ReleaseArchive):
    max_attempts = RELEASE_ARCHIVE_MAX_MERGE_ATTEMPTS
    success = False
    for attempt in range(max_attempts):
        old_file = release_file.file
        with ReleaseArchive(old_file.getfile().file) as old_archive:
            buffer = BytesIO()
            merge_release_archives(old_archive, new_archive, buffer)

            replacement = File.objects.create(name=old_file.name, type=old_file.type)
            buffer.seek(0)
            replacement.putfile(buffer)

            with transaction.atomic():
                release_file.refresh_from_db()
                if release_file.file == old_file:
                    # Nothing has changed. It is safe to update
                    release_file.update(file=replacement)
                    success = True
                    break
                else:
                    metrics.incr("tasks.assemble.merge_archives_retry", instance=str(attempt))
    else:
        logger.error("Failed to merge archive in %s attempts, giving up.", max_attempts)

    if success:
        old_file.delete()

    new_file.delete()
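# The `for`/`else` above is easy to misread: the `else` block runs only when
# the loop exhausts all attempts without hitting `break`. A self-contained
# illustration of the construct:
def first_even(numbers):
    """Return the first even number, or None if there is none."""
    for n in numbers:
        if n % 2 == 0:
            result = n
            break
    else:
        # Reached only when the loop completed without `break`.
        result = None
    return result

assert first_even([1, 3, 4]) == 4
assert first_even([1, 3, 5]) is None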
def get_from_archive(url: str, archive: ReleaseArchive) -> Tuple[bytes, dict]:
    candidates = ReleaseFile.normalize(url)
    for candidate in candidates:
        try:
            return archive.get_file_by_url(candidate)
        except KeyError:
            pass

    # None of the filenames matched
    raise KeyError(f"Not found in archive: '{url}'")
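# Hypothetical usage of `get_from_archive`, assuming "bundle.zip" is a release
# archive whose manifest maps some normalized variant of the URL (e.g. a
# host-less "~/" form produced by `ReleaseFile.normalize`) to a file entry:
with ReleaseArchive(open("bundle.zip", "rb")) as archive:
    try:
        body, headers = get_from_archive("http://example.com/static/app.js", archive)
    except KeyError:
        body, headers = None, {}  # URL not contained in this bundle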
def test_merge_archives_same(self):
    file1 = File.objects.create(name="foo")
    file1.putfile(ContentFile(self.create_artifact_bundle()))
    file2 = File.objects.create(name="foo")
    file2.putfile(ContentFile(self.create_artifact_bundle()))
    release_file = ReleaseFile.objects.create(
        organization=self.organization,
        release=self.release,
        file=file1,
    )

    with ReleaseArchive(file2.getfile().file) as archive2:
        _merge_archives(release_file, file2, archive2)

    # Both archives contain the same files, so the old archive remains
    assert File.objects.filter(pk=file1.pk).exists()
    assert not File.objects.filter(pk=file2.pk).exists()
    assert ReleaseFile.objects.get(pk=release_file.pk).file == file1
def test_merge_archives(self):
    file1 = File.objects.create()
    file1.putfile(ContentFile(self.create_artifact_bundle()))
    file2 = File.objects.create()
    file2.putfile(ContentFile(self.create_artifact_bundle()))
    release_file = ReleaseFile.objects.create(
        organization=self.organization,
        release=self.release,
        file=file1,
    )

    with ReleaseArchive(file2.getfile().file) as archive2:
        _merge_archives(release_file, file2, archive2)

    # Both files have disappeared, a new one has taken their place:
    assert not File.objects.filter(pk=file1.pk).exists()
    assert not File.objects.filter(pk=file2.pk).exists()
    assert release_file.file.pk > 2
def test_merge_archives_fail(self, mock_log_error, _):
    file1 = File.objects.create()
    file1.putfile(ContentFile(self.create_artifact_bundle()))
    file2 = File.objects.create()
    file2.putfile(ContentFile(self.create_artifact_bundle()))
    release_file = ReleaseFile.objects.create(
        organization=self.organization,
        release=self.release,
        file=file1,
    )

    with ReleaseArchive(file2.getfile().file) as archive2:
        _merge_archives(release_file, file2, archive2)

    # Failed to update
    assert File.objects.filter(pk=file1.pk).exists()
    assert ReleaseFile.objects.get(pk=release_file.pk).file == file1
    assert not File.objects.filter(pk=file2.pk).exists()
    assert mock_log_error.called
def _store_single_files(archive: ReleaseArchive, meta: dict):
    try:
        temp_dir = archive.extract()
    except BaseException:
        raise AssembleArtifactsError("failed to extract bundle")

    with temp_dir:
        artifacts = archive.manifest.get("files", {})
        for rel_path, artifact in artifacts.items():
            artifact_url = artifact.get("url", rel_path)
            artifact_basename = artifact_url.rsplit("/", 1)[-1]
            file = File.objects.create(
                name=artifact_basename,
                type="release.file",
                headers=artifact.get("headers", {}),
            )

            full_path = path.join(temp_dir.name, rel_path)
            with open(full_path, "rb") as fp:
                file.putfile(fp, logger=logger)

            kwargs = dict(meta, name=artifact_url)
            _upsert_release_file(file, None, _simple_update, **kwargs)
def test_merge_archives_fail(self, mock_incr, mock_refresh, mock_log_error):
    max_attempts = RELEASE_ARCHIVE_MAX_MERGE_ATTEMPTS
    file1 = File.objects.create()
    file1.putfile(ContentFile(self.create_artifact_bundle()))
    file2 = File.objects.create()
    file2.putfile(ContentFile(self.create_artifact_bundle()))
    release_file = ReleaseFile.objects.create(
        organization=self.organization,
        release=self.release,
        file=file1,
    )

    with ReleaseArchive(file2.getfile().file) as archive2:

        def change_file_id():
            # Create another file
            release_file.file = File.objects.create()
            release_file.file.putfile(ContentFile(self.create_artifact_bundle()))

        mock_refresh.side_effect = change_file_id

        _merge_archives(release_file, file2, archive2)
        assert mock_refresh.called

    # Failed to update
    assert File.objects.filter(pk=file1.pk).exists()
    assert ReleaseFile.objects.get(pk=release_file.pk).file == file1
    assert not File.objects.filter(pk=file2.pk).exists()

    for attempt in range(max_attempts):
        mock_incr.assert_any_call("tasks.assemble.merge_archives_retry", instance=str(attempt))

    mock_log_error.assert_called_with(
        "Failed to merge archive in %s attempts, giving up.", max_attempts
    )
def assemble_artifacts(org_id, version, checksum, chunks, **kwargs):
    """
    Creates release files from an uploaded artifact bundle.
    """
    try:
        organization = Organization.objects.get_from_cache(pk=org_id)
        bind_organization_context(organization)

        set_assemble_status(AssembleTask.ARTIFACTS, org_id, checksum, ChunkFileState.ASSEMBLING)

        # Assemble the chunks into a temporary file
        rv = assemble_file(
            AssembleTask.ARTIFACTS,
            organization,
            RELEASE_ARCHIVE_FILENAME,
            checksum,
            chunks,
            file_type="release.bundle",
        )

        # If no file has been created this means that the file failed to
        # assemble because of bad input data. In this case, assemble_file
        # has set the assemble status already.
        if rv is None:
            return

        bundle, temp_file = rv

        try:
            archive = ReleaseArchive(temp_file)
        except BaseException:
            raise AssembleArtifactsError("failed to open release manifest")

        with archive:
            manifest = archive.manifest

            org_slug = manifest.get("org")
            if organization.slug != org_slug:
                raise AssembleArtifactsError("organization does not match uploaded bundle")

            release_name = manifest.get("release")
            if release_name != version:
                raise AssembleArtifactsError("release does not match uploaded bundle")

            try:
                release = Release.objects.get(organization_id=organization.id, version=release_name)
            except Release.DoesNotExist:
                raise AssembleArtifactsError("release does not exist")

            dist_name = manifest.get("dist")
            dist = None
            if dist_name:
                dist = release.add_dist(dist_name)

            meta = {  # Required for release file creation
                "organization_id": organization.id,
                "release": release,
                "dist": dist,
            }

            if options.get("processing.save-release-archives"):
                kwargs = dict(meta, name=RELEASE_ARCHIVE_FILENAME)
                _upsert_release_file(bundle, archive, _merge_archives, **kwargs)

            # NOTE(jjbayer): Single files are still stored to enable
            # rolling back from release archives. Once release archives run
            # smoothly, this call can be removed / only called when the feature
            # flag is off.
            _store_single_files(archive, meta)

            # Count files extracted, to compare them to release files endpoint
            metrics.incr("tasks.assemble.extracted_files", amount=len(manifest.get("files", {})))
    except AssembleArtifactsError as e:
        set_assemble_status(AssembleTask.ARTIFACTS, org_id, checksum, ChunkFileState.ERROR, detail=str(e))
    except BaseException:
        logger.error("failed to assemble release bundle", exc_info=True)
        set_assemble_status(
            AssembleTask.ARTIFACTS,
            org_id,
            checksum,
            ChunkFileState.ERROR,
            detail="internal server error",
        )
    else:
        set_assemble_status(AssembleTask.ARTIFACTS, org_id, checksum, ChunkFileState.OK)
def fetch_release_artifact(url, release, dist):
    """
    Get a release artifact either by extracting it or fetching it directly.

    If a release archive was saved, the individual file will be extracted
    from the archive.
    """
    cache_key, cache_key_meta = get_cache_keys(url, release, dist)

    result = cache.get(cache_key)

    if result == -1:  # Cached as unavailable
        return None

    if result:
        return result_from_cache(url, result)

    start = time.monotonic()
    archive_file = fetch_release_archive_for_url(release, dist, url)
    if archive_file is not None:
        try:
            archive = ReleaseArchive(archive_file)
        except Exception as exc:
            logger.error("Failed to initialize archive for release %s", release.id, exc_info=exc)
            # TODO(jjbayer): cache error and return here
        else:
            with archive:
                try:
                    fp, headers = get_from_archive(url, archive)
                except KeyError:
                    # The manifest mapped the url to an archive, but the file
                    # is not there.
                    logger.error("Release artifact %r not found in archive %s", url, archive_file.id)
                    cache.set(cache_key, -1, 60)
                    metrics.timing("sourcemaps.release_artifact_from_archive", time.monotonic() - start)
                    return None
                except Exception as exc:
                    logger.error("Failed to read %s from release %s", url, release.id, exc_info=exc)
                    # TODO(jjbayer): cache error and return here
                else:
                    result = fetch_and_cache_artifact(
                        url,
                        lambda: fp,
                        cache_key,
                        cache_key_meta,
                        headers,
                        # Cannot use `compress_file` because `ZipExtFile` does not support chunks
                        compress_fn=compress,
                    )
                    metrics.timing("sourcemaps.release_artifact_from_archive", time.monotonic() - start)
                    return result

    # Fall back to maintain compatibility with old releases and versions of
    # sentry-cli which upload files individually
    result = fetch_release_file(url, release, dist)
    return result
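# A minimal sketch of the negative-caching convention used above, assuming a
# Django-style cache API: the sentinel value -1 marks an artifact as known to
# be unavailable, so repeated lookups skip the expensive fetch for 60 seconds.
def cached_fetch(cache, key, fetch):
    result = cache.get(key)
    if result == -1:
        return None  # cached as unavailable
    if result is None:
        result = fetch()
        # Cache a miss as -1 so the next caller returns early.
        cache.set(key, result if result is not None else -1, 60)
    return result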
def assemble_artifacts(org_id, version, checksum, chunks, **kwargs):
    """
    Creates release files from an uploaded artifact bundle.
    """
    try:
        organization = Organization.objects.get_from_cache(pk=org_id)
        bind_organization_context(organization)

        set_assemble_status(AssembleTask.ARTIFACTS, org_id, checksum, ChunkFileState.ASSEMBLING)

        archive_filename = f"release-artifacts-{uuid.uuid4().hex}.zip"

        # Assemble the chunks into a temporary file
        rv = assemble_file(
            AssembleTask.ARTIFACTS,
            organization,
            archive_filename,
            checksum,
            chunks,
            file_type="release.bundle",
        )

        # If no file has been created this means that the file failed to
        # assemble because of bad input data. In this case, assemble_file
        # has set the assemble status already.
        if rv is None:
            return

        bundle, temp_file = rv

        try:
            archive = ReleaseArchive(temp_file)
        except Exception:
            raise AssembleArtifactsError("failed to open release manifest")

        with archive:
            manifest = archive.manifest

            org_slug = manifest.get("org")
            if organization.slug != org_slug:
                raise AssembleArtifactsError("organization does not match uploaded bundle")

            release_name = manifest.get("release")
            if release_name != version:
                raise AssembleArtifactsError("release does not match uploaded bundle")

            try:
                release = Release.objects.get(organization_id=organization.id, version=release_name)
            except Release.DoesNotExist:
                raise AssembleArtifactsError("release does not exist")

            dist_name = manifest.get("dist")
            dist = None
            if dist_name:
                dist = release.add_dist(dist_name)

            num_files = len(manifest.get("files", {}))

            meta = {  # Required for release file creation
                "organization_id": organization.id,
                "release_id": release.id,
                "dist_id": dist.id if dist else dist,
            }

            saved_as_archive = False
            min_size = options.get("processing.release-archive-min-files")
            if num_files >= min_size:
                try:
                    update_artifact_index(release, dist, bundle)
                    saved_as_archive = True
                except Exception as exc:
                    logger.error("Unable to update artifact index", exc_info=exc)

            if not saved_as_archive:
                _store_single_files(archive, meta, True)

            # Count files extracted, to compare them to release files endpoint
            metrics.incr("tasks.assemble.extracted_files", amount=num_files)
    except AssembleArtifactsError as e:
        set_assemble_status(AssembleTask.ARTIFACTS, org_id, checksum, ChunkFileState.ERROR, detail=str(e))
    except Exception:
        logger.error("failed to assemble release bundle", exc_info=True)
        set_assemble_status(
            AssembleTask.ARTIFACTS,
            org_id,
            checksum,
            ChunkFileState.ERROR,
            detail="internal server error",
        )
    else:
        set_assemble_status(AssembleTask.ARTIFACTS, org_id, checksum, ChunkFileState.OK)