def test_locking(self):
    release = self.release
    dist = None

    update1 = self._create_update_fn(0, 2, {"foo": "bar"}, create=True)
    update2 = self._create_update_fn(1, 2, {"123": "xyz"}, create=True)
    threads = [Thread(target=update1), Thread(target=update2)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # Without locking, only key "123" would survive:
    assert read_artifact_index(release, dist)["files"].keys() == {"foo", "123"}
    # Only one `File` was created:
    assert File.objects.filter(name=ARTIFACT_INDEX_FILENAME).count() == 1

    def delete():
        sleep(2 * self.tick)
        delete_from_artifact_index(release, dist, "foo")

    update3 = self._create_update_fn(1, 2, {"abc": "666"}, create=True)
    threads = [Thread(target=update3), Thread(target=delete)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # Without locking, the delete would be overwritten by the slow update:
    assert read_artifact_index(release, dist)["files"].keys() == {"123", "abc"}
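# `_create_update_fn` is used by the locking tests above and below, but its body is not
# part of this section. The sketch below is only an assumption about its shape, inferred
# from how it is called (an offset, a duration, a files dict, and `create`) and from the
# `_ArtifactIndexGuard.writable_data` / `update_files` API used in test_lock_existing;
# the real helper may differ.
def _create_update_fn(self, offset, duration, files, create):
    def update():
        # Start after `offset` ticks, then hold the writable index open for another
        # `duration` ticks before applying `files`, so that concurrent writers overlap
        # unless the index guard serializes them.
        sleep(offset * self.tick)
        with _ArtifactIndexGuard(self.release, None).writable_data(create=create) as data:
            sleep(duration * self.tick)
            data.update_files(files)

    return update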
def _entry_from_index(release: Release, dist: Optional[Distribution], url: str) -> ReleaseFile:
    index = read_artifact_index(release, dist)
    if index is None:
        raise ResourceDoesNotExist
    try:
        return index.get("files", {})[url]
    except KeyError:
        raise ResourceDoesNotExist
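# For orientation, the value returned by `_entry_from_index` is a plain dict taken from
# the index's "files" mapping, with the shape seen in test_multi_archive further down.
# The helper below is purely illustrative (not part of the original module) and only
# exists to show which keys callers can rely on:
def _entry_size(release: Release, dist: Optional[Distribution], url: str) -> int:
    entry = _entry_from_index(release, dist, url)
    # entry looks like:
    # {"archive_ident": ..., "date_created": ..., "filename": ..., "sha1": ..., "size": ...}
    return entry["size"]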
def test_same_sha(self):
    """Stand-alone release file has same sha1 as one in manifest"""
    self.create_archive(fields={}, files={"foo": "bar"})

    file_ = File.objects.create()
    file_.putfile(BytesIO(b"bar"))
    self.create_release_file(file=file_)

    index = read_artifact_index(self.release, None)
    assert file_.checksum == index["files"]["fake://foo"]["sha1"]
def get_releasefiles(self, request, release, organization_id):
    query = request.GET.getlist("query")

    data_sources = []

    # Exclude files which are also present in an archive:
    file_list = ReleaseFile.public_objects.filter(release_id=release.id).exclude(
        artifact_count=0
    )
    file_list = file_list.select_related("file").order_by("name")

    if query:
        if not isinstance(query, list):
            query = [query]

        condition = Q(name__icontains=query[0])
        for name in query[1:]:
            condition |= Q(name__icontains=name)
        file_list = file_list.filter(condition)

    data_sources.append(file_list.order_by("name"))

    # Get contents of release archives as well:
    dists = Distribution.objects.filter(organization_id=organization_id, release=release)
    for dist in list(dists) + [None]:
        try:
            # Only read from the artifact index if it has a positive artifact count
            artifact_index = read_artifact_index(release, dist, artifact_count__gt=0)
        except Exception as exc:
            logger.error("Failed to read artifact index", exc_info=exc)
            artifact_index = None

        if artifact_index is not None:
            files = artifact_index.get("files", {})
            source = ArtifactSource(dist, files, query)
            data_sources.append(source)

    def on_results(r):
        return serialize(load_dist(r), request.user)

    # NOTE: Returned release files are ordered by name within their block
    # (i.e. per index file), but not overall
    return self.paginate(
        request=request,
        sources=data_sources,
        paginator_cls=ChainPaginator,
        max_offset=MAX_RELEASE_FILES_OFFSET,
        on_results=on_results,
    )
def test_artifacts(self):
    bundle_file = self.create_artifact_bundle()
    blob1 = FileBlob.from_file(ContentFile(bundle_file))
    total_checksum = sha1(bundle_file).hexdigest()

    for min_files in (10, 1):
        with self.options({"processing.release-archive-min-files": min_files}):
            ReleaseFile.objects.filter(release_id=self.release.id).delete()
            assert self.release.count_artifacts() == 0

            assemble_artifacts(
                org_id=self.organization.id,
                version=self.release.version,
                checksum=total_checksum,
                chunks=[blob1.checksum],
            )

            assert self.release.count_artifacts() == 2

            status, details = get_assemble_status(
                AssembleTask.ARTIFACTS, self.organization.id, total_checksum
            )
            assert status == ChunkFileState.OK
            assert details is None

            if min_files == 1:
                # An archive was saved
                index = read_artifact_index(self.release, dist=None)
                archive_ident = index["files"]["~/index.js"]["archive_ident"]
                releasefile = ReleaseFile.objects.get(
                    release_id=self.release.id, ident=archive_ident
                )
                # Artifact is the same as the original bundle
                assert releasefile.file.size == len(bundle_file)
            else:
                # Individual files were saved
                release_file = ReleaseFile.objects.get(
                    organization_id=self.organization.id,
                    release_id=self.release.id,
                    name="~/index.js",
                    dist_id=None,
                )
                assert release_file.file.headers == {"Sourcemap": "index.js.map"}
def test_lock_existing(self):
    release = self.release
    dist = None

    with _ArtifactIndexGuard(release, dist).writable_data(create=True) as data:
        data.update_files({"0": 0})

    update1 = self._create_update_fn(0, 2, {"foo": "bar"}, create=False)
    update2 = self._create_update_fn(1, 2, {"123": "xyz"}, create=False)
    threads = [Thread(target=update1), Thread(target=update2)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # Without locking, only keys "0", "123" would survive:
    assert read_artifact_index(release, dist)["files"].keys() == {"0", "foo", "123"}

    def delete():
        sleep(2 * self.tick)
        delete_from_artifact_index(release, dist, "foo")

    update3 = self._create_update_fn(1, 2, {"abc": "666"}, create=False)
    threads = [Thread(target=update3), Thread(target=delete)]
    for thread in threads:
        thread.start()
    for thread in threads:
        thread.join()

    # Without locking, the delete would be overwritten by the slow update:
    assert read_artifact_index(release, dist)["files"].keys() == {"0", "123", "abc"}
def get_artifact_index(release, dist):
    dist_name = dist and dist.name or None

    ident = ReleaseFile.get_ident(ARTIFACT_INDEX_FILENAME, dist_name)
    cache_key = f"artifact-index:v1:{release.id}:{ident}"
    result = cache.get(cache_key)
    if result == -1:
        index = None
    elif result:
        index = json.loads(result)
    else:
        index = read_artifact_index(release, dist, use_cache=True)
        cache_value = -1 if index is None else json.dumps(index)
        # Only cache for a short time to keep the manifest up-to-date
        cache.set(cache_key, cache_value, timeout=60)

    return index
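# A minimal sketch of the caching contract above, written as an assumed test (not taken
# from the original test suite): a release without an artifact index is cached with the
# -1 sentinel, so the storage-backed read happens at most once per 60-second window.
# The patch target assumes `read_artifact_index` is imported into the same module as
# `get_artifact_index`.
from unittest import mock


def test_artifact_index_cache_negative_hit(release):
    with mock.patch(f"{__name__}.read_artifact_index", return_value=None) as mocked_read:
        assert get_artifact_index(release, dist=None) is None  # miss: reads storage, caches -1
        assert get_artifact_index(release, dist=None) is None  # hit: served from the cache
        mocked_read.assert_called_once()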
def test_multi_archive(self):
    assert read_artifact_index(self.release, None) is None
    # Delete does nothing
    assert delete_from_artifact_index(self.release, None, "foo") is False

    archive1 = self.create_archive(
        fields={},
        files={
            "foo": "foo",
            "bar": "bar",
            "baz": "bazaa",
        },
    )

    assert read_artifact_index(self.release, None) == {
        "files": {
            "fake://bar": {
                "archive_ident": archive1.ident,
                "date_created": "2021-06-11T09:13:01.317902Z",
                "filename": "bar",
                "sha1": "62cdb7020ff920e5aa642c3d4066950dd1f01f4d",
                "size": 3,
            },
            "fake://baz": {
                "archive_ident": archive1.ident,
                "date_created": "2021-06-11T09:13:01.317902Z",
                "filename": "baz",
                "sha1": "1a74885aa2771a6a0edcc80dbd0cf396dfaf1aab",
                "size": 5,
            },
            "fake://foo": {
                "archive_ident": archive1.ident,
                "date_created": "2021-06-11T09:13:01.317902Z",
                "filename": "foo",
                "sha1": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33",
                "size": 3,
            },
        },
    }

    # See if creating a second manifest interferes:
    dist = Distribution.objects.create(
        organization_id=self.organization.id, release_id=self.release.id, name="foo"
    )
    self.create_archive(fields={}, files={"xyz": "123"}, dist=dist)

    archive2 = self.create_archive(
        fields={},
        files={
            "foo": "foo",
            "bar": "BAR",
            "zap": "zapz",
        },
    )

    # Two files were overwritten, one was added
    expected = {
        "files": {
            "fake://bar": {
                "archive_ident": archive2.ident,
                "date_created": "2021-06-11T09:13:01.317902Z",
                "filename": "bar",
                "sha1": "a5d5c1bba91fdb6c669e1ae0413820885bbfc455",
                "size": 3,
            },
            "fake://baz": {
                "archive_ident": archive1.ident,
                "date_created": "2021-06-11T09:13:01.317902Z",
                "filename": "baz",
                "sha1": "1a74885aa2771a6a0edcc80dbd0cf396dfaf1aab",
                "size": 5,
            },
            "fake://foo": {
                "archive_ident": archive2.ident,
                "date_created": "2021-06-11T09:13:01.317902Z",
                "filename": "foo",
                "sha1": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33",
                "size": 3,
            },
            "fake://zap": {
                "archive_ident": archive2.ident,
                "date_created": "2021-06-11T09:13:01.317902Z",
                "filename": "zap",
                "sha1": "a7a9c12205f9cb1f53f8b6678265c9e8158f2a8f",
                "size": 4,
            },
        },
    }
    assert read_artifact_index(self.release, None) == expected

    # Deletion works:
    assert delete_from_artifact_index(self.release, None, "fake://foo") is True
    expected["files"].pop("fake://foo")
    assert read_artifact_index(self.release, None) == expected