def test_assemble_from_files(self):
    """Upload eight random 8 MiB chunks and assemble them, twice over.

    The second upload/assemble round reuses the very same chunk streams
    (rewound to the start) and must yield a file with the same checksum.
    """
    chunk_size = 1024 * 1024 * 8
    running_total = sha1()
    uploads = []
    for _ in range(8):
        payload = os.urandom(chunk_size)
        running_total.update(payload)
        uploads.append((io.BytesIO(payload), sha1(payload).hexdigest()))
    expected_checksum = running_total.hexdigest()

    def assemble():
        # Build the chunk list anew for each call so both assemblies
        # receive an independent list object.
        return assemble_file(
            AssembleTask.DIF,
            self.project,
            "testfile",
            expected_checksum,
            [digest for _, digest in uploads],
            "dummy.type",
        )

    # upload all blobs
    FileBlob.from_files(uploads, organization=self.organization)

    # find all blobs
    for stream, digest in uploads:
        stored = FileBlob.objects.get(checksum=digest)
        expected_bytes = stream.getvalue()
        assert stored.getfile().read(len(expected_bytes)) == expected_bytes
        # `.get()` doubles as the ownership check: it raises unless exactly
        # one owner row matches (Django QuerySet semantics).
        FileBlobOwner.objects.filter(
            blob=stored, organization_id=self.organization.id
        ).get()

    rv = assemble()
    assert rv is not None
    assembled, _ = rv
    assert assembled.checksum == expected_checksum
    assert assembled.type == "dummy.type"

    # upload all blobs a second time
    for stream, _ in uploads:
        stream.seek(0)
    FileBlob.from_files(uploads, organization=self.organization)

    # assemble a second time
    reassembled = assemble()[0]
    assert reassembled.checksum == expected_checksum
def test_assemble_duplicate_blobs(self):
    """Assemble a file made of the same 8 MiB chunk repeated eight times.

    Every entry in the upload list carries identical content and therefore
    the identical checksum; the assembled file's checksum must still equal
    the SHA-1 over all eight repetitions.
    """
    payload = os.urandom(1024 * 1024 * 8)
    digest = sha1(payload).hexdigest()

    running_total = sha1()
    uploads = []
    # `range` rather than `xrange`: the latter does not exist on Python 3,
    # and for eight iterations the two are interchangeable on Python 2.
    for _ in range(8):
        running_total.update(payload)
        uploads.append((io.BytesIO(payload), digest))
    expected_checksum = running_total.hexdigest()

    # upload all blobs
    FileBlob.from_files(uploads, organization=self.organization)

    # find all blobs
    for stream, checksum in uploads:
        stored = FileBlob.objects.get(checksum=checksum)
        expected_bytes = stream.getvalue()
        assert stored.getfile().read(len(expected_bytes)) == expected_bytes
        # `.get()` doubles as the ownership check: it raises unless exactly
        # one owner row matches (Django QuerySet semantics).
        FileBlobOwner.objects.filter(
            blob=stored,
            organization=self.organization
        ).get()

    rv = assemble_file(
        AssembleTask.DIF,
        self.project,
        'testfile',
        expected_checksum,
        [checksum for _, checksum in uploads],
        'dummy.type',
    )

    assert rv is not None
    assembled, _ = rv
    assert assembled.checksum == expected_checksum
    assert assembled.type == 'dummy.type'
def test_assemble_from_files(self):
    """Upload eight random 8 MiB chunks and assemble them, twice over.

    The second upload/assemble round reuses the very same chunk streams
    (rewound to the start) and must yield a file with the same checksum.
    """
    running_total = sha1()
    uploads = []
    # `range` rather than `xrange`: the latter does not exist on Python 3,
    # and for eight iterations the two are interchangeable on Python 2.
    for _ in range(8):
        payload = os.urandom(1024 * 1024 * 8)
        running_total.update(payload)
        uploads.append((io.BytesIO(payload), sha1(payload).hexdigest()))
    expected_checksum = running_total.hexdigest()

    # upload all blobs
    FileBlob.from_files(uploads, organization=self.organization)

    # find all blobs
    for stream, checksum in uploads:
        stored = FileBlob.objects.get(checksum=checksum)
        expected_bytes = stream.getvalue()
        assert stored.getfile().read(len(expected_bytes)) == expected_bytes
        # `.get()` doubles as the ownership check: it raises unless exactly
        # one owner row matches (Django QuerySet semantics).
        FileBlobOwner.objects.filter(
            blob=stored,
            organization=self.organization
        ).get()

    rv = assemble_file(
        self.project,
        'testfile',
        expected_checksum,
        [checksum for _, checksum in uploads],
        'dummy.type',
    )

    assert rv is not None
    assembled, _ = rv
    assert assembled.checksum == expected_checksum
    assert assembled.type == 'dummy.type'

    # upload all blobs a second time
    for stream, _ in uploads:
        stream.seek(0)
    FileBlob.from_files(uploads, organization=self.organization)

    # assemble a second time
    reassembled = assemble_file(
        self.project,
        'testfile',
        expected_checksum,
        [checksum for _, checksum in uploads],
        'dummy.type',
    )[0]
    assert reassembled.checksum == expected_checksum
def test_assemble(self, mock_assemble_dif):
    """Walk the assemble endpoint from "chunk not owned" to "file created".

    Three small blobs are stored, but at first only two of them are owned
    by the organization, so the endpoint must report the third as missing.
    After ownership is added the endpoint must report CREATED and dispatch
    the assemble task exactly once; assembling directly must then produce
    a file with the expected checksum and three blob index rows.

    ``mock_assemble_dif`` is a mock supplied from outside this method
    (presumably by a patch decorator -- not visible here).
    """
    foo_bytes = 'foo'.encode('utf-8')
    bar_bytes = 'bar'.encode('utf-8')
    baz_bytes = 'baz'.encode('utf-8')
    foo_sum = sha1(foo_bytes).hexdigest()
    bar_sum = sha1(bar_bytes).hexdigest()
    baz_sum = sha1(baz_bytes).hexdigest()

    # Chunks are requested as bar, foo, baz -- deliberately not creation order.
    total_checksum = sha1(bar_bytes + foo_bytes + baz_bytes).hexdigest()

    def post_assemble():
        # Issue one assemble request for the bar/foo/baz chunk sequence.
        return self.client.post(
            self.url,
            data={
                total_checksum: {
                    'name': 'test',
                    'chunks': [bar_sum, foo_sum, baz_sum],
                }
            },
            HTTP_AUTHORIZATION=u'Bearer {}'.format(self.token.token)
        )

    # The order here is on purpose because we check for the order of checksums
    foo_blob = FileBlob.from_file(ContentFile(foo_bytes))
    FileBlobOwner.objects.get_or_create(
        organization=self.organization,
        blob=foo_blob
    )
    baz_blob = FileBlob.from_file(ContentFile(baz_bytes))
    FileBlobOwner.objects.get_or_create(
        organization=self.organization,
        blob=baz_blob
    )
    bar_blob = FileBlob.from_file(ContentFile(bar_bytes))

    # we make a request now but we are missing ownership for chunk 2
    response = post_assemble()
    assert response.status_code == 200, response.content
    outcome = response.data[total_checksum]
    assert outcome['state'] == ChunkFileState.NOT_FOUND
    assert outcome['missingChunks'] == [bar_sum]

    # we add ownership to chunk 2
    FileBlobOwner.objects.get_or_create(
        organization=self.organization,
        blob=bar_blob
    )

    # new request, ownership for all chunks is there but file does not exist yet
    response = post_assemble()
    assert response.status_code == 200, response.content
    outcome = response.data[total_checksum]
    assert outcome['state'] == ChunkFileState.CREATED
    assert outcome['missingChunks'] == []

    chunks = [bar_sum, foo_sum, baz_sum]
    expected_task_kwargs = {
        'project_id': self.project.id,
        'name': 'test',
        'chunks': chunks,
        'checksum': total_checksum,
    }
    mock_assemble_dif.apply_async.assert_called_once_with(kwargs=expected_task_kwargs)

    assembled = assemble_file(self.project, 'test', total_checksum, chunks, 'project.dsym')[0]
    assert get_assemble_status(self.project, total_checksum)[0] != ChunkFileState.ERROR
    assert assembled.checksum == total_checksum

    index_rows = FileBlobIndex.objects.all()
    assert len(index_rows) == 3
def test_assemble(self, mock_assemble_dif):
    """Walk the assemble endpoint from "chunk not owned" to "file created".

    Three small blobs are stored, but at first only two of them are owned
    by the organization, so the endpoint must report the third as missing.
    After ownership is added the endpoint must report CREATED and dispatch
    the assemble task exactly once; assembling directly must then produce
    a file with the expected checksum and three blob index rows.

    ``mock_assemble_dif`` is a mock supplied from outside this method
    (presumably by a patch decorator -- not visible here).
    """
    foo_bytes, bar_bytes, baz_bytes = b"foo", b"bar", b"baz"
    foo_sum = sha1(foo_bytes).hexdigest()
    bar_sum = sha1(bar_bytes).hexdigest()
    baz_sum = sha1(baz_bytes).hexdigest()

    # Chunks are requested as bar, foo, baz -- deliberately not creation order.
    total_checksum = sha1(bar_bytes + foo_bytes + baz_bytes).hexdigest()

    def post_assemble():
        # Issue one assemble request for the bar/foo/baz chunk sequence.
        return self.client.post(
            self.url,
            data={
                total_checksum: {
                    "name": "test",
                    "chunks": [bar_sum, foo_sum, baz_sum],
                }
            },
            HTTP_AUTHORIZATION=f"Bearer {self.token.token}",
        )

    # The order here is on purpose because we check for the order of checksums
    foo_blob = FileBlob.from_file(ContentFile(foo_bytes))
    FileBlobOwner.objects.get_or_create(
        organization_id=self.organization.id, blob=foo_blob
    )
    baz_blob = FileBlob.from_file(ContentFile(baz_bytes))
    FileBlobOwner.objects.get_or_create(
        organization_id=self.organization.id, blob=baz_blob
    )
    bar_blob = FileBlob.from_file(ContentFile(bar_bytes))

    # we make a request now but we are missing ownership for chunk 2
    response = post_assemble()
    assert response.status_code == 200, response.content
    outcome = response.data[total_checksum]
    assert outcome["state"] == ChunkFileState.NOT_FOUND
    assert outcome["missingChunks"] == [bar_sum]

    # we add ownership to chunk 2
    FileBlobOwner.objects.get_or_create(
        organization_id=self.organization.id, blob=bar_blob
    )

    # new request, ownership for all chunks is there but file does not exist yet
    response = post_assemble()
    assert response.status_code == 200, response.content
    outcome = response.data[total_checksum]
    assert outcome["state"] == ChunkFileState.CREATED
    assert outcome["missingChunks"] == []

    chunks = [bar_sum, foo_sum, baz_sum]
    expected_task_kwargs = {
        "project_id": self.project.id,
        "name": "test",
        "chunks": chunks,
        "checksum": total_checksum,
        "debug_id": None,
    }
    mock_assemble_dif.apply_async.assert_called_once_with(kwargs=expected_task_kwargs)

    assembled = assemble_file(
        AssembleTask.DIF, self.project, "test", total_checksum, chunks, "project.dif"
    )[0]
    status, _ = get_assemble_status(AssembleTask.DIF, self.project.id, total_checksum)
    assert status != ChunkFileState.ERROR
    assert assembled.checksum == total_checksum

    index_rows = FileBlobIndex.objects.all()
    assert len(index_rows) == 3
def test_assemble(self, mock_assemble_dif):
    """Walk the assemble endpoint from "chunk not owned" to "file created".

    Three small blobs are stored, but at first only two of them are owned
    by the organization, so the endpoint must report the third as missing.
    After ownership is added the endpoint must report CREATED and dispatch
    the assemble task exactly once; assembling directly must then produce
    a file with the expected checksum and three blob index rows.

    ``mock_assemble_dif`` is a mock supplied from outside this method
    (presumably by a patch decorator -- not visible here).
    """
    foo_bytes = 'foo'.encode('utf-8')
    bar_bytes = 'bar'.encode('utf-8')
    baz_bytes = 'baz'.encode('utf-8')
    foo_sum = sha1(foo_bytes).hexdigest()
    bar_sum = sha1(bar_bytes).hexdigest()
    baz_sum = sha1(baz_bytes).hexdigest()

    # Chunks are requested as bar, foo, baz -- deliberately not creation order.
    total_checksum = sha1(bar_bytes + foo_bytes + baz_bytes).hexdigest()

    def post_assemble():
        # Issue one assemble request for the bar/foo/baz chunk sequence.
        return self.client.post(
            self.url,
            data={
                total_checksum: {
                    'name': 'test',
                    'chunks': [bar_sum, foo_sum, baz_sum],
                }
            },
            HTTP_AUTHORIZATION='Bearer {}'.format(self.token.token)
        )

    # The order here is on purpose because we check for the order of checksums
    foo_blob = FileBlob.from_file(ContentFile(foo_bytes))
    FileBlobOwner.objects.get_or_create(
        organization=self.organization,
        blob=foo_blob
    )
    baz_blob = FileBlob.from_file(ContentFile(baz_bytes))
    FileBlobOwner.objects.get_or_create(
        organization=self.organization,
        blob=baz_blob
    )
    bar_blob = FileBlob.from_file(ContentFile(bar_bytes))

    # we make a request now but we are missing ownership for chunk 2
    response = post_assemble()
    assert response.status_code == 200, response.content
    outcome = response.data[total_checksum]
    assert outcome['state'] == ChunkFileState.NOT_FOUND
    assert outcome['missingChunks'] == [bar_sum]

    # we add ownership to chunk 2
    FileBlobOwner.objects.get_or_create(
        organization=self.organization,
        blob=bar_blob
    )

    # new request, ownership for all chunks is there but file does not exist yet
    response = post_assemble()
    assert response.status_code == 200, response.content
    outcome = response.data[total_checksum]
    assert outcome['state'] == ChunkFileState.CREATED
    assert outcome['missingChunks'] == []

    chunks = [bar_sum, foo_sum, baz_sum]
    expected_task_kwargs = {
        'project_id': self.project.id,
        'name': 'test',
        'chunks': chunks,
        'checksum': total_checksum,
    }
    mock_assemble_dif.apply_async.assert_called_once_with(kwargs=expected_task_kwargs)

    assembled = assemble_file(self.project, 'test', total_checksum, chunks, 'project.dsym')[0]
    assert get_assemble_status(self.project, total_checksum)[0] != ChunkFileState.ERROR
    assert assembled.checksum == total_checksum

    index_rows = FileBlobIndex.objects.all()
    assert len(index_rows) == 3