def test_streaming_threaded_parts(self):
    """Check ordering when the total part count is announced late.

    Same scenario as the basic threaded test, except the context starts
    with an unknown ('...') number of parts, so the total only becomes
    known once announce_total_parts() is called.
    """
    self.context = MultipartUploadContext(expected_parts='...')

    # The part uploader is started first; it has to wait for an upload id.
    part_worker = threading.Thread(target=self.upload_part, args=(1,))
    self.start_thread(part_worker)

    # The completer needs both the upload id and every finished part, so
    # it is waiting on the part uploader as well as on the id.
    completer = threading.Thread(target=self.complete_upload)
    self.start_thread(completer)

    # CreateMultipartUpload "finishes" here and publishes the upload id.
    self.create_upload('my_upload_id')

    # Snapshot how many calls exist before the part total is known. The
    # completer should still be waiting for the expected parts number.
    with self.call_lock:
        calls_before_total = len(self.calls)
    was_completed = calls_before_total > 2

    # Publishing the total unblocks the remaining work.
    self.context.announce_total_parts(1)
    self.join_threads()
    self.assertIsNone(self.caught_exception)

    # The complete call must not have been recorded before the total
    # number of parts was announced.
    self.assertFalse(was_completed)

    # Exactly three calls, in order: create, upload, complete.
    self.assertEqual(len(self.calls), 3)
    expected_names = (
        'create_multipart_upload',
        'upload_part',
        'complete_upload',
    )
    for position, operation in enumerate(expected_names):
        self.assertEqual(self.calls[position][0], operation)

    # Each call must also have been recorded with the right arguments.
    self.assertEqual(self.calls[0][1], 'my_upload_id')
    self.assertEqual(self.calls[1][1:], (1, 'my_upload_id'))
    finished_parts = [{'ETag': 'etag1', 'PartNumber': 1}]
    self.assertEqual(self.calls[2][1:], ('my_upload_id', finished_parts))
def test_randomized_stress_test(self):
    """Start all worker threads in random order and check the invariants.

    Regardless of thread start order, the recorded calls must be:
    CreateMultipartUpload first, then every UploadPart (each with the
    announced upload id), then CompleteMultipartUpload with that upload id
    and the full parts list, and finally the operation that was waiting
    for completion.
    """
    # I've run this with much larger values, but 100 is a good
    # tradeoff with coverage vs. execution time.
    for _ in range(100):
        expected_parts = random.randint(2, 50)
        self.context = MultipartUploadContext(
            expected_parts=expected_parts)
        self.threads = []
        self.calls = []
        all_threads = [
            threading.Thread(target=self.complete_upload),
            threading.Thread(target=self.create_upload,
                             args=('my_upload_id',)),
            threading.Thread(target=self.wait_for_upload_complete),
        ]
        # A distinct name is used here so the outer loop variable is not
        # shadowed by the part numbers.
        all_threads.extend(
            threading.Thread(target=self.upload_part, args=(part_number,))
            for part_number in range(1, expected_parts + 1))
        random.shuffle(all_threads)
        for thread in all_threads:
            self.start_thread(thread)
        self.join_threads()

        # Nothing is cancelled in this test, so no helper thread should
        # have caught an exception while waiting.
        self.assertIsNone(self.caught_exception)
        # One create, one call per part, one complete and one
        # post-complete operation; this also catches duplicate uploads.
        self.assertEqual(len(self.calls), expected_parts + 3)

        self.assertEqual(self.calls[0][0], 'create_multipart_upload')
        self.assertEqual(self.calls[-1][0],
                         'arbitrary_post_complete_operation')
        self.assertEqual(self.calls[-2][0], 'complete_upload')

        parts = set()
        for call in self.calls[1:-2]:
            self.assertEqual(call[0], 'upload_part')
            self.assertEqual(call[2], 'my_upload_id')
            parts.add(call[1])
        self.assertEqual(len(parts), expected_parts)

        # The complete call is recorded as (name, upload_id, parts); check
        # that it saw the announced id and every part number exactly once.
        complete_call = self.calls[-2]
        self.assertEqual(complete_call[1], 'my_upload_id')
        self.assertEqual(
            sorted(part['PartNumber'] for part in complete_call[2]),
            list(range(1, expected_parts + 1)))
def test_normal_non_threaded(self):
    """Walk through the context API sequentially, with no threads."""
    upload_context = MultipartUploadContext(expected_parts=3)

    # Publish the upload id; a waiter asking for it gets that same value.
    upload_context.announce_upload_id('my_upload_id')
    announced_id = upload_context.wait_for_upload_id()
    self.assertEqual(announced_id, 'my_upload_id')

    # Report the three parts as finished, one after another.
    finished = (('etag1', 1), ('etag2', 2), ('etag3', 3))
    for etag, part_number in finished:
        upload_context.announce_finished_part(
            etag=etag, part_number=part_number)

    # Waiting for the parts now yields the full list of finished parts.
    expected_parts = [
        {'ETag': 'etag1', 'PartNumber': 1},
        {'ETag': 'etag2', 'PartNumber': 2},
        {'ETag': 'etag3', 'PartNumber': 3},
    ]
    self.assertEqual(
        upload_context.wait_for_parts_to_finish(), expected_parts)
def test_streaming_threaded_parts(self):
    """Check ordering when the total part count is announced late.

    Same scenario as the basic threaded test, except the context starts
    with an unknown ('...') number of parts, so the total only becomes
    known once announce_total_parts() is called.
    """
    self.context = MultipartUploadContext(expected_parts='...')

    # The part uploader is started first; it has to wait for an upload id.
    part_worker = threading.Thread(target=self.upload_part, args=(1,))
    self.start_thread(part_worker)

    # The completer needs both the upload id and every finished part, so
    # it is waiting on the part uploader as well as on the id.
    completer = threading.Thread(target=self.complete_upload)
    self.start_thread(completer)

    # CreateMultipartUpload "finishes" here and publishes the upload id.
    self.create_upload('my_upload_id')

    # Snapshot how many calls exist before the part total is known. The
    # completer should still be waiting for the expected parts number.
    with self.call_lock:
        calls_before_total = len(self.calls)
    was_completed = calls_before_total > 2

    # Publishing the total unblocks the remaining work.
    self.context.announce_total_parts(1)
    self.join_threads()
    self.assertIsNone(self.caught_exception)

    # The complete call must not have been recorded before the total
    # number of parts was announced.
    self.assertFalse(was_completed)

    # Exactly three calls, in order: create, upload, complete.
    self.assertEqual(len(self.calls), 3)
    expected_names = (
        'create_multipart_upload',
        'upload_part',
        'complete_upload',
    )
    for position, operation in enumerate(expected_names):
        self.assertEqual(self.calls[position][0], operation)

    # Each call must also have been recorded with the right arguments.
    self.assertEqual(self.calls[0][1], 'my_upload_id')
    self.assertEqual(self.calls[1][1:], (1, 'my_upload_id'))
    finished_parts = [{'ETag': 'etag1', 'PartNumber': 1}]
    self.assertEqual(self.calls[2][1:], ('my_upload_id', finished_parts))
def test_randomized_stress_test(self):
    """Start all worker threads in random order and check the invariants.

    Regardless of thread start order, the recorded calls must be:
    CreateMultipartUpload first, then every UploadPart (each with the
    announced upload id), then CompleteMultipartUpload with that upload id
    and the full parts list, and finally the operation that was waiting
    for completion.
    """
    # I've run this with much larger values, but 100 is a good
    # tradeoff with coverage vs. execution time.
    for _ in range(100):
        expected_parts = random.randint(2, 50)
        self.context = MultipartUploadContext(
            expected_parts=expected_parts)
        self.threads = []
        self.calls = []
        all_threads = [
            threading.Thread(target=self.complete_upload),
            threading.Thread(target=self.create_upload,
                             args=('my_upload_id',)),
            threading.Thread(target=self.wait_for_upload_complete),
        ]
        # A distinct name is used here so the outer loop variable is not
        # shadowed by the part numbers.
        all_threads.extend(
            threading.Thread(target=self.upload_part, args=(part_number,))
            for part_number in range(1, expected_parts + 1))
        random.shuffle(all_threads)
        for thread in all_threads:
            self.start_thread(thread)
        self.join_threads()

        # Nothing is cancelled in this test, so no helper thread should
        # have caught an exception while waiting.
        self.assertIsNone(self.caught_exception)
        # One create, one call per part, one complete and one
        # post-complete operation; this also catches duplicate uploads.
        self.assertEqual(len(self.calls), expected_parts + 3)

        self.assertEqual(self.calls[0][0], 'create_multipart_upload')
        self.assertEqual(self.calls[-1][0],
                         'arbitrary_post_complete_operation')
        self.assertEqual(self.calls[-2][0], 'complete_upload')

        parts = set()
        for call in self.calls[1:-2]:
            self.assertEqual(call[0], 'upload_part')
            self.assertEqual(call[2], 'my_upload_id')
            parts.add(call[1])
        self.assertEqual(len(parts), expected_parts)

        # The complete call is recorded as (name, upload_id, parts); check
        # that it saw the announced id and every part number exactly once.
        complete_call = self.calls[-2]
        self.assertEqual(complete_call[1], 'my_upload_id')
        self.assertEqual(
            sorted(part['PartNumber'] for part in complete_call[2]),
            list(range(1, expected_parts + 1)))
def test_normal_non_threaded(self):
    """Walk through the context API sequentially, with no threads."""
    upload_context = MultipartUploadContext(expected_parts=3)

    # Publish the upload id; a waiter asking for it gets that same value.
    upload_context.announce_upload_id('my_upload_id')
    announced_id = upload_context.wait_for_upload_id()
    self.assertEqual(announced_id, 'my_upload_id')

    # Report the three parts as finished, one after another.
    finished = (('etag1', 1), ('etag2', 2), ('etag3', 3))
    for etag, part_number in finished:
        upload_context.announce_finished_part(
            etag=etag, part_number=part_number)

    # Waiting for the parts now yields the full list of finished parts.
    expected_parts = [
        {'ETag': 'etag1', 'PartNumber': 1},
        {'ETag': 'etag2', 'PartNumber': 2},
        {'ETag': 'etag3', 'PartNumber': 3},
    ]
    self.assertEqual(
        upload_context.wait_for_parts_to_finish(), expected_parts)

    # Completion has already been announced, so this wait returns right
    # away (and returns None).
    upload_context.announce_completed()
    completion_result = upload_context.wait_for_completion()
    self.assertIsNone(completion_result)
def setUp(self):
    """Create a fresh one-part context and empty bookkeeping per test."""
    # Bookkeeping shared between the test and any helper threads.
    self.caught_exception = None
    self.call_lock = threading.Lock()
    self.threads = []
    self.calls = []
    # Default context; individual tests may replace it.
    self.context = MultipartUploadContext(expected_parts=1)
class TestMultipartUploadContext(unittest.TestCase):
    """Tests for MultipartUploadContext, both sequential and threaded.

    The helper methods simulate what real upload threads would do. Each one
    records a tuple in ``self.calls`` while holding ``self.call_lock`` so
    the tests can assert on the order in which operations happened.
    """

    def setUp(self):
        self.context = MultipartUploadContext(expected_parts=1)
        # Ordered log of simulated operations, appended under call_lock.
        self.calls = []
        # Threads started through start_thread(), joined by join_threads().
        self.threads = []
        self.call_lock = threading.Lock()
        # Most recent exception a helper caught while waiting, if any.
        self.caught_exception = None

    def tearDown(self):
        self.join_threads()

    def join_threads(self):
        """Join every thread that was started through start_thread()."""
        for thread in self.threads:
            thread.join()

    def upload_part(self, part_number):
        """Simulate a thread uploading the part numbered ``part_number``."""
        # This simulates what a thread would do if it wanted to upload
        # a part.  First it would wait for the upload id.
        try:
            upload_id = self.context.wait_for_upload_id()
        except Exception as e:
            self.caught_exception = e
            return
        with self.call_lock:
            self.calls.append(('upload_part', part_number, upload_id))
            # Then it would call UploadPart here.
            # Then it would announce that it's finished with a part.
            self.context.announce_finished_part(
                etag='etag%s' % part_number, part_number=part_number)

    def complete_upload(self):
        """Simulate the thread that completes the multipart upload."""
        try:
            upload_id = self.context.wait_for_upload_id()
            parts = self.context.wait_for_parts_to_finish()
        except Exception as e:
            self.caught_exception = e
            return
        with self.call_lock:
            self.calls.append(('complete_upload', upload_id, parts))
            self.context.announce_completed()

    def wait_for_upload_complete(self):
        """Simulate a thread that only cares about overall completion."""
        try:
            self.context.wait_for_completion()
        except Exception as e:
            self.caught_exception = e
            return
        with self.call_lock:
            self.calls.append(('arbitrary_post_complete_operation',))

    def create_upload(self, upload_id):
        """Record the create call and announce ``upload_id``."""
        with self.call_lock:
            # Log the id that was actually passed in rather than a
            # hard-coded value, so the log matches what is announced.
            self.calls.append(('create_multipart_upload', upload_id))
            self.context.announce_upload_id(upload_id)

    def start_thread(self, thread):
        """Start ``thread`` and track it so join_threads() can join it."""
        thread.start()
        self.threads.append(thread)

    def test_normal_non_threaded(self):
        # The context object is pretty straightforward.
        # This shows the non threaded usage of this object.
        context = MultipartUploadContext(expected_parts=3)
        # First you can announce an upload id.
        context.announce_upload_id('my_upload_id')
        # Then a thread that was waiting on the id would be notified.
        self.assertEqual(context.wait_for_upload_id(), 'my_upload_id')
        # Then thread would chug away at the parts.
        context.announce_finished_part(etag='etag1', part_number=1)
        context.announce_finished_part(etag='etag2', part_number=2)
        context.announce_finished_part(etag='etag3', part_number=3)
        # Then a thread that was waiting for all the parts to finish
        # would be notified.
        self.assertEqual(context.wait_for_parts_to_finish(), [
            {'ETag': 'etag1', 'PartNumber': 1},
            {'ETag': 'etag2', 'PartNumber': 2},
            {'ETag': 'etag3', 'PartNumber': 3},
        ])
        context.announce_completed()
        # This will return right away since we've already announced
        # completion.
        self.assertIsNone(context.wait_for_completion())

    def test_basic_threaded_parts(self):
        # Now while test_normal_non_threaded showed the conceptual idea,
        # the real strength of MultipartUploadContext is that it works
        # when there are threads and when these threads operate out of
        # sequence.
        # For example, let's say a thread comes along that wants
        # to upload a part.  It needs to wait until the upload id
        # is announced.
        upload_part_thread = threading.Thread(target=self.upload_part,
                                              args=(1,))
        # Once this thread starts it will immediately block.
        self.start_thread(upload_part_thread)

        # Also, let's start the thread that will do the complete
        # multipart upload.  It will also block because it needs all
        # the parts so it's blocked on the upload_part_thread.  It also
        # needs the upload_id so it's blocked on that as well.
        complete_upload_thread = threading.Thread(
            target=self.complete_upload)
        self.start_thread(complete_upload_thread)

        # We'll also have some other arbitrary thread that's just waiting
        # for the whole upload to be complete.  This is not the same as
        # complete_upload_thread, as that thread is used to complete the
        # upload.  This thread wants to know when *that* process is all
        # done.
        arbitrary_waiting_thread = threading.Thread(
            target=self.wait_for_upload_complete)
        self.start_thread(arbitrary_waiting_thread)

        # Then finally the CreateMultipartUpload completes and we
        # announce the upload id.
        self.create_upload('my_upload_id')
        # The upload_part thread can now proceed as well as the complete
        # multipart upload thread.
        self.join_threads()

        self.assertIsNone(self.caught_exception)
        # We can verify that the invariants still hold.
        self.assertEqual(len(self.calls), 4)
        # First there should be three calls, create, upload, complete.
        self.assertEqual(self.calls[0][0], 'create_multipart_upload')
        self.assertEqual(self.calls[1][0], 'upload_part')
        self.assertEqual(self.calls[2][0], 'complete_upload')
        # Then anything that was waiting for the operation to complete
        # should be called afterwards.
        self.assertEqual(self.calls[3][0],
                         'arbitrary_post_complete_operation')

        # Verify the correct args were used.
        self.assertEqual(self.calls[0][1], 'my_upload_id')
        self.assertEqual(self.calls[1][1:], (1, 'my_upload_id'))
        self.assertEqual(
            self.calls[2][1:],
            ('my_upload_id', [{'ETag': 'etag1', 'PartNumber': 1}]))

    def test_randomized_stress_test(self):
        # Now given that we've verified the functionality from
        # the two tests above, we randomize the threading to ensure
        # that the order doesn't actually matter.  The invariant that
        # the CreateMultipartUpload is called first, then UploadPart
        # operations are called with the appropriate upload_id, then
        # CompleteMultipartUpload with the appropriate upload_id and
        # parts list should hold true regardless of how the threads
        # are ordered.

        # I've run this with much larger values, but 100 is a good
        # tradeoff with coverage vs. execution time.
        for _ in range(100):
            expected_parts = random.randint(2, 50)
            self.context = MultipartUploadContext(
                expected_parts=expected_parts)
            self.threads = []
            self.calls = []
            all_threads = [
                threading.Thread(target=self.complete_upload),
                threading.Thread(target=self.create_upload,
                                 args=('my_upload_id',)),
                threading.Thread(target=self.wait_for_upload_complete),
            ]
            # A distinct name is used here so the outer loop variable is
            # not shadowed by the part numbers.
            all_threads.extend(
                threading.Thread(target=self.upload_part,
                                 args=(part_number,))
                for part_number in range(1, expected_parts + 1))
            random.shuffle(all_threads)
            for thread in all_threads:
                self.start_thread(thread)
            self.join_threads()

            # Nothing is cancelled in this test, so no helper thread
            # should have caught an exception while waiting.
            self.assertIsNone(self.caught_exception)
            # One create, one call per part, one complete and one
            # post-complete operation; this also catches duplicates.
            self.assertEqual(len(self.calls), expected_parts + 3)

            self.assertEqual(self.calls[0][0], 'create_multipart_upload')
            self.assertEqual(self.calls[-1][0],
                             'arbitrary_post_complete_operation')
            self.assertEqual(self.calls[-2][0], 'complete_upload')
            parts = set()
            for call in self.calls[1:-2]:
                self.assertEqual(call[0], 'upload_part')
                self.assertEqual(call[2], 'my_upload_id')
                parts.add(call[1])
            self.assertEqual(len(parts), expected_parts)

            # The complete call is recorded as (name, upload_id, parts);
            # check it saw the announced id and each part exactly once.
            complete_call = self.calls[-2]
            self.assertEqual(complete_call[1], 'my_upload_id')
            self.assertEqual(
                sorted(part['PartNumber'] for part in complete_call[2]),
                list(range(1, expected_parts + 1)))

    def test_can_cancel_tasks(self):
        # Let's say that we want to have a thread waiting for the
        # upload id.
        upload_part_thread = threading.Thread(target=self.upload_part,
                                              args=(1,))
        self.start_thread(upload_part_thread)
        # But for whatever reason we aren't able to call
        # CreateMultipartUpload.  We'd like to let the other thread know
        # that it should abort.
        self.context.cancel_upload()
        # The started thread should be finished.
        self.join_threads()
        # No s3 calls should have been made.
        self.assertEqual(self.calls, [])
        # And any thread that tries to wait for data will get an
        # exception.
        with self.assertRaises(UploadCancelledError):
            self.context.wait_for_upload_id()
        with self.assertRaises(UploadCancelledError):
            self.context.wait_for_parts_to_finish()

    def test_cancel_threads_waiting_for_completion(self):
        # So we have a thread waiting for the entire upload to complete.
        arbitrary_waiting_thread = threading.Thread(
            target=self.wait_for_upload_complete)
        self.start_thread(arbitrary_waiting_thread)

        # And as it's waiting, something happens and we cancel the upload.
        self.context.cancel_upload()

        # The thread should exit.
        self.join_threads()

        # And we should have seen an exception being raised.
        self.assertIsInstance(self.caught_exception, UploadCancelledError)
class TestMultipartUploadContext(unittest.TestCase):
    """Tests for MultipartUploadContext, both sequential and threaded.

    The helper methods simulate what real upload threads would do. Each one
    records a tuple in ``self.calls`` while holding ``self.call_lock`` so
    the tests can assert on the order in which operations happened.
    """

    def setUp(self):
        self.context = MultipartUploadContext(expected_parts=1)
        # Ordered log of simulated operations, appended under call_lock.
        self.calls = []
        # Threads started through start_thread(), joined by join_threads().
        self.threads = []
        self.call_lock = threading.Lock()
        # Most recent exception a helper caught while waiting, if any.
        self.caught_exception = None

    def tearDown(self):
        self.join_threads()

    def join_threads(self):
        """Join every thread that was started through start_thread()."""
        for thread in self.threads:
            thread.join()

    def upload_part(self, part_number):
        """Simulate a thread uploading the part numbered ``part_number``."""
        # This simulates what a thread would do if it wanted to upload
        # a part.  First it would wait for the upload id.
        try:
            upload_id = self.context.wait_for_upload_id()
        except Exception as e:
            self.caught_exception = e
            return
        with self.call_lock:
            self.calls.append(('upload_part', part_number, upload_id))
            # Then it would call UploadPart here.
            # Then it would announce that it's finished with a part.
            self.context.announce_finished_part(
                etag='etag%s' % part_number, part_number=part_number)

    def complete_upload(self):
        """Simulate the thread that completes the multipart upload."""
        try:
            upload_id = self.context.wait_for_upload_id()
            parts = self.context.wait_for_parts_to_finish()
        except Exception as e:
            self.caught_exception = e
            return
        with self.call_lock:
            self.calls.append(('complete_upload', upload_id, parts))
            self.context.announce_completed()

    def wait_for_upload_complete(self):
        """Simulate a thread that only cares about overall completion."""
        try:
            self.context.wait_for_completion()
        except Exception as e:
            self.caught_exception = e
            return
        with self.call_lock:
            self.calls.append(('arbitrary_post_complete_operation',))

    def create_upload(self, upload_id):
        """Record the create call and announce ``upload_id``."""
        with self.call_lock:
            # Log the id that was actually passed in rather than a
            # hard-coded value, so the log matches what is announced.
            self.calls.append(('create_multipart_upload', upload_id))
            self.context.announce_upload_id(upload_id)

    def start_thread(self, thread):
        """Start ``thread`` and track it so join_threads() can join it."""
        thread.start()
        self.threads.append(thread)

    def test_normal_non_threaded(self):
        # The context object is pretty straightforward.
        # This shows the non threaded usage of this object.
        context = MultipartUploadContext(expected_parts=3)
        # First you can announce an upload id.
        context.announce_upload_id('my_upload_id')
        # Then a thread that was waiting on the id would be notified.
        self.assertEqual(context.wait_for_upload_id(), 'my_upload_id')
        # Then thread would chug away at the parts.
        context.announce_finished_part(etag='etag1', part_number=1)
        context.announce_finished_part(etag='etag2', part_number=2)
        context.announce_finished_part(etag='etag3', part_number=3)
        # Then a thread that was waiting for all the parts to finish
        # would be notified.
        self.assertEqual(context.wait_for_parts_to_finish(), [
            {'ETag': 'etag1', 'PartNumber': 1},
            {'ETag': 'etag2', 'PartNumber': 2},
            {'ETag': 'etag3', 'PartNumber': 3},
        ])
        context.announce_completed()
        # This will return right away since we've already announced
        # completion.
        self.assertIsNone(context.wait_for_completion())

    def test_basic_threaded_parts(self):
        # Now while test_normal_non_threaded showed the conceptual idea,
        # the real strength of MultipartUploadContext is that it works
        # when there are threads and when these threads operate out of
        # sequence.
        # For example, let's say a thread comes along that wants
        # to upload a part.  It needs to wait until the upload id
        # is announced.
        upload_part_thread = threading.Thread(target=self.upload_part,
                                              args=(1,))
        # Once this thread starts it will immediately block.
        self.start_thread(upload_part_thread)

        # Also, let's start the thread that will do the complete
        # multipart upload.  It will also block because it needs all
        # the parts so it's blocked on the upload_part_thread.  It also
        # needs the upload_id so it's blocked on that as well.
        complete_upload_thread = threading.Thread(
            target=self.complete_upload)
        self.start_thread(complete_upload_thread)

        # We'll also have some other arbitrary thread that's just waiting
        # for the whole upload to be complete.  This is not the same as
        # complete_upload_thread, as that thread is used to complete the
        # upload.  This thread wants to know when *that* process is all
        # done.
        arbitrary_waiting_thread = threading.Thread(
            target=self.wait_for_upload_complete)
        self.start_thread(arbitrary_waiting_thread)

        # Then finally the CreateMultipartUpload completes and we
        # announce the upload id.
        self.create_upload('my_upload_id')
        # The upload_part thread can now proceed as well as the complete
        # multipart upload thread.
        self.join_threads()

        self.assertIsNone(self.caught_exception)
        # We can verify that the invariants still hold.
        self.assertEqual(len(self.calls), 4)
        # First there should be three calls, create, upload, complete.
        self.assertEqual(self.calls[0][0], 'create_multipart_upload')
        self.assertEqual(self.calls[1][0], 'upload_part')
        self.assertEqual(self.calls[2][0], 'complete_upload')
        # Then anything that was waiting for the operation to complete
        # should be called afterwards.
        self.assertEqual(self.calls[3][0],
                         'arbitrary_post_complete_operation')

        # Verify the correct args were used.
        self.assertEqual(self.calls[0][1], 'my_upload_id')
        self.assertEqual(self.calls[1][1:], (1, 'my_upload_id'))
        self.assertEqual(
            self.calls[2][1:],
            ('my_upload_id', [{'ETag': 'etag1', 'PartNumber': 1}]))

    def test_randomized_stress_test(self):
        # Now given that we've verified the functionality from
        # the two tests above, we randomize the threading to ensure
        # that the order doesn't actually matter.  The invariant that
        # the CreateMultipartUpload is called first, then UploadPart
        # operations are called with the appropriate upload_id, then
        # CompleteMultipartUpload with the appropriate upload_id and
        # parts list should hold true regardless of how the threads
        # are ordered.

        # I've run this with much larger values, but 100 is a good
        # tradeoff with coverage vs. execution time.
        for _ in range(100):
            expected_parts = random.randint(2, 50)
            self.context = MultipartUploadContext(
                expected_parts=expected_parts)
            self.threads = []
            self.calls = []
            all_threads = [
                threading.Thread(target=self.complete_upload),
                threading.Thread(target=self.create_upload,
                                 args=('my_upload_id',)),
                threading.Thread(target=self.wait_for_upload_complete),
            ]
            # A distinct name is used here so the outer loop variable is
            # not shadowed by the part numbers.
            all_threads.extend(
                threading.Thread(target=self.upload_part,
                                 args=(part_number,))
                for part_number in range(1, expected_parts + 1))
            random.shuffle(all_threads)
            for thread in all_threads:
                self.start_thread(thread)
            self.join_threads()

            # Nothing is cancelled in this test, so no helper thread
            # should have caught an exception while waiting.
            self.assertIsNone(self.caught_exception)
            # One create, one call per part, one complete and one
            # post-complete operation; this also catches duplicates.
            self.assertEqual(len(self.calls), expected_parts + 3)

            self.assertEqual(self.calls[0][0], 'create_multipart_upload')
            self.assertEqual(self.calls[-1][0],
                             'arbitrary_post_complete_operation')
            self.assertEqual(self.calls[-2][0], 'complete_upload')
            parts = set()
            for call in self.calls[1:-2]:
                self.assertEqual(call[0], 'upload_part')
                self.assertEqual(call[2], 'my_upload_id')
                parts.add(call[1])
            self.assertEqual(len(parts), expected_parts)

            # The complete call is recorded as (name, upload_id, parts);
            # check it saw the announced id and each part exactly once.
            complete_call = self.calls[-2]
            self.assertEqual(complete_call[1], 'my_upload_id')
            self.assertEqual(
                sorted(part['PartNumber'] for part in complete_call[2]),
                list(range(1, expected_parts + 1)))

    def test_can_cancel_tasks(self):
        # Let's say that we want to have a thread waiting for the
        # upload id.
        upload_part_thread = threading.Thread(target=self.upload_part,
                                              args=(1,))
        self.start_thread(upload_part_thread)
        # But for whatever reason we aren't able to call
        # CreateMultipartUpload.  We'd like to let the other thread know
        # that it should abort.
        self.context.cancel_upload()
        # The started thread should be finished.
        self.join_threads()
        # No s3 calls should have been made.
        self.assertEqual(self.calls, [])
        # And any thread that tries to wait for data will get an
        # exception.
        with self.assertRaises(UploadCancelledError):
            self.context.wait_for_upload_id()
        with self.assertRaises(UploadCancelledError):
            self.context.wait_for_parts_to_finish()

    def test_cancel_threads_waiting_for_completion(self):
        # So we have a thread waiting for the entire upload to complete.
        arbitrary_waiting_thread = threading.Thread(
            target=self.wait_for_upload_complete)
        self.start_thread(arbitrary_waiting_thread)

        # And as it's waiting, something happens and we cancel the upload.
        self.context.cancel_upload()

        # The thread should exit.
        self.join_threads()

        # And we should have seen an exception being raised.
        self.assertIsInstance(self.caught_exception, UploadCancelledError)