def test_compress_simple_omega_file_content(self, mocked_priority_queue):
    data = "ABBCCCDDDD"
    expected_compressed_data = b'DCBA' + self._test_delimiter + b'\xA3\x69\x20\x7F'
    # A : 1 : 4 : 101000
    # B : 2 : 3 : 110
    # C : 3 : 2 : 100
    # D : 4 : 1 : 0
    # 101000 110 110 100 100 100 0 0 0 0
    # 10100011 01101001 00100000 01111111
    # A3 69 20 7F
    # 163 105 32 127
    read_stream_path = configuration.test_file_path('simple_omega.txt')
    write_stream_path = configuration.test_file_path(
        'simple_omega_compressed.txt')
    results_queue = mocked_priority_queue.return_value
    thread_number = 1
    read_stream_start_position = 0
    read_limit = None
    threading_data = (results_queue, thread_number,
                      read_stream_start_position, read_limit)
    initializing_stream = None
    check_stream = None
    try:
        initializing_stream = open(read_stream_path, 'w')
        initializing_stream.write(data)
        initializing_stream.flush()

        elias._compress_file_content(
            read_stream_path, write_stream_path,
            threading_data=threading_data,
            code_function=TestElias._omega_code_function,
            ending_bit=TestElias._omega_code_ending_bit)

        check_stream = open(write_stream_path, 'rb')
        compressed_data = check_stream.read()
        check_stream.close()

        self.assertEqual(expected_compressed_data, compressed_data)
    finally:
        if initializing_stream is not None:
            initializing_stream.close()
        if check_stream is not None:
            check_stream.close()
        os.remove(read_stream_path)
        os.remove(write_stream_path)
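# Illustrative sketch (not used by the tests): the expected omega payload
# above can be reproduced by concatenating the per-character codes from the
# comment and padding the last byte with the ending bit. This assumes the
# omega ending bit is '1'; the helper name below is hypothetical and is not
# part of the elias module.
def _rebuild_expected_omega_payload():
    codes = {'A': '101000', 'B': '110', 'C': '100', 'D': '0'}
    bits = ''.join(codes[character] for character in "ABBCCCDDDD")
    bits += '1' * (-len(bits) % 8)  # pad to a byte boundary with the ending bit
    payload = bytes(int(bits[i:i + 8], 2) for i in range(0, len(bits), 8))
    assert payload == b'\xA3\x69\x20\x7F'
    return payload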
def test_shuffle_mapper_results(self):
    thread_1_data = 'A 1\nB 1\nC 1'
    thread_2_data = 'A 1\nB 1\nC 1'
    thread_3_data = 'A 1\nB 1\nC 1'
    expected_shuffled_mapper_results_dictionary = {'A': [1, 1, 1],
                                                   'B': [1, 1, 1],
                                                   'C': [1, 1, 1]}
    thread_1_file = configuration.test_file_path('thread_1.txt')
    thread_2_file = configuration.test_file_path('thread_2.txt')
    thread_3_file = configuration.test_file_path('thread_3.txt')
    thread_1_path = pathlib.Path(thread_1_file)
    thread_2_path = pathlib.Path(thread_2_file)
    thread_3_path = pathlib.Path(thread_3_file)
    thread_result_files = [thread_1_file, thread_2_file, thread_3_file]
    thread_1_file_stream = None
    thread_2_file_stream = None
    thread_3_file_stream = None
    try:
        thread_1_file_stream = open(thread_1_file, 'w')
        thread_2_file_stream = open(thread_2_file, 'w')
        thread_3_file_stream = open(thread_3_file, 'w')
        thread_1_file_stream.write(thread_1_data)
        thread_2_file_stream.write(thread_2_data)
        thread_3_file_stream.write(thread_3_data)
        thread_1_file_stream.close()
        thread_2_file_stream.close()
        thread_3_file_stream.close()

        shuffled_mapper_results_dictionary = \
            characters_distribution._shuffle_mapper_results(thread_result_files)

        self.assertFalse(thread_1_path.exists())
        self.assertFalse(thread_2_path.exists())
        self.assertFalse(thread_3_path.exists())
        self.assertEqual(expected_shuffled_mapper_results_dictionary,
                         shuffled_mapper_results_dictionary)
    finally:
        if thread_1_file_stream is not None and not thread_1_file_stream.closed:
            thread_1_file_stream.close()
        if thread_2_file_stream is not None and not thread_2_file_stream.closed:
            thread_2_file_stream.close()
        if thread_3_file_stream is not None and not thread_3_file_stream.closed:
            thread_3_file_stream.close()
def test_combine_threads_results_wrong_priority(self):
    combined_results_file = configuration.test_file_path(
        'combined_results.txt')
    thread_2_file = configuration.test_file_path('thread_2.txt')
    results_queue = queue.PriorityQueue()
    results_queue.put((2, thread_2_file))
    try:
        self.assertRaises(ValueError, elias._combine_threads_results,
                          results_queue, combined_results_file, 3)
    finally:
        # since _combine_threads_results opens a stream for
        # combined_results_file, it should be deleted afterwards
        os.remove(combined_results_file)
def test_compress_1_chunk(self, mocked_combine_threading_results,
                          mocked_thread_result_file_path,
                          mocked_priority_queue,
                          mocked_compress_file_content,
                          mocked_chunk_file):
    read_file_path = configuration.test_file_path('compress_1_chunk')
    write_file_path = configuration.test_file_path(
        'compress_1_chunk_compressed')
    code_type = 'gamma'
    num_of_chunks = 1
    chunk_size = 0
    mocked_chunk_file.return_value = (num_of_chunks, chunk_size)
    mocked_thread_result_file_path.return_value = write_file_path
    expected_read_stream_path_parameter_value = read_file_path
    expected_write_stream_path_parameter_value = write_file_path
    expected_thread_number_parameter_value = 1
    expected_read_stream_start_position = 0
    expected_read_limit = None
    expected_threading_data_parameter_value = (
        mocked_priority_queue.return_value,
        expected_thread_number_parameter_value,
        expected_read_stream_start_position,
        expected_read_limit)
    expected_code_function_parameter_value = TestElias._gamma_code_function
    expected_ending_bit_parameter_value = TestElias._gamma_code_ending_bit

    elias.compress(read_file_path, write_file_path, code_type=code_type)

    mocked_thread_result_file_path.assert_called_once_with(
        expected_write_stream_path_parameter_value,
        expected_thread_number_parameter_value)
    mocked_compress_file_content.assert_has_calls([
        mock.call(expected_read_stream_path_parameter_value,
                  expected_write_stream_path_parameter_value,
                  threading_data=expected_threading_data_parameter_value,
                  code_function=expected_code_function_parameter_value,
                  ending_bit=expected_ending_bit_parameter_value)
    ])
    mocked_combine_threading_results.assert_called_once_with(
        mocked_priority_queue.return_value,
        expected_write_stream_path_parameter_value,
        num_of_chunks)
def test_decompress_3_chunks(self, mocked_chunk_file):
    binary_data = b'DCBA' + self._test_delimiter + b'\x23\x69\x2F'
    expected_decompressed_data = "ABBCCCDDDD"
    read_stream_path = configuration.test_file_path(
        'simple_compressed_gamma.txt')
    write_file_path = configuration.test_file_path(
        'simple_decompressed_gamma.txt')
    code_type = 'gamma'
    num_of_chunks = 3
    chunk_size = 1
    mocked_chunk_file.return_value = (num_of_chunks, chunk_size)
    initializing_stream = None
    check_stream = None
    try:
        initializing_stream = open(read_stream_path, 'wb')
        initializing_stream.write(binary_data)
        initializing_stream.close()

        elias.decompress(read_stream_path, write_file_path,
                         code_type=code_type)

        check_stream = open(write_file_path, 'r')
        decompressed_data = check_stream.read()
        check_stream.close()

        self.assertEqual(expected_decompressed_data, decompressed_data)
    finally:
        if initializing_stream is not None and not initializing_stream.closed:
            initializing_stream.close()
        if check_stream is not None and not check_stream.closed:
            check_stream.close()
        os.remove(read_stream_path)
        os.remove(write_file_path)
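# Illustrative sketch (not used by the tests): the b'\x23\x69\x2F' payload
# above follows the same bit-packing scheme as the omega test, assuming the
# b'DCBA' header assigns ranks by descending frequency (D -> 1, C -> 2,
# B -> 3, A -> 4) and standard Elias gamma codes. The helper name is
# hypothetical.
def _rebuild_expected_gamma_payload():
    gamma_codes = {'D': '1', 'C': '010', 'B': '011', 'A': '00100'}
    bits = ''.join(gamma_codes[character] for character in "ABBCCCDDDD")
    payload = bytes(int(bits[i:i + 8], 2) for i in range(0, len(bits), 8))
    assert payload == b'\x23\x69\x2F'  # exactly 24 bits, no padding needed
    return payload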
def test_map_reduce_count_result(self, mocked_chunk_file):
    data = "ABCABCABCABC"
    expected_characters_distributions = {('B', 4), ('C', 4), ('A', 4)}
    num_of_chunks = 3
    chunk_size = 4
    mocked_chunk_file.return_value = (num_of_chunks, chunk_size)
    read_file_path = configuration.test_file_path(
        'test_count_characters_distribution_result')
    write_stream = None
    try:
        write_stream = open(read_file_path, 'w', encoding='utf-8')
        write_stream.write(data)
        write_stream.close()

        characters_distributions = set(
            characters_distribution._map_reduce_count(read_file_path))

        self.assertEqual(expected_characters_distributions,
                         characters_distributions)
    finally:
        if write_stream is not None and not write_stream.closed:
            write_stream.close()
        os.remove(read_file_path)
def test_map_reduce_count_thread_results_file_content(
        self, mocked_shuffle_mapper_results, mocked_thread_result_file_path,
        mocked_chunk_file):
    data = "AAAABBBBCCCC"
    expected_thread_1_compressed_data = 'A 1\nA 1\nA 1\nA 1'
    expected_thread_2_compressed_data = 'B 1\nB 1\nB 1\nB 1'
    expected_thread_3_compressed_data = 'C 1\nC 1\nC 1\nC 1'
    thread_1_result_file = configuration.test_file_path(
        'count_characters_distribution_thread_1.txt')
    thread_2_result_file = configuration.test_file_path(
        'count_characters_distribution_thread_2.txt')
    thread_3_result_file = configuration.test_file_path(
        'count_characters_distribution_thread_3.txt')
    shuffled_mapper_results = dict()
    num_of_chunks = 3
    chunk_size = 4
    mocked_shuffle_mapper_results.return_value = shuffled_mapper_results
    mocked_thread_result_file_path.side_effect = [thread_1_result_file,
                                                  thread_2_result_file,
                                                  thread_3_result_file]
    mocked_chunk_file.return_value = (num_of_chunks, chunk_size)
    read_stream_path = configuration.test_file_path(
        'count_characters_distribution.txt')
    initializing_stream = None
    thread_1_check_stream = None
    thread_2_check_stream = None
    thread_3_check_stream = None
    try:
        initializing_stream = open(read_stream_path, 'w', encoding='utf-8')
        initializing_stream.write(data)
        initializing_stream.close()

        characters_distribution._map_reduce_count(read_stream_path)

        thread_1_check_stream = open(thread_1_result_file, 'r',
                                     encoding='utf-8')
        thread_2_check_stream = open(thread_2_result_file, 'r',
                                     encoding='utf-8')
        thread_3_check_stream = open(thread_3_result_file, 'r',
                                     encoding='utf-8')
        thread_1_mapped_data = thread_1_check_stream.read()
        thread_2_mapped_data = thread_2_check_stream.read()
        thread_3_mapped_data = thread_3_check_stream.read()
        thread_1_check_stream.close()
        thread_2_check_stream.close()
        thread_3_check_stream.close()

        self.assertEqual(expected_thread_1_compressed_data,
                         thread_1_mapped_data)
        self.assertEqual(expected_thread_2_compressed_data,
                         thread_2_mapped_data)
        self.assertEqual(expected_thread_3_compressed_data,
                         thread_3_mapped_data)
    finally:
        if initializing_stream is not None and not initializing_stream.closed:
            initializing_stream.close()
        if thread_1_check_stream is not None and not thread_1_check_stream.closed:
            thread_1_check_stream.close()
        if thread_2_check_stream is not None and not thread_2_check_stream.closed:
            thread_2_check_stream.close()
        if thread_3_check_stream is not None and not thread_3_check_stream.closed:
            thread_3_check_stream.close()
        os.remove(read_stream_path)
        os.remove(thread_1_result_file)
        os.remove(thread_2_result_file)
        os.remove(thread_3_result_file)
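# Illustrative sketch of the map/shuffle/reduce flow that the three
# characters_distribution tests above exercise: each mapper emits one
# 'character 1' line per input character, the shuffle step groups the lines
# by character, and the reducer sums each group. This is an assumed
# simplification for readability, not the actual implementation.
def _map_reduce_sketch(chunks):
    import collections
    mapped_lines = []
    for chunk in chunks:
        mapped_lines.extend('{0} 1'.format(character) for character in chunk)
    grouped = collections.defaultdict(list)
    for line in mapped_lines:
        character, count = line.split()
        grouped[character].append(int(count))
    return {(character, sum(counts)) for character, counts in grouped.items()}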
def test_compress_3_chunks(self, mocked_combine_threading_results,
                           mocked_thread_result_file_path,
                           mocked_priority_queue,
                           mocked_compress_file_content,
                           mocked_chunk_file):
    read_stream_path = configuration.test_file_path(
        'hyper_threaded_gamma_simple.txt')
    write_stream_path = configuration.test_file_path(
        'hyper_threaded_gamma_simple_compress.txt')
    code_type = 'gamma'
    thread_1_result_file_path = configuration.test_file_path(
        'hyper_threaded_gamma_simple_thread_1.txt')
    thread_2_result_file_path = configuration.test_file_path(
        'hyper_threaded_gamma_simple_thread_2.txt')
    thread_3_result_file_path = configuration.test_file_path(
        'hyper_threaded_gamma_simple_thread_3.txt')
    thread_1_number = 1
    thread_2_number = 2
    thread_3_number = 3
    thread_1_start_position = 0
    thread_2_start_position = 3
    thread_3_start_position = 6
    thread_1_read_limit = 3
    thread_2_read_limit = 3
    thread_3_read_limit = None
    thread_1_threading_configuration = (mocked_priority_queue.return_value,
                                        thread_1_number,
                                        thread_1_start_position,
                                        thread_1_read_limit)
    thread_2_threading_configuration = (mocked_priority_queue.return_value,
                                        thread_2_number,
                                        thread_2_start_position,
                                        thread_2_read_limit)
    thread_3_threading_configuration = (mocked_priority_queue.return_value,
                                        thread_3_number,
                                        thread_3_start_position,
                                        thread_3_read_limit)
    num_of_chunks = 3
    thread_chunk = 3
    mocked_chunk_file.return_value = (num_of_chunks, thread_chunk)
    mocked_thread_result_file_path.side_effect = [
        thread_1_result_file_path,
        thread_2_result_file_path,
        thread_3_result_file_path
    ]
    expected_code_function_parameter_value = TestElias._gamma_code_function
    expected_ending_bit_parameter_value = TestElias._gamma_code_ending_bit

    elias.compress(read_stream_path, write_stream_path, code_type=code_type)

    mocked_compress_file_content.assert_has_calls([
        mock.call(read_stream_path, thread_1_result_file_path,
                  threading_data=thread_1_threading_configuration,
                  code_function=expected_code_function_parameter_value,
                  ending_bit=expected_ending_bit_parameter_value),
        mock.call(read_stream_path, thread_2_result_file_path,
                  threading_data=thread_2_threading_configuration,
                  code_function=expected_code_function_parameter_value,
                  ending_bit=expected_ending_bit_parameter_value),
        mock.call(read_stream_path, thread_3_result_file_path,
                  threading_data=thread_3_threading_configuration,
                  code_function=expected_code_function_parameter_value,
                  ending_bit=expected_ending_bit_parameter_value)
    ], any_order=True)
    mocked_combine_threading_results.assert_called_once_with(
        mocked_priority_queue.return_value, write_stream_path, num_of_chunks)
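# Illustrative sketch of the chunk bookkeeping the expected threading data
# above assumes: thread i starts at (i - 1) * chunk_size and reads one chunk,
# while the last thread reads to end of file (read_limit=None). Hypothetical
# helper, not part of the elias module.
def _build_expected_threading_data(results_queue, num_of_chunks, chunk_size):
    threading_data = []
    for thread_number in range(1, num_of_chunks + 1):
        start_position = (thread_number - 1) * chunk_size
        read_limit = chunk_size if thread_number < num_of_chunks else None
        threading_data.append(
            (results_queue, thread_number, start_position, read_limit))
    return threading_data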
def test_combine_threads_results_combined_correct(self):
    thread_1_data = b'thread_1'
    thread_2_data = b'thread_2'
    thread_3_data = b'thread_3'
    expected_combined_data = thread_1_data + thread_2_data + thread_3_data
    thread_1_file = configuration.test_file_path(
        'combine_threading_results_test_thread_1.txt')
    thread_2_file = configuration.test_file_path(
        'combine_threading_results_test_thread_2.txt')
    thread_3_file = configuration.test_file_path(
        'combine_threading_results_test_thread_3.txt')
    combined_results_file = configuration.test_file_path(
        'combined_results.txt')
    thread_1_path = pathlib.Path(thread_1_file)
    thread_2_path = pathlib.Path(thread_2_file)
    thread_3_path = pathlib.Path(thread_3_file)
    results_queue = queue.PriorityQueue()
    results_queue.put((1, thread_1_file))
    results_queue.put((2, thread_2_file))
    results_queue.put((3, thread_3_file))
    num_of_threads = 3
    thread_1_file_stream = None
    thread_2_file_stream = None
    thread_3_file_stream = None
    checking_stream = None
    try:
        thread_1_file_stream = open(thread_1_file, 'wb')
        thread_2_file_stream = open(thread_2_file, 'wb')
        thread_3_file_stream = open(thread_3_file, 'wb')
        thread_1_file_stream.write(thread_1_data)
        thread_2_file_stream.write(thread_2_data)
        thread_3_file_stream.write(thread_3_data)
        thread_1_file_stream.close()
        thread_2_file_stream.close()
        thread_3_file_stream.close()

        elias._combine_threads_results(results_queue, combined_results_file,
                                       num_of_threads)

        checking_stream = open(combined_results_file, 'rb')
        combined_data = checking_stream.read()

        self.assertFalse(thread_1_path.exists())
        self.assertFalse(thread_2_path.exists())
        self.assertFalse(thread_3_path.exists())
        self.assertEqual(expected_combined_data, combined_data)
    finally:
        if thread_1_file_stream is not None and not thread_1_file_stream.closed:
            thread_1_file_stream.close()
        if thread_2_file_stream is not None and not thread_2_file_stream.closed:
            thread_2_file_stream.close()
        if thread_3_file_stream is not None and not thread_3_file_stream.closed:
            thread_3_file_stream.close()
        if checking_stream is not None and not checking_stream.closed:
            checking_stream.close()
        os.remove(combined_results_file)
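# Illustrative sketch of the behaviour the two _combine_threads_results tests
# rely on: pop (priority, file) entries in order, fail on an unexpected
# priority, append each thread file to the combined file and delete it.
# This is an assumed simplification, not the actual elias implementation.
def _combine_threads_results_sketch(results_queue, combined_results_file,
                                    num_of_threads):
    with open(combined_results_file, 'wb') as combined_stream:
        for expected_thread_number in range(1, num_of_threads + 1):
            thread_number, thread_result_file = results_queue.get()
            if thread_number != expected_thread_number:
                raise ValueError('unexpected thread result priority')
            with open(thread_result_file, 'rb') as thread_stream:
                combined_stream.write(thread_stream.read())
            os.remove(thread_result_file)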
def test_decompress_data_2_chunks(self, mocked_decompress_data,
                                  mocked_reverse_dictionary,
                                  mocked_generate_dictionary,
                                  mocked_chunk_file):
    bytes_data = TestLZW._wiki_bytes
    dictionary = TestLZW._wiki_dictionary()
    reverse_dictionary = TestLZW._wiki_reversed_dictionary()
    decompress_data_first_call_result = ('TOBEOR', '00', 'R')
    decompress_data_second_call_result = ('NOTTOBEORTOBEORNOT',
                                          TestLZW._empty_str, 'OT')
    mocked_chunk_file.return_value = (2, 4)
    mocked_generate_dictionary.return_value = dictionary
    mocked_reverse_dictionary.return_value = reverse_dictionary
    mocked_decompress_data.side_effect = [decompress_data_first_call_result,
                                          decompress_data_second_call_result]
    expected_decompress_data_calls_num = 2
    expected_decompress_data_calls = [
        mock.call(
            '10101100000001100110100001001100',
            dictionary,
            reverse_dictionary,
            initial_phrase=TestLZW._empty_str
        ),
        mock.call(
            '00'
            + '1111010000010101011011011101011111100100011110100000100010'
            + '000000',
            dictionary,
            reverse_dictionary,
            initial_phrase='R'
        )]
    expected_decompressed_data = 'TOBEORNOTTOBEORTOBEORNOT'
    read_file_path = configuration.test_file_path('decompress_wiki_2_chunks')
    write_file_path = configuration.test_file_path(
        'decompress_wiki_2_chunks_decompressed')
    initializing_stream = None
    check_stream = None
    try:
        initializing_stream = open(read_file_path, 'wb')
        initializing_stream.write(bytes_data)
        initializing_stream.close()

        lzw.decompress(read_file_path, write_file_path)

        check_stream = open(write_file_path, 'r')
        decompressed_data = check_stream.read()
        check_stream.close()

        self.assertEqual(expected_decompress_data_calls_num,
                         mocked_decompress_data.call_count)
        mocked_decompress_data.assert_has_calls(expected_decompress_data_calls)
        self.assertEqual(expected_decompressed_data, decompressed_data)
    finally:
        if initializing_stream is not None and not initializing_stream.closed:
            initializing_stream.close()
        if check_stream is not None and not check_stream.closed:
            check_stream.close()
        os.remove(read_file_path)
        os.remove(write_file_path)