Example #1
    def test_compress_simple_omega_file_content(self, mocked_priority_queue):
        data = "ABBCCCDDDD"
        expected_compressed_data = b'DCBA' + self._test_delimiter + b'\xA3\x69\x20\x7F'
        # A : 1 : 4 : 101000
        # B : 2 : 3 : 110
        # C : 3 : 2 : 100
        # D : 4 : 1 : 0
        # 101000 110 110 100 100 100 0 0 0 0
        # 10100011 01101001 00100000 01111111
        # A3       69       20       7F
        # 163      105      32       127

        read_stream_path = configuration.test_file_path('simple_omega.txt')
        write_stream_path = configuration.test_file_path(
            'simple_omega_compressed.txt')

        results_queue = mocked_priority_queue.return_value
        thread_number = 1
        read_stream_start_position = 0
        read_limit = None
        threading_data = (results_queue, thread_number,
                          read_stream_start_position, read_limit)

        initializing_stream = None
        check_stream = None
        try:
            initializing_stream = open(read_stream_path, 'w')
            initializing_stream.write(data)
            initializing_stream.flush()

            elias._compress_file_content(
                read_stream_path,
                write_stream_path,
                threading_data=threading_data,
                code_function=TestElias._omega_code_function,
                ending_bit=TestElias._omega_code_ending_bit)

            check_stream = open(write_stream_path, 'rb')
            compressed_data = check_stream.read()
            check_stream.close()

            self.assertEqual(expected_compressed_data, compressed_data)
        finally:
            if initializing_stream is not None:
                initializing_stream.close()

            if check_stream is not None:
                check_stream.close()

            os.remove(read_stream_path)
            os.remove(write_stream_path)
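
The bit layout in the comment above can be reproduced with a short standalone snippet (not part of the test module): the omega codes are concatenated in input order and the final byte is padded with the omega ending bit, which yields the expected A3 69 20 7F payload.

codes = {'A': '101000', 'B': '110', 'C': '100', 'D': '0'}   # omega codes from the comment
bits = ''.join(codes[character] for character in "ABBCCCDDDD")
bits += '1' * (-len(bits) % 8)    # pad the last byte with the omega ending bit
packed = bytes(int(bits[index:index + 8], 2) for index in range(0, len(bits), 8))
assert packed == b'\xA3\x69\x20\x7F'
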
Example #2
    def test_shuffle_mapper_results(self):
        thread_1_data = 'A 1\nB 1\nC 1'
        thread_2_data = 'A 1\nB 1\nC 1'
        thread_3_data = 'A 1\nB 1\nC 1'
        expected_shuffled_mapper_results_dictionary = {'A': [1, 1, 1], 'B': [1, 1, 1], 'C': [1, 1, 1]}

        thread_1_file = configuration.test_file_path('thread_1.txt')
        thread_2_file = configuration.test_file_path('thread_2.txt')
        thread_3_file = configuration.test_file_path('thread_3.txt')

        thread_1_path = pathlib.Path(thread_1_file)
        thread_2_path = pathlib.Path(thread_2_file)
        thread_3_path = pathlib.Path(thread_3_file)

        thread_result_files = [thread_1_file, thread_2_file, thread_3_file]

        thread_1_file_stream = None
        thread_2_file_stream = None
        thread_3_file_stream = None
        checking_stream = None

        try:
            thread_1_file_stream = open(thread_1_file, 'w')
            thread_2_file_stream = open(thread_2_file, 'w')
            thread_3_file_stream = open(thread_3_file, 'w')

            thread_1_file_stream.write(thread_1_data)
            thread_2_file_stream.write(thread_2_data)
            thread_3_file_stream.write(thread_3_data)

            thread_1_file_stream.close()
            thread_2_file_stream.close()
            thread_3_file_stream.close()

            shuffled_mapper_results_dictionary = characters_distribution._shuffle_mapper_results(thread_result_files)

            self.assertFalse(thread_1_path.exists())
            self.assertFalse(thread_2_path.exists())
            self.assertFalse(thread_3_path.exists())
            self.assertEqual(expected_shuffled_mapper_results_dictionary, shuffled_mapper_results_dictionary)

        finally:
            if thread_1_file_stream is not None and not thread_1_file_stream.closed:
                thread_1_file_stream.close()
            if thread_2_file_stream is not None and not thread_2_file_stream.closed:
                thread_2_file_stream.close()
            if thread_3_file_stream is not None and not thread_3_file_stream.closed:
                thread_3_file_stream.close()
            if checking_stream is not None and not checking_stream.closed:
                checking_stream.close()
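
For reference, a minimal sketch of the behaviour this test asserts (the helper name is hypothetical; the real implementation is characters_distribution._shuffle_mapper_results): each thread file holds one "character count" pair per line, the counts are grouped per character, and the intermediate files are removed.

import collections
import os


def shuffle_mapper_results_sketch(thread_result_files):
    shuffled = collections.defaultdict(list)
    for file_path in thread_result_files:
        with open(file_path, 'r', encoding='utf-8') as stream:
            for line in stream:
                character, count = line.split()
                shuffled[character].append(int(count))
        os.remove(file_path)    # the test checks that the thread files are gone
    return dict(shuffled)
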
Example #3
    def test_combine_threads_results_wrong_priority(self):
        combined_results_file = configuration.test_file_path(
            'combined_results.txt')
        thread_2_file = configuration.test_file_path('thread_2.txt')

        results_queue = queue.PriorityQueue()
        results_queue.put((2, thread_2_file))

        try:
            self.assertRaises(ValueError, elias._combine_threads_results,
                              results_queue, combined_results_file, 3)
        finally:
            # since _combine_threads_results opens a stream for
            # combined_results_file, it should be deleted afterwards
            os.remove(combined_results_file)
Example #4
    def test_compress_1_chunk(self, mocked_combine_threading_results,
                              mocked_thread_result_file_path,
                              mocked_priority_queue,
                              mocked_compress_file_content, mocked_chunk_file):
        read_file_path = configuration.test_file_path('compress_1_chunk')
        write_file_path = configuration.test_file_path(
            'compress_1_chunk_compressed')
        code_type = 'gamma'

        num_of_chunks = 1
        chunk_size = 0

        mocked_chunk_file.return_value = (num_of_chunks, chunk_size)
        mocked_thread_result_file_path.return_value = write_file_path

        expected_read_stream_path_parameter_value = read_file_path
        expected_write_stream_path_parameter_value = write_file_path
        expected_thread_number_parameter_value = 1
        expected_read_stream_start_position = 0
        expected_read_limit = None
        expected_threading_data_parameter_value = (
            mocked_priority_queue.return_value,
            expected_thread_number_parameter_value,
            expected_read_stream_start_position, expected_read_limit)
        expected_code_function_parameter_value = TestElias._gamma_code_function
        expected_ending_bit_parameter_value = TestElias._gamma_code_ending_bit

        elias.compress(read_file_path, write_file_path, code_type=code_type)

        mocked_thread_result_file_path.assert_called_once_with(
            expected_write_stream_path_parameter_value,
            expected_thread_number_parameter_value)
        mocked_compress_file_content.assert_has_calls([
            mock.call(expected_read_stream_path_parameter_value,
                      expected_write_stream_path_parameter_value,
                      threading_data=expected_threading_data_parameter_value,
                      code_function=expected_code_function_parameter_value,
                      ending_bit=expected_ending_bit_parameter_value)
        ])
        mocked_combine_threading_results.assert_called_once_with(
            mocked_priority_queue.return_value,
            expected_write_stream_path_parameter_value, num_of_chunks)
Example #5
    def test_decompress_3_chunks(self, mocked_chunk_file):
        binary_data = b'DCBA' + self._test_delimiter + b'\x23\x69\x2F'
        expected_decompressed_data = "ABBCCCDDDD"

        read_stream_path = configuration.test_file_path(
            'simple_compressed_gamma.txt')
        write_file_path = configuration.test_file_path(
            'simple_decompressed_gamma.txt')
        code_type = 'gamma'

        num_of_chunks = 3
        chunk_size = 1

        mocked_chunk_file.return_value = (num_of_chunks, chunk_size)

        initializing_stream = None
        check_stream = None
        try:
            initializing_stream = open(read_stream_path, 'wb')
            initializing_stream.write(binary_data)
            initializing_stream.close()

            elias.decompress(read_stream_path,
                             write_file_path,
                             code_type=code_type)

            check_stream = open(write_file_path, 'r')
            decompressed_data = check_stream.read()
            check_stream.close()

            self.assertEqual(expected_decompressed_data, decompressed_data)
        finally:
            if initializing_stream is not None and not initializing_stream.closed:
                initializing_stream.close()
            if check_stream is not None and not check_stream.closed:
                check_stream.close()

            os.remove(read_stream_path)
            os.remove(write_file_path)
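
As a standalone sanity check (not part of the test module, assuming the standard Elias gamma codes 1 -> '1', 2 -> '010', 3 -> '011', 4 -> '00100' and the b'DCBA' header as the rank-to-character mapping), the payload \x23\x69\x2F decodes back to the expected string:

ranked_characters = 'DCBA'    # rank 1 -> 'D', rank 2 -> 'C', ...
bits = ''.join(format(byte, '08b') for byte in b'\x23\x69\x2F')    # exactly 24 bits, no padding
decoded, position = [], 0
while position < len(bits):
    zeros = 0
    while bits[position] == '0':    # unary prefix: number of extra payload bits
        zeros += 1
        position += 1
    rank = int(bits[position:position + zeros + 1], 2)
    position += zeros + 1
    decoded.append(ranked_characters[rank - 1])
assert ''.join(decoded) == "ABBCCCDDDD"
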
Example #6
    def test_map_reduce_count_result(self, mocked_chunk_file):
        data = "ABCABCABCABC"
        expected_characters_distributions = {('B', 4), ('C', 4), ('A', 4)}

        num_of_chunks = 3
        chunk_size = 4

        mocked_chunk_file.return_value = (num_of_chunks, chunk_size)

        read_file_path = configuration.test_file_path('test_count_characters_distribution_result')

        write_stream = None
        try:
            write_stream = open(read_file_path, 'w', encoding='utf-8')
            write_stream.write(data)
            write_stream.close()
            characters_distributions = set(characters_distribution._map_reduce_count(read_file_path))

            self.assertEqual(expected_characters_distributions, characters_distributions)
        finally:
            if write_stream is not None and not write_stream.closed:
                write_stream.close()

            os.remove(read_file_path)
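
A hedged sketch of the reduce step implied by the expected result above (hypothetical helper name, not the real internals of characters_distribution): the shuffled {character: [1, 1, ...]} dictionary collapses into (character, total) pairs.

def reduce_counts_sketch(shuffled_mapper_results):
    return [(character, sum(counts))
            for character, counts in shuffled_mapper_results.items()]


assert reduce_counts_sketch({'A': [1, 1, 1, 1]}) == [('A', 4)]
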
Example #7
    def test_map_reduce_count_thread_results_file_content(self,
                                                          mocked_shuffle_mapper_results,
                                                          mocked_thread_result_file_path,
                                                          mocked_chunk_file):
        data = "AAAABBBBCCCC"

        expected_thread_1_mapped_data = 'A 1\nA 1\nA 1\nA 1'
        expected_thread_2_mapped_data = 'B 1\nB 1\nB 1\nB 1'
        expected_thread_3_mapped_data = 'C 1\nC 1\nC 1\nC 1'

        thread_1_result_file = configuration.test_file_path('count_characters_distribution_thread_1.txt')
        thread_2_result_file = configuration.test_file_path('count_characters_distribution_thread_2.txt')
        thread_3_result_file = configuration.test_file_path('count_characters_distribution_thread_3.txt')

        shuffled_mapper_results = dict()

        num_of_chunks = 3
        chunk_size = 4

        mocked_shuffle_mapper_results.return_value = shuffled_mapper_results
        mocked_thread_result_file_path.side_effect = [thread_1_result_file,
                                                      thread_2_result_file,
                                                      thread_3_result_file]
        mocked_chunk_file.return_value = (num_of_chunks, chunk_size)

        read_stream_path = configuration.test_file_path('count_characters_distribution.txt')

        initializing_stream = None
        thread_1_check_stream = None
        thread_2_check_stream = None
        thread_3_check_stream = None

        try:
            initializing_stream = open(read_stream_path, 'w', encoding='utf-8')
            initializing_stream.write(data)
            initializing_stream.close()

            characters_distribution._map_reduce_count(read_stream_path)

            thread_1_check_stream = open(thread_1_result_file, 'r', encoding='utf-8')
            thread_2_check_stream = open(thread_2_result_file, 'r', encoding='utf-8')
            thread_3_check_stream = open(thread_3_result_file, 'r', encoding='utf-8')

            thread_1_mapped_data = thread_1_check_stream.read()
            thread_2_mapped_data = thread_2_check_stream.read()
            thread_3_mapped_data = thread_3_check_stream.read()

            thread_1_check_stream.close()
            thread_2_check_stream.close()
            thread_3_check_stream.close()

            self.assertEqual(expected_thread_1_mapped_data, thread_1_mapped_data)
            self.assertEqual(expected_thread_2_mapped_data, thread_2_mapped_data)
            self.assertEqual(expected_thread_3_mapped_data, thread_3_mapped_data)

        finally:
            if initializing_stream is not None and not initializing_stream.closed:
                initializing_stream.close()

            if thread_1_check_stream is not None and not thread_1_check_stream.closed:
                thread_1_check_stream.close()

            if thread_2_check_stream is not None and not thread_2_check_stream.closed:
                thread_2_check_stream.close()

            if thread_3_check_stream is not None and not thread_3_check_stream.closed:
                thread_3_check_stream.close()

            os.remove(read_stream_path)
            os.remove(thread_1_result_file)
            os.remove(thread_2_result_file)
            os.remove(thread_3_result_file)
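
The expected per-thread files above imply a very small mapper: one "character 1" line per character of the thread's chunk. A hypothetical sketch of that format (the real mapper is internal to characters_distribution):

def map_chunk_sketch(chunk):
    return '\n'.join('{} 1'.format(character) for character in chunk)


assert map_chunk_sketch("AAAA") == 'A 1\nA 1\nA 1\nA 1'
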
Example #8
    def test_compress_3_chunks(self, mocked_combine_threading_results,
                               mocked_thread_result_file_path,
                               mocked_priority_queue,
                               mocked_compress_file_content,
                               mocked_chunk_file):

        read_stream_path = configuration.test_file_path(
            'hyper_threaded_gamma_simple.txt')
        write_stream_path = configuration.test_file_path(
            'hyper_threaded_gamma_simple_compress.txt')
        code_type = 'gamma'

        thread_1_result_file_path = configuration.test_file_path(
            'hyper_threaded_gamma_simple_thread_1.txt')
        thread_2_result_file_path = configuration.test_file_path(
            'hyper_threaded_gamma_simple_thread_2.txt')
        thread_3_result_file_path = configuration.test_file_path(
            'hyper_threaded_gamma_simple_thread_3.txt')
        thread_1_number = 1
        thread_2_number = 2
        thread_3_number = 3
        thread_1_start_position = 0
        thread_2_start_position = 3
        thread_3_start_position = 6
        thread_1_read_limit = 3
        thread_2_read_limit = 3
        thread_3_read_limit = None
        thread_1_threading_configuration = (mocked_priority_queue.return_value,
                                            thread_1_number,
                                            thread_1_start_position,
                                            thread_1_read_limit)
        thread_2_threading_configuration = (mocked_priority_queue.return_value,
                                            thread_2_number,
                                            thread_2_start_position,
                                            thread_2_read_limit)
        thread_3_threading_configuration = (mocked_priority_queue.return_value,
                                            thread_3_number,
                                            thread_3_start_position,
                                            thread_3_read_limit)

        num_of_chunks = 3
        thread_chunk = 3

        mocked_chunk_file.return_value = (num_of_chunks, thread_chunk)
        mocked_thread_result_file_path.side_effect = [
            thread_1_result_file_path, thread_2_result_file_path,
            thread_3_result_file_path
        ]

        expected_code_function_parameter_value = TestElias._gamma_code_function
        expected_ending_bit_parameter_value = TestElias._gamma_code_ending_bit

        elias.compress(read_stream_path,
                       write_stream_path,
                       code_type=code_type)

        mocked_compress_file_content.assert_has_calls([
            mock.call(read_stream_path,
                      thread_1_result_file_path,
                      threading_data=thread_1_threading_configuration,
                      code_function=expected_code_function_parameter_value,
                      ending_bit=expected_ending_bit_parameter_value),
            mock.call(read_stream_path,
                      thread_2_result_file_path,
                      threading_data=thread_2_threading_configuration,
                      code_function=expected_code_function_parameter_value,
                      ending_bit=expected_ending_bit_parameter_value),
            mock.call(read_stream_path,
                      thread_3_result_file_path,
                      threading_data=thread_3_threading_configuration,
                      code_function=expected_code_function_parameter_value,
                      ending_bit=expected_ending_bit_parameter_value)
        ], any_order=True)
        mocked_combine_threading_results.assert_called_once_with(
            mocked_priority_queue.return_value, write_stream_path,
            num_of_chunks)
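
The three expected calls encode the read windows that elias.compress is assumed to derive from _chunk_file's (num_of_chunks, chunk_size) result: each thread starts at (thread_number - 1) * chunk_size and only the last thread reads to the end of the file. A hedged sketch of that arithmetic (hypothetical helper name):

def thread_read_windows_sketch(num_of_chunks, chunk_size):
    windows = []
    for thread_number in range(1, num_of_chunks + 1):
        start_position = (thread_number - 1) * chunk_size
        read_limit = chunk_size if thread_number < num_of_chunks else None
        windows.append((thread_number, start_position, read_limit))
    return windows


# num_of_chunks=3, chunk_size=3 -> [(1, 0, 3), (2, 3, 3), (3, 6, None)],
# matching the three expected threading configurations above.
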
Example #9
    def test_combine_threads_results_combined_correct(self):
        thread_1_data = b'thread_1'
        thread_2_data = b'thread_2'
        thread_3_data = b'thread_3'
        expected_combined_data = thread_1_data + thread_2_data + thread_3_data

        thread_1_file = configuration.test_file_path(
            'combine_threading_results_test_thread_1.txt')
        thread_2_file = configuration.test_file_path(
            'combine_threading_results_test_thread_2.txt')
        thread_3_file = configuration.test_file_path(
            'combine_threading_results_test_thread_3.txt')
        combined_results_file = configuration.test_file_path(
            'combined_results.txt')

        thread_1_path = pathlib.Path(thread_1_file)
        thread_2_path = pathlib.Path(thread_2_file)
        thread_3_path = pathlib.Path(thread_3_file)

        results_queue = queue.PriorityQueue()
        results_queue.put((1, thread_1_file))
        results_queue.put((2, thread_2_file))
        results_queue.put((3, thread_3_file))

        num_of_threads = 3

        thread_1_file_stream = None
        thread_2_file_stream = None
        thread_3_file_stream = None
        checking_stream = None

        try:
            thread_1_file_stream = open(thread_1_file, 'wb')
            thread_2_file_stream = open(thread_2_file, 'wb')
            thread_3_file_stream = open(thread_3_file, 'wb')

            thread_1_file_stream.write(thread_1_data)
            thread_2_file_stream.write(thread_2_data)
            thread_3_file_stream.write(thread_3_data)

            thread_1_file_stream.close()
            thread_2_file_stream.close()
            thread_3_file_stream.close()

            elias._combine_threads_results(results_queue,
                                           combined_results_file,
                                           num_of_threads)

            checking_stream = open(combined_results_file, 'rb')
            combined_data = checking_stream.read()

            self.assertFalse(thread_1_path.exists())
            self.assertFalse(thread_2_path.exists())
            self.assertFalse(thread_3_path.exists())
            self.assertEqual(expected_combined_data, combined_data)

        finally:
            if thread_1_file_stream is not None and not thread_1_file_stream.closed:
                thread_1_file_stream.close()
            if thread_2_file_stream is not None and not thread_2_file_stream.closed:
                thread_2_file_stream.close()
            if thread_3_file_stream is not None and not thread_3_file_stream.closed:
                thread_3_file_stream.close()
            if checking_stream is not None and not checking_stream.closed:
                checking_stream.close()

            os.remove(combined_results_file)
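
Together with the wrong-priority test above, this pins down the contract of elias._combine_threads_results. A hedged reference sketch of that contract (hypothetical name, not the real implementation): results are drained from the priority queue in thread order and appended to the combined file, the per-thread files are deleted, and an out-of-order priority raises ValueError.

import os


def combine_threads_results_sketch(results_queue, combined_results_file,
                                   num_of_threads):
    with open(combined_results_file, 'wb') as combined_stream:
        for expected_priority in range(1, num_of_threads + 1):
            priority, thread_result_file = results_queue.get()
            if priority != expected_priority:
                raise ValueError(
                    'unexpected thread result priority: {}'.format(priority))
            with open(thread_result_file, 'rb') as thread_stream:
                combined_stream.write(thread_stream.read())
            os.remove(thread_result_file)
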
Example #10
    def test_decompress_data_2_chunks(self,
                                      mocked_decompress_data,
                                      mocked_reverse_dictionary,
                                      mocked_generate_dictionary,
                                      mocked_chunk_file):
        bytes_data = TestLZW._wiki_bytes

        dictionary = TestLZW._wiki_dictionary()
        reverse_dictionary = TestLZW._wiki_reversed_dictionary()

        decompress_data_first_call_result = ('TOBEOR', '00', 'R')
        decompress_data_second_call_result = ('NOTTOBEORTOBEORNOT', TestLZW._empty_str, 'OT')

        mocked_chunk_file.return_value = (2, 4)    # (num_of_chunks, chunk_size)
        mocked_generate_dictionary.return_value = dictionary
        mocked_reverse_dictionary.return_value = reverse_dictionary
        mocked_decompress_data.side_effect = [decompress_data_first_call_result, decompress_data_second_call_result]

        expected_decompress_data_calls_num = 2
        expected_decompress_data_calls = [
            mock.call(
                '10101100000001100110100001001100',
                dictionary,
                reverse_dictionary,
                initial_phrase=TestLZW._empty_str
            ),
            mock.call(
                '00' + '1111010000010101011011011101011111100100011110100000100010' + '000000',
                dictionary,
                reverse_dictionary,
                initial_phrase='R'
            )]
        expected_decompressed_data = 'TOBEORNOTTOBEORTOBEORNOT'

        read_file_path = configuration.test_file_path('decompress_wiki_2_chunks')
        write_file_path = configuration.test_file_path('decompress_wiki_2_chunks_decompressed')

        initializing_stream = None
        check_stream = None
        try:
            initializing_stream = open(read_file_path, 'wb')
            initializing_stream.write(bytes_data)
            initializing_stream.close()

            lzw.decompress(read_file_path, write_file_path)

            check_stream = open(write_file_path, 'r')
            decompressed_data = check_stream.read()
            check_stream.close()

            self.assertEqual(expected_decompress_data_calls_num, mocked_decompress_data.call_count)
            mocked_decompress_data.assert_has_calls(expected_decompress_data_calls)
            self.assertEqual(expected_decompressed_data, decompressed_data)
        finally:
            if initializing_stream is not None and not initializing_stream.closed:
                initializing_stream.close()

            if check_stream is not None and not check_stream.closed:
                check_stream.close()

            os.remove(read_file_path)
            os.remove(write_file_path)