def compress(input_file, output_path):
    '''
    Compress input_file, store the result in output_path and return its path.

    The input is read in text mode, a per-character frequency table is built,
    and the data is encoded using the tree returned by ``build_tree``. The
    frequency table and the encoded bytes are pickled together into
    ``<input base name>.huffman`` inside ``output_path``.

    Args:
        input_file: Path of the text file to compress.
        output_path: Directory in which the compressed file is written.

    Returns:
        The path of the written ``.huffman`` file.
    '''
    # Only the base name without extension is needed for the output name.
    input_filename = os.path.splitext(os.path.basename(input_file))[0]
    output_file = os.path.join(output_path, input_filename + '.huffman')

    # The with-statement closes the file; no explicit close() is required.
    with open(input_file, 'r') as f:
        data = f.read()

    # Get frequency table from data (one count per distinct character).
    frequencies = collections.Counter(data)
    root = build_tree(frequencies)

    encoded_str = utility.get_encoded_str(root, data)
    padded_encoded_str = utility.pad_encoded_str(encoded_str)
    byte_data = utility.get_byte_array(padded_encoded_str)

    with open(output_file, 'wb') as out:
        # Serialize the frequency table alongside the payload; presumably the
        # decompressor rebuilds the tree from it -- confirm against the reader.
        pickle.dump((frequencies, byte_data), out)

    return output_file
def test_get_encoded_str(self):
    """Check that 'ABB' is encoded as '100' for a two-leaf tree."""
    leaf_a = Node('A', 1)
    leaf_b = Node('B', 2)
    # The 'B' leaf is passed before the 'A' leaf when building the root.
    tree = Node('', 3, leaf_b, leaf_a)

    result = utility.get_encoded_str(tree, 'ABB')

    assert result == '100'
def test_get_decoded_str(self):
    """Check that decoding an encoded string returns the original text."""
    original = 'aab'
    leaf_a = Node('a', 2)
    leaf_b = Node('b', 1)
    # The 'b' leaf is passed before the 'a' leaf when building the root.
    tree = Node('', 3, leaf_b, leaf_a)

    bits = utility.get_encoded_str(tree, original)
    round_tripped = utility.get_decoded_str(tree, bits)

    assert round_tripped == original
def compress(input_file, output_path):
    '''
    Compress input_file into ``<input base name>.fano`` inside output_path.

    The input is read in text mode, a per-character frequency table is built,
    nodes are created from it and passed to ``build_tree``, and the data is
    encoded with the resulting tree. The frequency table and the encoded
    bytes are pickled together into the output file.

    Returns the path of the written file.
    '''
    stem = os.path.splitext(os.path.basename(input_file))[0]
    target = os.path.join(output_path, stem + '.fano')

    with open(input_file, 'r') as source:
        text = source.read()

    char_counts = collections.Counter(text)
    tree = build_tree(create_nodes_from_frequencies(char_counts))

    # NOTE(review): the returned codes are not used in this function; the call
    # is kept in case get_codes has side effects on the tree -- confirm.
    utility.get_codes(tree)

    bits = utility.get_encoded_str(tree, text)
    payload = utility.get_byte_array(utility.pad_encoded_str(bits))

    with open(target, 'wb') as sink:
        # Serialize the frequency table together with the encoded bytes.
        pickle.dump((char_counts, payload), sink)

    return target