import sys

import PQHeap
import bitIO

'''
Authors:
Sofie Louise Madsen - [email protected]
Joachim Bülow - [email protected]
Simon Soele Madsen - [email protected]
'''

# Open the input file for binary reading and the output file for binary
# writing. Paths come from the first and second command-line arguments.
inputfile = open(sys.argv[1], "rb")
outputfile = open(sys.argv[2], "wb")

# Bit writer used for writing to the output file.
bitstreamout = bitIO.BitWriter(outputfile)

# Two 256-entry tables indexed by byte value:
#   occurences[b] - how many times byte b occurs in the input file
#   codes[b]      - the Huffman code (root-to-leaf path) for byte b
occurences = 256 * [0]
codes = 256 * [0]


def read_file_occurences():
    """Count the occurrences of every byte value in the input file.

    Reads ``inputfile`` from its current position until end-of-file and
    increments ``occurences[b]`` once for each byte ``b`` encountered.
    The counts are accumulated in the module-level ``occurences`` list;
    nothing is returned.
    """
    # Read in larger chunks instead of one byte per call; the resulting
    # counts are the same, with far fewer read() calls.
    chunk_size = 64 * 1024
    while True:
        chunk = inputfile.read(chunk_size)
        if not chunk:
            # An empty bytes object signals end-of-file.
            break
        # Iterating over bytes yields integers in range(256), which are
        # used directly as indices into the table.
        for value in chunk:
            occurences[value] += 1
# The file paths are declared, either from commandline arguments or simple inputs if len(sys.argv) == 3: inPath = sys.argv[1] outPath = sys.argv[2] else: inPath = input('Write name of file to compress:') outPath = input('Write name of compressed file:') # The files are opened in binarymode seen by the 'rb' and 'wb' # We stream the files inFile = open(inPath, 'rb') outFile = open(outPath, 'wb') # streams for input and output file bitstreamin = bitIO.BitReader(inFile) bitstreamout = bitIO.BitWriter(outFile) # The block size of the encoding is defined, in bytes # and the required size of the table is calculated from that blockSize = 1 #byte tableSize = 2**(blockSize * 8) # A table with 2^blockSize inquries, one for each of the possible bytes table = [0] * tableSize # Here we populate the frequency table, # by incrementing the table in the position corresponding to the byte read # The while loop continues as long as there are more bytes to read byte = inFile.read(blockSize) while byte != b'': table[byte[0]] += 1