Ejemplo n.º 1
0
class HuffmanDecoder(object):
    """Decode a Huffman-compressed byte file using its companion key file.

    Given a compressed file path ``filename``, the decoder reads the code
    table from ``filename + ".KEY"``, rebuilds a ``HuffmanTree`` from it,
    decodes the packed bits and writes the text to ``filename + ".DECODED"``.
    """

    def decode(self, filename):
        """Run the full pipeline: read key, decode message, write the result.

        ``filename`` is the path of the compressed byte file.
        """
        self.filename = filename
        self.read_key_file()
        self.read_message_file()
        self.write_to_file()

    def read_key_file(self):
        """Load the code table from ``self.filename + ".KEY"``.

        Expected layout: line 1 is the number of distinct symbols, line 2 is
        the total number of symbols in the original text, then one entry per
        symbol consisting of the symbol character followed by its code.

        Sets ``self.fileLength`` (total symbol count) and ``self.dictionary``
        (mapping code string -> symbol).
        """
        # ``with`` guarantees the handle is closed even if parsing fails.
        with open(self.filename + ".KEY") as key_file:
            symbol_count = int(key_file.readline())
            self.fileLength = int(key_file.readline())
            self.dictionary = {}
            for _ in range(symbol_count):
                # The symbol is read as exactly one character so that
                # whitespace symbols (space, newline) survive; only the
                # remainder of the line is stripped to obtain the code.
                symbol = key_file.read(1)
                code = key_file.readline().strip()
                self.dictionary[code] = symbol

    def read_message_file(self):
        """Decompress the byte file ``self.filename`` into ``self.text``.

        Each byte is expanded to its bit string with
        ``binary.EightBitNumToBinary``; a ``HuffmanTree`` rebuilt from
        ``self.dictionary`` (stored as ``self.tree``) then decodes
        ``self.fileLength`` symbols via its ``find_char`` method.
        """
        with open(self.filename, "rb") as packed_file:
            packed = packed_file.read()

        # Iterating a bytes object yields ints; join once instead of
        # growing a string with ``+=``.
        bits = "".join(binary.EightBitNumToBinary(byte) for byte in packed)

        self.tree = HuffmanTree()
        self.tree.read_dict(self.dictionary)

        decoded = []
        for _ in range(self.fileLength):
            # find_char's result is indexed as (symbol, bits consumed);
            # look it up once per symbol rather than twice.
            match = self.tree.find_char(bits)
            decoded.append(match[0])
            bits = bits[match[1]:]
        self.text = "".join(decoded)

    def write_to_file(self):
        """Write ``self.text`` to ``self.filename + ".DECODED"``."""
        with open(self.filename + ".DECODED", "w") as out_file:
            out_file.write(self.text)
Ejemplo n.º 2
0
class HuffmanEncoder(object):
    """Compress a text file with Huffman coding.

    For an input file ``filename`` the encoder writes the packed bits to
    ``filename + ".HUFFMAN"`` and the code table to
    ``filename + ".HUFFMAN.KEY"``, then prints size statistics.
    """

    def encode(self, filename):
        """Run the full pipeline: read text, write both files, print stats."""
        self.filename = filename
        self.read_text(self.filename)
        self.write_to_files()
        self.print_stats()

    def read_text(self, filename):
        """Read ``filename`` and build the Huffman tree for its characters.

        Stores the file content in ``self.text`` and a ``Counter`` of its
        characters in ``self.count``, then passes that counter to
        ``make_tree``.
        """
        with open(filename, "r") as source:
            self.text = source.read()
        self.count = Counter(self.text)
        self.make_tree(self.count)

    def make_tree(self, counter):
        """Build the Huffman tree for ``counter`` and derive the code table.

        ``counter`` maps each symbol to its frequency. On return
        ``self.dict`` maps symbol -> code string and ``self.tree`` holds the
        tree, which is also returned.

        Inputs with fewer than two distinct symbols cannot be merged into a
        tree: a lone symbol is given the one-bit code ``"0"`` and an empty
        input gets an empty table, so such files no longer fail on an
        unset ``self.tree``.
        NOTE(review): the degenerate path relies on ``HuffmanTree()`` plus
        ``read_dict`` accepting a one-entry or empty table - confirm.
        """
        forest = [HuffmanTree(symbol=sym, freq=counter[sym])
                  for sym in counter]

        if len(forest) < 2:
            self.dict = {sym: "0" for sym in counter}
            self.tree = HuffmanTree()
            self.tree.read_dict(
                {code: sym for sym, code in self.dict.items()})
            return self.tree

        # Repeatedly merge the two lowest-frequency trees until one remains.
        while len(forest) > 1:
            smallest = self.lowest(forest)
            forest.remove(smallest)
            second = self.lowest(forest)
            forest.remove(second)
            self.tree = HuffmanTree(right=smallest,
                                    left=second,
                                    freq=smallest.freq + second.freq)
            forest.append(self.tree)

        # Materialise once: get_codes() yields (symbol, code) pairs that are
        # needed both as-is and inverted.
        pairs = list(self.tree.get_codes())
        self.dict = dict(pairs)
        self.tree.read_dict({pair[1]: pair[0] for pair in pairs})
        return self.tree

    def lowest(self, L):
        """Return the first smallest element of ``L`` under ``<``.

        Raises ``IndexError`` if ``L`` is empty.
        """
        if not L:
            raise IndexError("list index out of range")
        return min(L)

    def write_to_files(self):
        """Write the code table and the packed bits for ``self.text``.

        ``self.filename + ".HUFFMAN.KEY"`` receives the number of distinct
        symbols, the total number of symbols, then one ``symbol code`` line
        per table entry. ``self.filename + ".HUFFMAN"`` receives the encoded
        bits, zero-padded on the right to a whole number of bytes. The
        written bytes are kept in ``self.compressedByteLength``.
        """
        with open(self.filename + ".HUFFMAN.KEY", "w") as key_file:
            # the number of distinct symbols (that is, the size of the key)
            key_file.write(str(len(self.dict)) + "\n")
            # the total number of symbols in the original file
            key_file.write(str(sum(self.count.values())) + "\n")
            for symbol, code in self.dict.items():
                key_file.write(symbol + " " + code + "\n")

        bits = "".join(self.dict[char] for char in self.text)
        remainder = len(bits) % 8
        if remainder:
            bits += "0" * (8 - remainder)

        payload = bytes(binary.BinaryToNum(bits[start:start + 8])
                        for start in range(0, len(bits), 8))

        # Despite its name this attribute holds the bytes themselves;
        # print_stats takes len() of it.
        self.compressedByteLength = payload
        with open(self.filename + ".HUFFMAN", "wb") as compressed:
            compressed.write(payload)

    def print_stats(self):
        """Print size statistics for the last encoding.

        Reports the total number of characters counted in the input, the
        number of bytes in the input file, the number of bytes in the
        compressed output, and the ratio of compressed bytes to input bytes
        (printed as 0.0 for an empty input file to avoid dividing by zero).
        """
        print("Number of characters in file:", sum(self.count.values()))
        with open(self.filename, "rb") as source:
            original_size = len(source.read())
        compressed_size = len(self.compressedByteLength)
        print("Number of bytes in the input file:", original_size)
        print("Number of bytes in the compressed file:", compressed_size)
        ratio = compressed_size / original_size if original_size else 0.0
        print("Asymptotic compression ratio:", ratio)