def main():
    """Run the interactive compress/decompress menu until the user exits.

    Choice 1 builds a frequency table for the named file, derives Huffman
    codes from it and encodes the file. Choice 2 decodes the named file.
    Choice 3 returns. Anything else, including input that is not an
    integer, is reported as an invalid choice and the menu is shown again.
    """
    while True:
        print("1.Compress\n2.Decompress\n3.Exit")
        try:
            choice = int(input("Enter Choice :"))
        except ValueError:
            # A blank or non-numeric answer must not abort the whole menu.
            print("Invalid Choice\n")
            continue

        if choice == 1:
            fileName = input("Enter file Name: ")
            print("Encoding : ", fileName)
            frequencyObject = Frequency(fileName)
            frequencyTable = frequencyObject.frequencyTable()
            huffmanObject = Huffman(frequencyTable)
            huffmanCodes = huffmanObject.huffman()
            encodeObject = Encode(huffmanCodes, fileName)
            encodeObject.encode()
            print("File Encoded as:" + fileName + ".bv\n\n")
        elif choice == 2:
            fileName = input("Enter file Name: ")
            print("decoding : ", fileName)
            decodeObject = Decode(fileName)
            decodeObject.decode()
            print("\nDecoded as " + fileName + "_new.txt")
        elif choice == 3:
            print("Bye\n")
            return
        else:
            print("Invalid Choice\n")
def test_huffman(self):
    """Check that input.txt survives an encode/decode round trip unchanged."""
    original = Helper.read_file("input.txt")
    codes = Huffman().get_encoding_dictionary(original)
    encoded = Helper.encoding(codes, original)
    self.assertEqual(original, Helper.decoding(codes, encoded))
def get_location(root, location_entry):
    """Run the selected algorithm on the entered path and show the result page.

    The path is read from ``location_entry``. The module-level
    ``Algorithm_choice`` (1: Huffman, 2: zip, 3: lz4) and ``Function_choice``
    (1 or 2) select the call to make. The call is timed, ``root`` is
    destroyed, and ``final_page`` receives the returned hint together with
    the elapsed seconds.
    """
    global Algorithm_choice
    global Function_choice

    path = location_entry.get()
    hint = "null"  # passed on unchanged when no algorithm/function pair matches
    started = time.time()

    if Algorithm_choice == 1:
        if Function_choice == 1:
            hint = Huffman.huff_compress(path)
        if Function_choice == 2:
            hint = Huffman.huff_uncompress(path)
    elif Algorithm_choice == 2:
        if Function_choice == 1:
            hint = files2zip.files2zip(path)
        if Function_choice == 2:
            hint = zip2files.zip2files(path)
    elif Algorithm_choice == 3:
        if Function_choice == 1:
            hint = lz4.main("-c", path)
        if Function_choice == 2:
            hint = lz4.main("-x", path)

    elapsed = time.time() - started
    root.destroy()
    final_page(hint, elapsed)
def open_file(self):
    """Ask the user for a file and load it into a new editor tab.

    Files larger than 1e6 bytes are not loaded here: their name is recorded
    for the current tab and ``open_lines_dialog`` takes over. ``.ac`` files
    are decoded through ``Huffman().decode``; everything else is read as
    UTF-8 with a GBK fallback. Nothing happens if the dialog returns an
    empty name.
    """
    start_dir = os.path.dirname('.')
    selection = QtWidgets.QFileDialog.getOpenFileName(
        self, 'Choose File', start_dir,
        'ALL(*.*);;Python file(*.py *.pyw);;C/C++ file(*.c *.cpp *.h);;Java file(*.java)'
    )
    fname = str(selection[0])
    if not fname:
        return

    if os.path.getsize(fname) > 1e6:
        self.filenames[self.cur_tab] = fname
        self.open_lines_dialog()
        return

    extension = fname.rsplit('.', maxsplit=1)[-1]
    if extension == 'ac':
        content = Huffman().decode(fname)
    else:
        try:
            with open(fname, 'r', encoding='UTF-8') as f:
                content = f.read()
        except UnicodeDecodeError:
            with open(fname, 'r', encoding='GBK') as f:
                content = f.read()

    self.new_tab()
    self.filenames[self.cur_tab] = fname
    self.rightTabWidget.setTabText(self.rightTabWidget.currentIndex(), fname)
    # Plain C files are highlighted with the 'cpp' language.
    self.languages[self.cur_tab] = 'cpp' if extension == 'c' else extension
    self.highlighters[self.cur_tab].set_language(self.languages[self.cur_tab])
    self.text_editors[self.cur_tab].insert(content)
    self.text_editors[self.cur_tab].filename = fname
def save_file_as(self):
    """Ask for a target name and save the current tab's text under it.

    The current tab's file name, tab title and highlighter language are
    updated to match the chosen name. When the name ends in ``.ac`` the text
    is written to a sibling ``.tmp`` file, which is passed to
    ``Huffman().encode`` and then removed; otherwise the text is written
    directly. Nothing happens if the dialog returns an empty name.
    """
    start_dir = os.path.dirname('.')
    fname = str(
        QtWidgets.QFileDialog.getSaveFileName(
            self, 'Choose File', start_dir,
            'ALL(*.*);;Compressed file(*.ac);;Python file(*.py *.pyw);;'
            'C/C++ file(*.c *.cpp *.h);;Java file(*.java)')[0])
    if not fname:
        return

    self.cur_tab = self.rightTabWidget.currentIndex()
    self.filenames[self.cur_tab] = fname
    self.rightTabWidget.setTabText(self.rightTabWidget.currentIndex(), fname)
    extension = fname.rsplit('.', maxsplit=1)[-1]
    self.languages[self.cur_tab] = extension
    self.highlighters[self.cur_tab].set_language(self.languages[self.cur_tab])
    content = self.text_editors[self.cur_tab].toPlainText()

    if extension == 'ac':
        tmpname = fname.rsplit('.', maxsplit=1)[0] + '.tmp'
        try:
            with open(tmpname, 'w') as f:
                f.write(content)
            Huffman().encode(tmpname)
        finally:
            # Remove the scratch file even when writing or encoding fails,
            # so a failed save does not leave a stray .tmp file behind.
            if os.path.exists(tmpname):
                os.remove(tmpname)
    else:
        with open(fname, 'w') as f:
            f.write(content)
def multi_train(args, vocab):
    """Training procedure for the multi-sense (multi-context) model.

    Builds a unigram table when ``args.negative > 0``, otherwise a Huffman
    tree over ``vocab``. It then initialises the model, splits the input
    file, and for each epoch runs ``multi_train_process`` across
    ``args.num_threads`` pool workers, printing progress and saving the
    embedding once the epoch is done.

    Args:
        args: Training options. This function also stores ``table``,
            ``f_input``, ``start_list``, ``end_list`` and ``epoch_index``
            on it.
        vocab: Vocabulary object; ``vocab.word_count`` is used for the
            speed report.
    """
    if args.negative > 0:
        print("Initializing Unigram Table")
        args.table = UnigramTable(vocab)
        args.table.build()
    else:
        print("Initializing Huffman Tree")
        huffman = Huffman(vocab)
        huffman.encode()

    multiSenseModel = Model.MultiSenseModel2(args, vocab)
    multiSenseModel.init_model()

    # Start the parallel workers
    t0 = time.time()
    print("Begin Training with {0} threads.".format(args.num_threads))
    args.f_input = open(args.input)
    try:
        args.start_list, args.end_list = FileUtil.FileSplit().split(args, vocab)

        global_word_count = Value('i', 0)
        global_alpha = Value('f', args.alpha)
        lock = Lock()

        for epoch in range(0, args.epoch):
            t_begin = time.time()
            global_word_count.value = 0
            args.epoch_index = epoch
            # A fresh pool is created per epoch; the context manager shuts
            # its workers down so they do not pile up across epochs.
            with Pool(processes=args.num_threads,
                      initializer=multi_init_process,
                      initargs=(args, vocab, multiSenseModel,
                                global_word_count, global_alpha,
                                lock)) as pool:
                pool.map(multi_train_process, range(args.num_threads))
            t_end = time.time()
            print(
                "\r𝑬-{epoch} ⍺={alpha:>10.8f} 𝑇={time:>10.2f}min token/ps {speed:>6.1f}"
                .format(epoch=epoch,
                        alpha=global_alpha.value,
                        time=(t_end - t_begin) / 60,
                        speed=vocab.word_count / (t_end - t_begin) /
                        args.num_threads),
                end='')
            multiSenseModel.saveEmbedding(epoch)
    finally:
        # Close the input file even if splitting or an epoch raises.
        args.f_input.close()

    t1 = time.time()
    print("")
    print("Completed Training, Spend {spend_time:>10.2f} minutes.".format(
        spend_time=(t1 - t0) / 60))
def compress(image, threshold, outputFile):
    """Zero out weak FFT coefficients of ``image`` and write the rest via Huffman.

    The cutoff is ``0.1 * threshold`` times the largest coefficient
    magnitude. Coefficients whose magnitude does not exceed it are replaced
    by 0. When the module-level ``debug`` flag is set, the spectrum is shown
    before and after thresholding.
    """
    spectrum = fftn(image)
    if debug:
        plt.imshow(np.abs(fftshift(spectrum)),
                   cmap='gray',
                   norm=LogNorm(vmin=5))
        plt.show()

    cutoff = 0.1 * threshold * np.amax(np.abs(spectrum))
    # Keep a coefficient only when its magnitude is above the cutoff.
    sparse_spectrum = np.where(np.abs(spectrum) > cutoff, spectrum, 0)
    if debug:
        plt.imshow(np.abs(fftshift(sparse_spectrum)),
                   cmap='gray',
                   norm=LogNorm(vmin=5))
        plt.show()

    encoder = Huffman(sparse_spectrum)
    encoder.write(sparse_spectrum, outputFile)
def __main__():
    """Encode and decode input.txt, print both results, then run the tests."""
    source_text = Helper.read_file("input.txt")
    codes = Huffman().get_encoding_dictionary(source_text)

    encoded_text = Helper.encoding(codes, source_text)
    decoded_text = Helper.decoding(codes, encoded_text)

    print(f"\nEncoded text: {encoded_text}")
    print(f"\nDecoded text: {decoded_text}")

    run_tests()
def huffman_coding_compressed(data_set,data_set_train,title,figure_i,N1,N2,N3):
    """Plot the original data size against three Huffman-coded sizes.

    For each of ``N1``, ``N2`` and ``N3``, ``most_common_words`` rewrites the
    training and data strings, a Huffman code is generated from the training
    string, and the data string is encoded. The recorded size is the code
    length plus the Huffman dictionary size plus the size returned by
    ``most_common_words``. The first entry is ``len(data) * 8`` (presumably
    the uncompressed size in bits; confirm against ``conv_file2str``).
    """
    huff = Huffman()
    data = conv_file2str(data_set)
    train_set = conv_file2str(data_set_train)

    data_size = [len(data) * 8]
    # N1..N3: how many of the most common values get replaced on each run
    for top_n in (N1, N2, N3):
        data_n, train_set_n, dictionary_size = most_common_words(
            train_set, data, top_n)
        huff.generate_code(train_set_n)
        code = huff.encode(data_n)
        data_size.append(len(code) + huff.get_dict_size() + dictionary_size)

    data_size_kb = list(map(size_KB, data_size))
    labels = [
        "Original Data",
        "huffman- input alphabet1",
        "huffman- input alphabet2",
        "huffman- input alphabet3",
    ]
    plot(data_size_kb, title, labels, figure_i)
def main():
    """Encode or decode a file according to the options from ``option()``.

    ``encode`` reads the input as text, builds a character frequency table,
    compresses the text, pickles the table to ``args["store"]`` and the
    ``(binary, freq)`` pair to ``args["output"]``, then reports sizes and
    the compression ratio. ``decode`` unpickles ``(code, freq)`` from the
    input, decompresses and writes the text to ``args["output"]``. Any other
    type prints ``Error!!!``.
    """
    args = option()
    mode = args["type"]

    if mode not in ("encode", "decode"):
        print("Error!!!")
        return

    started = time.time()

    if mode == "encode":
        with open(args["input"], 'r') as f:
            string = f.read()

        # Get frequency table from data
        freq = collections.Counter(string)
        binary = Huffman(freq).Compress(string)
        elapsed_ms = (time.time() - started) * 1000

        print("[INFO] The binary representation: {}".format(binary))
        print('[INFO] Total run-time: {} ms'.format(elapsed_ms))

        with open(args["store"], 'wb') as f:
            pickle.dump(freq, f)
        with open(args["output"], 'wb') as f:
            pickle.dump((binary, freq), f)

        # Size on disk before compressing
        uncompressed_size = os.stat(args["input"]).st_size
        print('[INFO] Uncompressed size: {} bytes'.format(uncompressed_size))
        compressed_size = compression_ratio(binary, args["store"])
        print('[INFO] Compressed size: {} bytes'.format(compressed_size))

        # Calculate compression ratio
        print('[INFO] Compression ratio = {0} / {1} = {2:.3f}'.format(
            uncompressed_size, compressed_size,
            uncompressed_size / compressed_size))
        return

    # mode == "decode"
    # NOTE(review): pickle.load can execute arbitrary code; only decode
    # files that come from a trusted source.
    with open(args["input"], 'rb') as f:
        code, freq = pickle.load(f)
    print("[INFO] The encoded binary: {}".format(code))

    result = Huffman(freq).Decompress(code)
    elapsed_ms = (time.time() - started) * 1000

    print("[INFO] Result decode: {}".format(result))
    print('[INFO] Total run-time: {} ms'.format(elapsed_ms))

    # write result file
    with open(args["output"], 'w+') as f:
        f.write(result)
def open_all(self):
    """Load the whole file recorded for the current tab into a new tab.

    ``.ac`` files are decoded through ``Huffman().decode``; other files are
    read as UTF-8 with a GBK fallback. The new tab gets the file name as its
    title, a highlighter language taken from the extension, and the file
    content.
    """
    fname = self.filenames[self.cur_tab]
    extension = fname.rsplit('.', maxsplit=1)[-1]

    if extension == 'ac':
        content = Huffman().decode(fname)
    else:
        try:
            with open(fname, 'r', encoding='UTF-8') as f:
                content = f.read()
        except UnicodeDecodeError:
            with open(fname, 'r', encoding='GBK') as f:
                content = f.read()

    self.new_tab()
    self.filenames[self.cur_tab] = fname
    self.rightTabWidget.setTabText(self.rightTabWidget.currentIndex(), fname)
    # Plain C files are highlighted with the 'cpp' language.
    self.languages[self.cur_tab] = 'cpp' if extension == 'c' else extension
    self.highlighters[self.cur_tab].set_language(self.languages[self.cur_tab])
    self.text_editors[self.cur_tab].insert(content)
    self.text_editors[self.cur_tab].filename = fname
(h_resized, w_resized, d_resized) = resized.shape
image_copy = resized.copy()
image_copy = cv2.cvtColor(image_copy, cv2.COLOR_RGB2GRAY)

# First step: collect the pixel frequencies of the grayscale image
node = Node()
node.setFrequencePixels(image_copy, h_resized, w_resized)
tmp = node.returnArrayNode()

# Build the minimum heap bottom-up from the frequency nodes
heap = Heap(tmp)
S = heap.returnHeapMinimum()

# Build the Huffman tree from the heap
huff = Huffman(S)
R = huff.returnHuff()

# Build the code dictionary by walking the tree
D = huff.goThroughTree(R)

# Encode the picture
T, index = huff.compressOp(image_copy, w_resized, h_resized, D)

# Write the encoding to a file. The handle is closed (and so flushed) before
# the file is reopened for reading below; otherwise the reader could see
# incomplete data still sitting in the write buffer.
with open('test.txt', 'w') as file:
    for i in range(index):
        file.write(str(T[i]) + ' ')

file1 = open('test.txt', 'r')
""" Huffman encoding/compression script Authors: - Kian Banke Larsen (kilar20) - Silas Pockendahl (silch20) """ from Huffman import Huffman from sys import argv if __name__ == "__main__": if len(argv) != 3: print(f"Usage: python {argv[0]} <input file> <output file>") else: print(f"Compressing '{argv[1]}'...") try: s0, s1 = Huffman.compress(argv[1], argv[2]) # size without header s2 = s1 - Huffman.HEADER_SIZE ratio = 100 * (s0 - s2) // s0 if s0 != 0 else 0 print(f"Wrote to '{argv[2]}'.") print(f" - Input size: {s0:>12}") print(f" - Output size:{s1:>12}") print(f" - Ratio: {ratio:>11}% (ignoring header)") except FileNotFoundError: print(f"The file '{argv[1]}' was not found") except KeyboardInterrupt: print("Compression interrupted") except Exception as e: print(f"Compression failed: {e}")
def setErrors(data):
    """Flip one randomly chosen bit in every complete 7-character block.

    ``data`` is a string of ``'0'``/``'1'`` characters. It is processed in
    consecutive blocks of 7; a trailing partial block is left untouched.
    The 7-character blocks presumably match the Hamming codewords produced
    by the channel encoder used below; confirm against ``Hamming``.

    The bits are edited in a list and joined once, rather than re-slicing
    the whole string for every block, which was quadratic in the input
    length. The sequence of ``randint`` calls is unchanged.

    Returns:
        The string with one bit inverted per complete block.
    """
    bits = list(data)
    # start + 7 <= len(bits) for every start yielded by this range.
    for start in range(0, len(bits) - 6, 7):
        randIndex = randint(start, start + 6)
        bits[randIndex] = '1' if int(bits[randIndex]) == 0 else '0'
    return ''.join(bits)


file = ReadFile('txt_files/Text.txt', analize=True)
source = Huffman(file.data)
encripted = source.encript(file.file_text)

channel = Hamming()
encripted = channel.encode(encripted)

#set errors
errorData = setErrors(encripted)
writeToFile('Encripted Text', errorData)

#fix errors
channel = Hamming()
trueData = channel.decode(errorData)
decripted = source.decript(trueData)
writeToFile('Decripted Text', decripted)
def decompress(file):
    """Read Huffman-coded data from ``file`` and return the normalized image.

    The data returned by ``Huffman.read`` is passed through ``ifft``; the
    real part is cast to ``int`` and handed to ``normalize``.
    """
    spectrum = Huffman.read(file)
    spatial = real(ifft(spectrum))
    return normalize(spatial.astype(int))
def compress(imgName, outName):
    """Read an image, take its FFT and write it Huffman-coded to ``outName``."""
    pixels = imageio.imread(imgName)
    spectrum = np.asarray(fft(pixels))

    # The Huffman code has to be generated from the data before writing.
    encoder = Huffman(spectrum)
    encoder.write(spectrum, outName)
def main():
    """Create a ``Huffman`` instance and hand control to its menu."""
    Huffman().menu()
def str_to_unicode(symbols): res = [] for i in symbols: res.append(ord(i)) return res def unicode_to_str(ascii_lst): res = [] for i in ascii_lst: res.append(chr(i)) return res if __name__ == '__main__': huffman = Huffman() while True: print_menu() choice = int(input()) if choice == 1: symbols_str = str(input("Введите строку: ")) symbols = str_to_unicode(symbols_str) encode = huffman.encode_str(symbols) encode_str = unicode_to_str(encode) print( f"Строка \'{symbols_str}\' перекодирована в \'{encode_str}\'") decode = huffman.decode_str(encode) decode_str = unicode_to_str(decode) print(f"Строка \'{encode_str}\' расшифрована в \'{decode_str}\'") elif choice == 2:
# Demo script: compress a sample sentence with Huffman and print the
# compressed and the decompressed result.
from Huffman import Huffman

if __name__ == '__main__':
    sinput = 'Hello this is a test, how are you ?'
    # The instance is built from the text it will compress.
    huffman = Huffman(sinput)
    # Compress input
    compressed_input = huffman.compress()
    print(compressed_input)
    # Decompress input
    decompressed_input = huffman.decompress()
    print(decompressed_input)
isCompress = True elif (opt == '-t'): threshold = float(arg) elif (opt == '-o'): #outputFile = open(arg, 'wb') outputFile = arg elif (opt == '-d'): inputFile = arg isDecompress = True elif (opt == '-h'): print_help(progname) elif (opt == '-s'): debug = True if isCompress: if outputFile is None: print('Output file is not defined! Did you use -o <outputfile> ?') sys.exit(1) if isDecompress: print( 'The options -c and -d cannot be used in the same time. Is not possible compress and decompress in the same execution!' ) sys.exit(1) compress(inputFile, threshold, outputFile) elif isDecompress: fft_image = Huffman.read(inputFile) image = decompress(fft_image) Image.fromarray(image).show()
# Script: read a text file, build a Huffman coder from its data and run the
# text through encript/decript once.
from ReadFile import ReadFile
from Huffman import Huffman

# NOTE(review): `analize=True` presumably makes ReadFile compute the data
# that Huffman needs (exposed as `file.data`) - confirm in ReadFile.
file = ReadFile('txt_files/Text.txt', analize=True)
huffman = Huffman(file.data)
# Encode the file text, then decode the encoded result.
encripted = huffman.encript(file.file_text)
decripted = huffman.decript(encripted)
from Huffman import Huffman
from lz4 import lz41
from Zip import files2zip, zip2files

if __name__ == "__main__":
    # The algorithm is compared as text; the operation is parsed as an int.
    algorithm = input("1:Huffman \n2:Zip \n3: lz4 \n请输入数字以选择压缩算法:")

    if algorithm == "3":
        lz41.main()
    elif algorithm in ("1", "2"):
        # Operation: 1 compresses, 2 decompresses.
        de = int(input('请输入您需要进行的操作(1为压缩,2为解压):'))
        if algorithm == "1":
            if de == 1:
                in_file = input('请输入您需要压缩的文件路径:')
                Huffman.file_encode(in_file)
            elif de == 2:
                in_file = input('请输入您需要解压的文件路径:')
                Huffman.file_decode(in_file)
        else:
            if de == 1:
                files2zip.files2zip()
            elif de == 2:
                zip2files.zip2files()
print("Encoding Huffman : -e <input_file>") else: #sys.argv 1st argv is inputfile file = str(sys.argv[2]) dna = DNACompute(file) #Create Node nodeA = Node("A", dna.dna_prob[0][0]) nodeT = Node("T", dna.dna_prob[0][1]) nodeC = Node("C", dna.dna_prob[0][2]) nodeG = Node("G", dna.dna_prob[0][3]) #Create Huffman Tree node = sorted([nodeA, nodeT, nodeC, nodeG], key=attrgetter("freq")) node = Huff.HuffmanTree(node) #Create Huffman Code codeA = ['A', "".join(Huff.HuffmanCode(node, "A"))] codeT = ['T', "".join(Huff.HuffmanCode(node, "T"))] codeC = ['C', "".join(Huff.HuffmanCode(node, "C"))] codeG = ['G', "".join(Huff.HuffmanCode(node, "G"))] #Prepair to print out code = [codeA, codeT, codeC, codeG] dnaTest = dna.dna[0].splitlines() encode = Huff.Encoding(code, "".join(dnaTest)) beta_p = [[codeA[1], nodeA.freq], [codeT[1], nodeT.freq], [codeC[1], nodeC.freq], [codeG[1], nodeG.freq]] #print out
You made me a, you made me a believer, believer Last things last By the grace of the fire and the flames You're the face of the future, the blood in my veins, oh ooh The blood in my veins, oh ooh But they never did, ever lived, ebbing and flowing Inhibited, limited 'Til it broke up and it rained down It rained down, like You made me a, you made me a believer, believer (Pain, pain) You break me down, you built me up, believer, believer (Pain) I let the bullets fly, oh let them rain My life, my love, my drive, it came from (Pain) You made me a, you made me a believer, believer""" # print(sample) huffman = Huffman(sample) pprint.pprint(huffman.frequency_map) # pprint.pprint(huffman.huffman_tree) pprint.pprint(huffman.code_map) compressed_text = huffman.compress(sample) print(compressed_text) plain_text = huffman.decompress(compressed_text) print(plain_text)