def _decode_gamma_stream(bits):
    """Decode back-to-back Elias gamma codes from a string of '0'/'1' characters.

    Decoding stops as soon as no '1' remains in the undecoded tail, so
    trailing zero bits are ignored.  Each decoded value is reduced by one
    before being stored (presumably the writer added one because gamma codes
    cannot represent zero -- confirm against the writer).

    Returns a list of ints.  Raises ValueError if the decoder reports that it
    consumed no bits, which would otherwise loop forever.
    """
    last_one = bits.rfind("1")
    if last_one < 0:
        # Nothing to decode: empty stream or zero bits only.
        return []

    # Kabopan is imported only when there is actually something to decode.
    from kbp.univ import elias

    values = []
    pos = 0
    # "a '1' remains in bits[pos:]" is the same condition as pos <= last_one.
    while pos <= last_one:
        value, used = elias.gamma_decode(bits[pos:])
        if used <= 0:
            raise ValueError("gamma_decode made no progress at bit %d" % pos)
        values.append(value - 1)
        pos += used
    return values


def read_compressed_index_from_file_elias_gamma(out_file_name):
    """Load an Elias-gamma compressed index from ``out_file_name + "_elias_gamma"``.

    The file is read as a sequence of records, each laid out as:

    * 4 bytes: length in bytes of the word (``struct`` format ``'I'``,
      i.e. native byte order);
    * that many bytes: the word, UTF-8 encoded;
    * 4 bytes: ``freq`` (presumably the word's frequency -- confirm against
      the writer);
    * 4 bytes: number of 32-bit code words that follow;
    * that many 4-byte unsigned ints whose bits, most significant first and
      zero-padded to 32 bits each, are concatenated into one gamma-coded
      bit stream.

    Returns a dict mapping each word to a ``(freq, values)`` tuple, where
    ``values`` is the list of decoded integers.  If a word occurs more than
    once, the last record wins.

    Raises ``struct.error`` if the file ends in the middle of a record, and
    the usual ``IOError``/``OSError`` if the file cannot be opened.
    """
    import struct

    print("Reading compressed index with elias gamma...")

    def read_uint32(stream):
        # A short read makes struct.unpack raise struct.error.
        return struct.unpack('I', stream.read(4))[0]

    index = dict()
    file_name = out_file_name + "_elias_gamma"

    # The context manager closes the file even when a record is malformed.
    with open(file_name, 'rb') as f:
        header = f.read(4)
        while header:
            word_len = struct.unpack('I', header)[0]
            word = f.read(word_len).decode("utf8")
            freq = read_uint32(f)
            arr_len = read_uint32(f)

            # Read all code words of this record in one go.
            codes = struct.unpack('%dI' % arr_len, f.read(4 * arr_len))
            bits = "".join("{0:032b}".format(code) for code in codes)

            index[word] = freq, _decode_gamma_stream(bits)
            header = f.read(4)

    return index
#Kabopan - Readable Algorithms. Public Domain, 2007-2009
# Known-answer checks for the Elias universal codes in kbp.univ.elias.
# Codes are strings of '0'/'1' characters; the decoders checked here return
# a (value, number of bits consumed) pair.
from kbp.univ.elias import (
    elias_split,
    gamma_encode,
    gamma_decode,
    interleaved_gamma_encode,
    interleaved_gamma_decode,
    delta_encode,
    delta_decode,
    omega_encode,
    omega_decode,
)

# elias_split: 1 -> (0, ""), 14 (binary 1110) -> (3, "110").
assert all(
    elias_split(number) == parts
    for number, parts in ((1, (0, "")), (14, (3, "110")))
)

# Gamma encoding; the last vector is spelled as prefix + remaining bits.
assert all(
    gamma_encode(number) == code
    for number, code in ((1, "1"), (2, "010"), (14, "0001" + "110"))
)

# Gamma decoding: trailing bits after a complete code are not consumed.
assert all(
    gamma_decode(code) == (17, 9)
    for code in ("000010001", "00001000100")
)

# Interleaved gamma round trip for 14.
assert interleaved_gamma_encode(14) == "101001"
assert interleaved_gamma_decode("101001") == (14, 6)

# Delta round trip for 17.
assert delta_encode(17) == "001010001"
assert delta_decode("001010001") == (17, 9)

# Omega codes for 1..17 inclusive.
assert [omega_encode(i) for i in range(1, 18)] == [
    '0',
    '100', '110',
    '101000', '101010', '101100', '101110',
    '1110000', '1110010', '1110100', '1110110',
    '1111000', '1111010', '1111100', '1111110',
    '10100100000', '10100100010',
]

# Omega decoding: trailing bits after a complete code are not consumed.
assert all(
    omega_decode(code) == (17, 11)
    for code in ('10100100010', '1010010001000')
)