Beispiel #1
0
def read_compressed_index_from_file_elias_gamma(out_file_name):
    """Load an index whose arrays are stored Elias-gamma compressed.

    The data is read from ``out_file_name + "_elias_gamma"``. The file is a
    sequence of records; every numeric field is a ``struct`` ``'I'`` value
    (native unsigned int) read from 4 bytes. One record consists of:

    1. the length of the word in bytes,
    2. the word itself, UTF-8 encoded,
    3. the frequency stored for the word,
    4. the number of 32-bit words that hold the compressed array,
    5. that many 32-bit words.

    Each 32-bit word is rendered as 32 binary digits (zero padded on the
    left), the digits of all words are concatenated, and
    ``elias.gamma_decode`` is applied repeatedly while a ``"1"`` digit
    remains. Each decoded value minus one is appended to the result array.

    Args:
        out_file_name: Base path of the index; the ``"_elias_gamma"`` suffix
            is appended to obtain the file that is opened.

    Returns:
        dict mapping each word to a ``(freq, array)`` tuple, where ``array``
        is the list of decoded integers.

    Raises:
        IOError: if the file cannot be opened.
        struct.error: if a numeric field or the array data is truncated.
    """
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print("Reading compressed index with elias gamma...")
    from kbp.univ import elias
    import struct

    index = dict()
    file_name = out_file_name + "_elias_gamma"

    # The context manager closes the file even when a record is malformed
    # and one of the unpack/decode calls below raises.
    with open(file_name, 'rb') as f:
        word_len_bytes = f.read(4)
        # An empty read means a clean end of file at a record boundary.
        while word_len_bytes:
            word_len = struct.unpack('I', word_len_bytes)[0]
            word = f.read(word_len).decode("utf8")
            freq = struct.unpack('I', f.read(4))[0]
            arr_len = struct.unpack('I', f.read(4))[0]

            # Read the whole compressed array at once; a repeat count of 0
            # unpacks an empty buffer to an empty tuple.
            numbers = struct.unpack('%dI' % arr_len, f.read(4 * arr_len))
            # Every value fits in 32 bits, so a fixed-width format yields the
            # same digits as padding by hand.
            bits = "".join("{0:032b}".format(number) for number in numbers)

            array = list()
            # Once no "1" is left only zero digits remain, which cannot start
            # another code (presumably padding up to the 32-bit boundary).
            while "1" in bits:
                # gamma_decode is used as returning (value, digits consumed).
                value, consumed = elias.gamma_decode(bits)
                array.append(value - 1)
                bits = bits[consumed:]

            index[word] = freq, array
            word_len_bytes = f.read(4)

    return index
Beispiel #2
0
#Kabopan - Readable Algorithms. Public Domain, 2007-2009

from kbp.univ.elias import ( \
 elias_split, gamma_encode, gamma_decode, 
 interleaved_gamma_encode, interleaved_gamma_decode,
 delta_encode, delta_decode,
 omega_encode, omega_decode)
def _check(function, cases):
    """Assert that function(argument) equals expected for each (argument, expected) pair."""
    for argument, expected in cases:
        assert function(argument) == expected


# elias_split: expected results are (int, bit string) pairs.
_check(elias_split, [
    (1, (0, "")),
    (14, (3, "110")),
])

# Gamma coding. The decode cases expect the same (17, 9) result with and
# without two extra trailing digits.
_check(gamma_encode, [
    (1, "1"),
    (2, "010"),
    (14, "0001" + "110"),
])
_check(gamma_decode, [
    ("000010001", (17, 9)),
    ("00001000100", (17, 9)),
])

# Interleaved gamma coding, encode then decode of the same value.
_check(interleaved_gamma_encode, [(14, "101001")])
_check(interleaved_gamma_decode, [("101001", (14, 6))])

# Delta coding, encode then decode of the same value.
_check(delta_encode, [(17, "001010001")])
_check(delta_decode, [("001010001", (17, 9))])

# Omega coding: the codes for 1 through 17, compared as one list.
assert list(map(omega_encode, range(1, 18))) == [
    '0', '100', '110', '101000', '101010', '101100', '101110', '1110000',
    '1110010', '1110100', '1110110', '1111000', '1111010', '1111100',
    '1111110', '10100100000', '10100100010',
]
_check(omega_decode, [
    ('10100100010', (17, 11)),
    ('1010010001000', (17, 11)),
])