コード例 #1
0
ファイル: mkindex.py プロジェクト: stroykova/search
def compressed_index_to_file_elias_gamma(index, out_file_name):
    """Write *index* to ``out_file_name + "_elias_gamma"`` in a binary format.

    For every ``(k, v)`` item of *index* the following record is written,
    each integer packed with ``struct`` format ``"I"`` (unsigned int in the
    platform's native byte order and size, so the file is not portable
    between machines with different layouts):

    1. the length in bytes of ``k`` encoded as UTF-8,
    2. the UTF-8 bytes of ``k``,
    3. ``v[0]``,
    4. the number of 32-bit words that follow,
    5. the words themselves: the Elias gamma codes of ``i + 1`` for each
       ``i`` in ``v[1]``, concatenated and right-padded with ``"0"`` bits
       up to a multiple of 32.

    Args:
        index: mapping from a text key to a sequence ``v`` where ``v[0]``
            is an integer and ``v[1]`` is an iterable of integers.
            NOTE(review): presumably term -> (count, posting list); confirm
            against the caller.
        out_file_name: path prefix; ``"_elias_gamma"`` is appended to it.

    Returns:
        None.
    """
    print("Compressing with elias gamma...")
    from kbp.univ import elias
    import struct

    file_name = out_file_name + "_elias_gamma"
    # The context manager closes the file even if encoding or packing fails.
    with open(file_name, "wb") as f:
        for k, v in index.items():
            word = k.encode("utf-8")
            f.write(struct.pack("I", len(word)))
            f.write(word)
            f.write(struct.pack("I", v[0]))

            # Values are shifted by one before encoding; Elias gamma codes
            # are defined for positive integers only, so this presumably
            # lets 0 be stored.
            bits = "".join(elias.gamma_encode(i + 1) for i in v[1])

            # Pad with zero bits so the stream splits into whole 32-bit words.
            remainder = len(bits) % 32
            if remainder:
                bits += "0" * (32 - remainder)

            # Floor division keeps the count an int on Python 2 and 3 alike.
            count = len(bits) // 32
            f.write(struct.pack("I", count))
            for start in range(0, len(bits), 32):
                f.write(struct.pack("I", int(bits[start:start + 32], 2)))
    return
コード例 #2
0
ファイル: mkindex.py プロジェクト: stroykova/search
def compressed_index_to_file_elias_gamma(index, out_file_name):
    """Write *index* to ``out_file_name + "_elias_gamma"`` in a binary format.

    For every ``(k, v)`` item of *index* the following record is written,
    each integer packed with ``struct`` format ``'I'`` (unsigned int in the
    platform's native byte order and size, so the file is not portable
    between machines with different layouts):

    1. the length in bytes of ``k`` encoded as UTF-8,
    2. the UTF-8 bytes of ``k``,
    3. ``v[0]``,
    4. the number of 32-bit words that follow,
    5. the words themselves: the Elias gamma codes of ``i + 1`` for each
       ``i`` in ``v[1]``, concatenated and right-padded with ``"0"`` bits
       up to a multiple of 32.

    Args:
        index: mapping from a text key to a sequence ``v`` where ``v[0]``
            is an integer and ``v[1]`` is an iterable of integers.
            NOTE(review): presumably term -> (count, posting list); confirm
            against the caller.
        out_file_name: path prefix; ``"_elias_gamma"`` is appended to it.

    Returns:
        None.
    """
    print("Compressing with elias gamma...")
    from kbp.univ import elias
    import struct

    file_name = out_file_name + "_elias_gamma"
    # The context manager closes the file even if encoding or packing fails.
    with open(file_name, 'wb') as f:
        for k, v in index.items():
            word = k.encode("utf-8")
            f.write(struct.pack('I', len(word)))
            f.write(word)
            f.write(struct.pack('I', v[0]))

            # Values are shifted by one before encoding; Elias gamma codes
            # are defined for positive integers only, so this presumably
            # lets 0 be stored.
            bits = "".join(elias.gamma_encode(i + 1) for i in v[1])

            # Pad with zero bits so the stream splits into whole 32-bit words.
            remainder = len(bits) % 32
            if remainder:
                bits += "0" * (32 - remainder)

            # Floor division keeps the count an int on Python 2 and 3 alike.
            count = len(bits) // 32
            f.write(struct.pack('I', count))
            for start in range(0, len(bits), 32):
                f.write(struct.pack('I', int(bits[start:start + 32], 2)))
    return
コード例 #3
0
ファイル: elias_test.py プロジェクト: stroykova/search
#Kabopan - Readable Algorithms. Public Domain, 2007-2009

from kbp.univ.elias import ( \
 elias_split, gamma_encode, gamma_decode, 
 interleaved_gamma_encode, interleaved_gamma_decode,
 delta_encode, delta_decode,
 omega_encode, omega_decode)
# elias_split: per the expectations below, the result pairs the number of
# bits that follow the leading 1 of the value with those bits as a string.
for number, expected in ((1, (0, "")), (14, (3, "110"))):
    assert elias_split(number) == expected

# gamma_encode: expected bit strings for a few small values.
for number, code in ((1, "1"), (2, "010"), (14, "0001" + "110")):
    assert gamma_encode(number) == code

# gamma_decode: the result is (17, 9) for the 9-bit code, and is unchanged
# when two extra "0" bits are appended to the input.
for bits in ("000010001", "00001000100"):
    assert gamma_decode(bits) == (17, 9)

# Interleaved gamma: encoding 14 and decoding the same bit string.
assert interleaved_gamma_encode(14) == "101001"
assert interleaved_gamma_decode("101001") == (14, 6)

# Delta: encoding 17 and decoding the same bit string.
assert delta_encode(17) == "001010001"
assert delta_decode("001010001") == (17, 9)

# Omega: expected codes for the integers 1 through 17, in order.
omega_codes = [
    '0', '100', '110', '101000', '101010', '101100', '101110', '1110000',
    '1110010', '1110100', '1110110', '1111000', '1111010', '1111100',
    '1111110', '10100100000', '10100100010',
]
assert list(map(omega_encode, range(1, 18))) == omega_codes

# omega_decode: the result is (17, 11) with or without two extra trailing
# "0" bits on the input.
for bits in ('10100100010', '1010010001000'):
    assert omega_decode(bits) == (17, 11)