Ejemplo n.º 1
0
def compressed_cp_lines(cps):
    """Encode code points with lzw, compress them, and format the result.

    Each element of ``cps`` is parsed as a base-16 string and appended to a
    working buffer with ``lzw.add_cp``.  The buffer is then passed through
    ``lzw.compress`` and two size-report lines are printed to stdout.

    Args:
        cps: Sized iterable of hexadecimal code point strings.

    Returns:
        Whatever ``lzw.compressed_bytes_to_lines`` returns for the compressed
        data at 12 values per line (elsewhere in this file that result is
        unpacked as a ``(lines, count)`` pair -- confirm against ``lzw``).
    """
    raw = []
    for hex_cp in cps:
        lzw.add_cp(raw, int(hex_cp, 16))
    packed = lzw.compress(raw)

    # Size diagnostics; the 32/8/16 bit widths are the ones the original
    # report assumes for input, buffer, and compressed elements.
    cp_count = len(cps)
    raw_count = len(raw)
    packed_count = len(packed)
    print('rewrote {} * 32 = {} bits as {} * 8 = {} bits'.format(
        cp_count, cp_count * 32, raw_count, raw_count * 8))
    print('compressed to {} * 16 = {} bits'.format(
        packed_count, packed_count * 16))

    per_line = 12
    return lzw.compressed_bytes_to_lines(packed, per_line)
Ejemplo n.º 2
0
def compressed_prop_lines(cp_prop_pairs):
    """Compress the property bytes for ``cp_prop_pairs`` and format them.

    The pairs are turned into a byte sequence by ``uncompressed_prop_bytes``
    (defined elsewhere in this file), compressed with ``lzw.compress``, and
    handed to ``lzw.compressed_bytes_to_lines`` at 12 values per line.

    Args:
        cp_prop_pairs: Input forwarded unchanged to
            ``uncompressed_prop_bytes``.

    Returns:
        The result of ``lzw.compressed_bytes_to_lines`` for the compressed
        data.
    """
    per_line = 12
    packed = lzw.compress(uncompressed_prop_bytes(cp_prop_pairs))

    # Size diagnostics are intentionally disabled for this table:
    #   'rewrote {} * 64 = {} bits as {} * 8 = {} bits'
    #   'compressed to {} * 16 = {} bits'

    return lzw.compressed_bytes_to_lines(packed, per_line)
Ejemplo n.º 3
0
def compressed_case_mapping_lines(mappings):
    """Encode case-mapping entries with lzw, compress them, and format them.

    For every entry, ``entry[0]`` is parsed as a base-16 string and appended
    with ``lzw.add_cp``; ``entry[1][0]`` and ``entry[1][1]`` are then appended
    with ``lzw.add_short``.  Two size-report lines are printed to stdout.

    Args:
        mappings: Sized iterable of indexable entries shaped like
            ``(hex_cp_string, (first, second))``.

    Returns:
        The result of ``lzw.compressed_bytes_to_lines`` for the compressed
        data at 12 values per line.
    """
    raw = []
    for entry in mappings:
        lzw.add_cp(raw, int(entry[0], 16))
        pair = entry[1]
        lzw.add_short(raw, pair[0])
        lzw.add_short(raw, pair[1])
    packed = lzw.compress(raw)

    entry_count = len(mappings)
    raw_count = len(raw)
    packed_count = len(packed)
    print('rewrote {} * 64 = {} bits as {} * 8 = {} bits'.format(
        entry_count, entry_count * 64, raw_count, raw_count * 8))
    print('compressed to {} * 16 = {} bits'.format(
        packed_count, packed_count * 16))

    per_line = 12
    return lzw.compressed_bytes_to_lines(packed, per_line)
Ejemplo n.º 4
0
def compressed_case_mapping_to_lines(mappings):
    """Encode case-mapping targets with lzw, compress them, and format them.

    For every entry, ``t[0][0]`` and ``t[0][1]`` are appended with
    ``lzw.add_short``, followed by a third short looked up from
    ``case_conditions`` (defined elsewhere in this file) using ``t[1]`` as
    the key.  When that lookup fails the value 0 is written instead.  Two
    size-report lines are printed to stdout.

    Args:
        mappings: Sized iterable of indexable entries shaped like
            ``((first, second), condition_key)``.

    Returns:
        The result of ``lzw.compressed_bytes_to_lines`` for the compressed
        data at 12 values per line.
    """
    values_per_line = 12
    bytes_ = []
    for t in mappings:
        lzw.add_short(bytes_, t[0][0])
        lzw.add_short(bytes_, t[0][1])
        try:
            # TODO: Totally wrong!  Just here for size eval.
            x = case_conditions[t[1]]
        except (KeyError, IndexError, TypeError):
            # Best-effort fallback for a missing, absent, or unhashable
            # condition key.  Deliberately narrower than a bare ``except:``
            # so that Ctrl-C, SystemExit, and genuine bugs such as a
            # NameError are no longer silently turned into 0.
            x = 0
        lzw.add_short(bytes_, x)
    compressed_bytes = lzw.compress(bytes_)
    print('rewrote {} * 48 = {} bits as {} * 8 = {} bits'.format(
        len(mappings), len(mappings) * 48, len(bytes_), len(bytes_) * 8))
    print('compressed to {} * 16 = {} bits'.format(
        len(compressed_bytes), len(compressed_bytes) * 16))
    return lzw.compressed_bytes_to_lines(compressed_bytes, values_per_line)
def cus_lines(cus):
    """Format the ordinals of ``cus`` as lines of values, uncompressed.

    Every element of ``cus`` is converted with ``ord`` and the resulting
    integers are passed directly (no ``lzw.compress`` step) to
    ``lzw.compressed_bytes_to_lines`` at 12 values per line.

    Args:
        cus: Iterable of single-character strings.

    Returns:
        Element ``[0]`` of the ``lzw.compressed_bytes_to_lines`` result.
    """
    per_line = 12
    ordinals = [ord(unit) for unit in cus]
    formatted = lzw.compressed_bytes_to_lines(ordinals, per_line)
    return formatted[0]
Ejemplo n.º 6
0
def uncompressed_prop_lines(cp_prop_pairs):
    """Format the property bytes for ``cp_prop_pairs`` without compressing.

    The pairs are turned into a byte sequence by ``uncompressed_prop_bytes``
    (defined elsewhere in this file) and handed straight to
    ``lzw.compressed_bytes_to_lines`` at 18 values per line.

    Args:
        cp_prop_pairs: Input forwarded unchanged to
            ``uncompressed_prop_bytes``.

    Returns:
        The result of ``lzw.compressed_bytes_to_lines`` for the raw bytes.
    """
    per_line = 18
    raw = uncompressed_prop_bytes(cp_prop_pairs)
    return lzw.compressed_bytes_to_lines(raw, per_line)
Ejemplo n.º 7
0
        if cp in quick_check_maps['NFKC']:
            nfkc_quick_check = quick_check_maps['NFKC'][cp]
        lzw.add_cp(prop_bytes_, cp)
        lzw.add_short(prop_bytes_, canonical_decomp[0])
        lzw.add_short(prop_bytes_, canonical_decomp[1])
        lzw.add_short(prop_bytes_, compatible_decomp[0])
        lzw.add_short(prop_bytes_, compatible_decomp[1])
        lzw.add_byte(prop_bytes_, int(ccc))
        lzw.add_byte(prop_bytes_, \
                     quick_checks_to_byte(nfd_quick_check, nfkd_quick_check))
        lzw.add_byte(prop_bytes_, \
                     quick_checks_to_byte(nfc_quick_check, nfkc_quick_check))

    value_per_line = 12
    compressed_bytes = lzw.compress(prop_bytes_)
    props_lines, num_shorts = lzw.compressed_bytes_to_lines(
        compressed_bytes, value_per_line)
    #print 'rewrote {} * 144 = {} bits as {} * 8 = {} bits'.format(len(all_cps), len(all_cps)*144, len(prop_bytes_), len(prop_bytes_)*8)
    #print 'compressed to {} * 16 = {} bits'.format(len(compressed_bytes), len(compressed_bytes) * 16)

    cpp_file = open('normalization_data_cp_props.cpp', 'w')
    cpp_file.write(
        cp_props_file_form.format(canon_all_cps_string,
                                  len(canon_all_cps), compat_all_cps_string,
                                  len(compat_all_cps), props_lines, num_shorts,
                                  len(all_cps)))


def cps_string(cps):
    """Render integers as a run of ``\\UXXXXXXXX`` escape sequences.

    Each value is written as a literal backslash, ``U``, and its lowercase
    hexadecimal digits left-padded with zeros to at least eight characters.
    The escapes are concatenated with no separator.

    Args:
        cps: Iterable of integers.

    Returns:
        The concatenated escape text; an empty string for empty input.
    """
    # hex() yields '0x...'; drop the two-character prefix before padding.
    return ''.join(r'\U' + hex(cp)[2:].rjust(8, '0') for cp in cps)