예제 #1
0
def compressed_cp_lines(cps):
    """Encode code points, LZW-compress them, and format the result as lines.

    Each element of ``cps`` is parsed as a hexadecimal string and appended to
    a byte list with ``lzw.add_cp``; the byte list is then compressed with
    ``lzw.compress``.  Two size summaries are printed to standard output.

    Args:
        cps: Sized collection of code points written as hexadecimal strings
            (each one is converted with ``int(cp, 16)``).

    Returns:
        The result of ``lzw.compressed_bytes_to_lines`` for the compressed
        data, formatted with 12 values per line.
    """
    values_per_line = 12
    bytes_ = []
    for cp in cps:
        lzw.add_cp(bytes_, int(cp, 16))
    compressed_bytes = lzw.compress(bytes_)
    # Parenthesized single-argument print behaves the same under the Python 2
    # print statement and the Python 3 print function.
    print('rewrote {} * 32 = {} bits as {} * 8 = {} bits'.format(
        len(cps), len(cps) * 32, len(bytes_), len(bytes_) * 8))
    print('compressed to {} * 16 = {} bits'.format(
        len(compressed_bytes), len(compressed_bytes) * 16))
    return lzw.compressed_bytes_to_lines(compressed_bytes, values_per_line)
예제 #2
0
def compressed_case_mapping_lines(mappings):
    """Encode case-mapping records, LZW-compress them, and format as lines.

    For every record, the code point ``t[0]`` (a hexadecimal string) is
    appended with ``lzw.add_cp`` and the two entries ``t[1][0]`` and
    ``t[1][1]`` are appended with ``lzw.add_short``.  The resulting byte list
    is compressed with ``lzw.compress``.  Two size summaries are printed to
    standard output.

    Args:
        mappings: Sized collection of records ``t`` where ``t[0]`` is a
            hexadecimal code point string and ``t[1]`` is indexable with at
            least two elements.

    Returns:
        The result of ``lzw.compressed_bytes_to_lines`` for the compressed
        data, formatted with 12 values per line.
    """
    values_per_line = 12
    bytes_ = []
    for t in mappings:
        lzw.add_cp(bytes_, int(t[0], 16))
        lzw.add_short(bytes_, t[1][0])
        lzw.add_short(bytes_, t[1][1])
    compressed_bytes = lzw.compress(bytes_)
    # Parenthesized single-argument print behaves the same under the Python 2
    # print statement and the Python 3 print function.
    print('rewrote {} * 64 = {} bits as {} * 8 = {} bits'.format(
        len(mappings), len(mappings) * 64, len(bytes_), len(bytes_) * 8))
    print('compressed to {} * 16 = {} bits'.format(
        len(compressed_bytes), len(compressed_bytes) * 16))
    return lzw.compressed_bytes_to_lines(compressed_bytes, values_per_line)
예제 #3
0
def uncompressed_prop_bytes(cp_prop_pairs):
    """Serialize (code point, property) pairs into a flat, uncompressed list.

    For each pair, element 0 is appended with ``lzw.add_cp`` and element 1
    with ``lzw.add_byte``, in that order.

    Args:
        cp_prop_pairs: Iterable of indexable pairs; index 0 is handed to
            ``lzw.add_cp`` and index 1 to ``lzw.add_byte``.

    Returns:
        The newly built list that the ``lzw`` helpers appended to.  It is
        empty when ``cp_prop_pairs`` yields nothing.
    """
    encoded = []
    append_cp = lzw.add_cp if cp_prop_pairs else None
    for entry in cp_prop_pairs:
        # Resolve the helpers per call, exactly as a direct attribute call
        # would, so the two appends stay in code-point-then-property order.
        lzw.add_cp(encoded, entry[0])
        lzw.add_byte(encoded, entry[1])
    del append_cp
    return encoded
예제 #4
0
        # Per-code-point record assembly (the enclosing loop and function
        # header are outside this excerpt).
        # ccc defaults to 0 unless cccs_dict has an entry for cp
        # (presumably the canonical combining class -- confirm).
        ccc = 0
        if cp in cccs_dict:
            ccc = cccs_dict[cp]
        # Every quick-check value starts as 'quick_check::yes' and is replaced
        # only when the map for that normalization form has an entry for cp.
        nfd_quick_check = 'quick_check::yes'
        if cp in quick_check_maps['NFD']:
            nfd_quick_check = quick_check_maps['NFD'][cp]
        nfkd_quick_check = 'quick_check::yes'
        if cp in quick_check_maps['NFKD']:
            nfkd_quick_check = quick_check_maps['NFKD'][cp]
        nfc_quick_check = 'quick_check::yes'
        if cp in quick_check_maps['NFC']:
            nfc_quick_check = quick_check_maps['NFC'][cp]
        nfkc_quick_check = 'quick_check::yes'
        if cp in quick_check_maps['NFKC']:
            nfkc_quick_check = quick_check_maps['NFKC'][cp]
        # Record layout appended to prop_bytes_, in order: the code point,
        # two shorts from canonical_decomp, two shorts from compatible_decomp,
        # one byte for ccc, then one byte per quick-check pair
        # (NFD with NFKD, NFC with NFKC).
        lzw.add_cp(prop_bytes_, cp)
        lzw.add_short(prop_bytes_, canonical_decomp[0])
        lzw.add_short(prop_bytes_, canonical_decomp[1])
        lzw.add_short(prop_bytes_, compatible_decomp[0])
        lzw.add_short(prop_bytes_, compatible_decomp[1])
        lzw.add_byte(prop_bytes_, int(ccc))
        lzw.add_byte(prop_bytes_, \
                     quick_checks_to_byte(nfd_quick_check, nfkd_quick_check))
        lzw.add_byte(prop_bytes_, \
                     quick_checks_to_byte(nfc_quick_check, nfkc_quick_check))

    # After the loop: compress the accumulated records and format them with
    # 12 values per line.  compressed_bytes_to_lines yields a pair that is
    # unpacked into the formatted lines and num_shorts.
    value_per_line = 12
    compressed_bytes = lzw.compress(prop_bytes_)
    props_lines, num_shorts = lzw.compressed_bytes_to_lines(
        compressed_bytes, value_per_line)
    #print 'rewrote {} * 144 = {} bits as {} * 8 = {} bits'.format(len(all_cps), len(all_cps)*144, len(prop_bytes_), len(prop_bytes_)*8)
예제 #5
0
    # Write collation_data_0.cpp by filling the collation_data_0_file_form
    # template with the prepared strings and element counts.
    # NOTE(review): no close() for cpp_file is visible in this excerpt --
    # confirm it is closed (or flushed) later in the enclosing function.
    cpp_file = open('collation_data_0.cpp', 'w')
    cpp_file.write(
        collation_data_0_file_form.format(implicit_weights_segments_str,
                                          len(implicit_weights_segments),
                                          reorder_group_str,
                                          len(reorder_group_strings), ce_lines,
                                          len(compressed_ces),
                                          len(collation_elements)))

    # Serialize the fcc_cet keys, visiting entries in the order given by
    # original_order.  Each key is written as a length byte followed by its
    # elements via lzw.add_cp.  Values are not compressed; each is kept as a
    # '{first, second}' string built from v[0] and v[1].
    key_bytes = []
    #value_bytes = []
    value_strings = []
    for k, v in sorted(fcc_cet.items(), key=lambda x: original_order[x[0]]):
        lzw.add_byte(key_bytes, len(k))
        for x in k:
            lzw.add_cp(key_bytes, x)
        value_strings.append('{{{}, {}}}'.format(v[0], v[1]))
        #lzw.add_short(value_bytes, v[0])
        #lzw.add_short(value_bytes, v[1])
    compressed_keys = lzw.compress(key_bytes)

    # The other data sets are optimized by LZW compression.  This one is
    # heavily pessimized.
    # compressed_values = lzw.compress(value_bytes)

    #print 'rewrote {} * 128 = {} bits as {} * 8 = {} bits'.format(len(fcc_cet), len(fcc_cet)*128, len(key_bytes), len(key_bytes)*8)
    #print 'compressed to {} * 16 = {} bits'.format(len(compressed_keys), len(compressed_keys) * 16)
    # Format the compressed key values as hex strings via values_to_lines,
    # passing 'uint16_t' and 2500 (the meaning of 2500 is defined by
    # values_to_lines, which is outside this excerpt).
    key_lines = values_to_lines(map(lambda x: hex(x), compressed_keys),
                                'uint16_t', 2500)
    #print 'rewrote {} * 32 = {} bits as {} * 8 = {} bits'.format(len(fcc_cet), len(fcc_cet)*32, len(value_bytes), len(value_bytes)*8)
    #print 'compressed to {} * 16 = {} bits'.format(len(compressed_values), len(compressed_values) * 16)