def main():
  """Prints the kSuffixTokens C++ array generated from a TSV file.

  The input path is taken from sys.argv[1]. Every line must contain at least
  five tab-separated fields, in this order: key, lid, rid, cost, value.
  lid, rid and cost are parsed as integers. When the value equals the key,
  None is passed to ToCppStringLiteral in its place.

  Entries are printed in ascending key order, each one preceded by a C++
  comment quoting the input line it was generated from.

  Raises:
    IndexError: if a line has fewer than five fields.
    ValueError: if lid, rid or cost is not an integer.
  """
  entries = []
  with open(sys.argv[1], 'r') as stream:
    for raw_line in stream:
      line = raw_line.rstrip('\r\n')
      fields = line.split('\t')
      key = fields[0]
      lid = int(fields[1])
      rid = int(fields[2])
      cost = int(fields[3])
      value = fields[4]
      if key == value:
        value = None
      entries.append((line, (key, value, lid, rid, cost)))

  # Sort entries in the key-incremental order.
  entries.sort(key=lambda entry: entry[1][0])

  # Each print() gets exactly one argument, so the output is identical
  # whether this runs as a Python 2 print statement with a parenthesized
  # expression or as the Python 3 print function.
  print('const SuffixToken kSuffixTokens[] = {')
  for line, (key, value, lid, rid, cost) in entries:
    print('// "%s"' % line)
    print('{ %s, %s, %d, %d, %d },' % (
        code_generator_util.ToCppStringLiteral(key),
        code_generator_util.ToCppStringLiteral(value),
        lid, rid, cost))
  print('};')
def OutputValue(name, input_data, output_stream):
  """Writes the k<name>_value array of EmbeddedDictionary::Value entries.

  The items of input_data are visited in sorted order; the tokens belonging
  to one key are emitted by ascending cost. The array is closed with an
  all-NULL/zero sentinel, so the generated code looks like:

    static const mozc::EmbeddedDictionary::Value kNAME_value[] = {
      { "value", "description", "additional_description", 10, 10, 300 },
        :
      { NULL, NULL, NULL, 0, 0, 0 }
    };

  Args:
    name: identifier embedded in the generated array name.
    input_data: mapping from key to a list of tokens; each token provides
      value, description, additional_description, lid, rid and cost.
    output_stream: object with a write() method receiving the C++ code.
  """
  emit = output_stream.write
  emit('static const mozc::EmbeddedDictionary::Value k%s_value[] = {\n'
       % name)

  for _, tokens in sorted(input_data.items()):
    cheapest_first = sorted(tokens, key=lambda t: t.cost)
    for tok in cheapest_first:
      value_literal = code_generator_util.ToCppStringLiteral(tok.value)
      description_literal = code_generator_util.ToCppStringLiteral(
          tok.description)
      additional_literal = code_generator_util.ToCppStringLiteral(
          tok.additional_description)
      row = (value_literal, description_literal, additional_literal,
             tok.lid, tok.rid, tok.cost)
      emit(' { %s, %s, %s, %d, %d, %d },\n' % row)

  # Terminating sentinel entry.
  emit(' { NULL, NULL, NULL, 0, 0, 0 }\n')
  emit('};\n')
def OutputEmojiDataList(emoji_data_list, stream):
  """Writes the kEmojiDataList C++ array for emoji_data_list to stream.

  Args:
    emoji_data_list: iterable of 6-tuples (emoji, android_pua,
      utf8_description, docomo_description, softbank_description,
      kddi_description). android_pua is formatted as an integer; all other
      members go through ToCppStringLiteral.
    stream: object with a write() method receiving the C++ code.
  """
  emit = stream.write
  emit('static const mozc::EmojiRewriter::EmojiData kEmojiDataList[] = {\n')

  for record in emoji_data_list:
    (emoji, android_pua, utf8_description, docomo_description,
     softbank_description, kddi_description) = record
    row = (
        code_generator_util.ToCppStringLiteral(emoji),
        android_pua,
        code_generator_util.ToCppStringLiteral(utf8_description),
        code_generator_util.ToCppStringLiteral(docomo_description),
        code_generator_util.ToCppStringLiteral(softbank_description),
        code_generator_util.ToCppStringLiteral(kddi_description),
    )
    emit(' {%s, %d, %s, %s, %s, %s},\n' % row)

  emit('};\n')
def OutputTokenData(name, input_data, output_stream):
  """Writes the k<name>_token_data array of EmbeddedDictionary::Token.

  One row is written per key of input_data, in sorted order. Each row holds
  the key literal, a pointer into k<name>_value at the running offset, and
  the number of tokens for that key. The closing sentinel carries a NULL key,
  the base of k<name>_value, and the total number of tokens:

    static const mozc::EmbeddedDictionary::Token kNAME_token_data[] = {
      { "key1", kNAME_value + 0, 10 },
      { "key2", kNAME_value + 10, 30 },
        :
      { NULL, kNAME_value, 10000 }
    };

  Args:
    name: identifier embedded in the generated array names.
    input_data: mapping from key to a list of tokens.
    output_stream: object with a write() method receiving the C++ code.
  """
  emit = output_stream.write
  emit('static const mozc::EmbeddedDictionary::Token k%s_token_data[] = {\n'
       % name)

  consumed = 0  # Number of value entries claimed by the keys written so far.
  for key, tokens in sorted(input_data.items()):
    count = len(tokens)
    key_literal = code_generator_util.ToCppStringLiteral(key)
    emit(' { %s, k%s_value + %d, %d },\n'
         % (key_literal, name, consumed, count))
    consumed += count

  # Sentinel.
  emit(' { NULL, k%s_value, %d }\n' % (name, consumed))
  emit('};\n')
def WriteSortedSuffixArray(output_filename, suffixes):
  """Writes the kCounterSuffixes C++ array to output_filename as UTF-8.

  Suffixes are written in sorted order. Each row contains the C++ string
  literal of the UTF-8 encoded suffix, the length of that encoding in bytes,
  and a trailing comment showing the suffix itself.

  Args:
    output_filename: path of the file to create or overwrite.
    suffixes: iterable of text suffixes (each must support
      encode('utf-8')).
  """
  with codecs.open(output_filename, 'w', encoding='utf-8') as out:
    out.write('const CounterSuffixEntry kCounterSuffixes[] = {\n')
    for text in sorted(suffixes):
      encoded = text.encode('utf-8')
      literal = code_generator_util.ToCppStringLiteral(encoded)
      byte_length = len(encoded)
      out.write(u' {%s, %du}, // "%s"\n' % (literal, byte_length, text))
    out.write('};\n')
def OutputPosMap(pos_map, output):
  """Writes the POS map rows to output, framed by HEADER and FOOTER.

  Rows are written in sorted item order. A value of None is rendered as a
  static_cast of -1 to UserDictionary::PosType (an invalid PosType); any
  other value is prefixed with the UserDictionary scope.

  Args:
    pos_map: mapping from POS string to a PosType enumerator name or None.
    output: object with a write() method receiving the C++ code.
  """
  output.write(HEADER)
  for pos, pos_type in sorted(pos_map.items()):
    pos_literal = code_generator_util.ToCppStringLiteral(pos)
    if pos_type is not None:
      pos_type_expr = '::mozc::user_dictionary::UserDictionary::' + pos_type
    else:
      # Invalid PosType.
      pos_type_expr = (
          'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
    output.write(' { %s, %s },\n' % (pos_literal, pos_type_expr))
  output.write(FOOTER)
def OutputTokenDict(token_dict, stream):
  """Writes the kEmojiTokenList C++ array for token_dict to stream.

  Each row holds the reading literal, a pointer into kEmojiValueList at the
  running offset, and the number of values registered for that reading.

  Args:
    token_dict: mapping from reading to a list of values.
    stream: object with a write() method receiving the C++ code.
  """
  emit = stream.write
  emit('static const mozc::EmojiRewriter::Token kEmojiTokenList[] = {\n')

  # Rows must be in lexicographical order of the reading because the
  # generated array is binary-searched.
  consumed = 0
  for reading, values in sorted(token_dict.items()):
    count = len(values)
    reading_literal = code_generator_util.ToCppStringLiteral(reading)
    emit(' { %s, kEmojiValueList + %d, %d },\n'
         % (reading_literal, consumed, count))
    consumed += count

  emit('};\n')
def OutputUserPosDataHeader(user_pos_data, output):
  """Writes the user POS C++ tables (user_pos_data.h) to output.

  First one kConjugation<i> array is written per element of user_pos_data,
  then the kPOSToken array that refers to those arrays by index and ends
  with a NULL sentinel row.

  Args:
    user_pos_data: sequence of (user_pos, conjugation_list) pairs, where
      conjugation_list holds (value_suffix, key_suffix, pos_id) triples.
      It is iterated twice.
    output: object with a write() method receiving the C++ code.
  """
  emit = output.write

  # Per-POS conjugation tables.
  for table_index, entry in enumerate(user_pos_data):
    conjugations = entry[1]
    emit('static const UserPOS::ConjugationType kConjugation%d[] = {\n'
         % table_index)
    for value_suffix, key_suffix, pos_id in conjugations:
      value_literal = code_generator_util.ToCppStringLiteral(value_suffix)
      key_literal = code_generator_util.ToCppStringLiteral(key_suffix)
      emit(' { %s, %s, %d },\n' % (value_literal, key_literal, pos_id))
    emit('};\n')

  # POS token table referencing the conjugation tables above.
  emit('const UserPOS::POSToken kPOSToken[] = {\n')
  for table_index, (user_pos, conjugations) in enumerate(user_pos_data):
    pos_literal = code_generator_util.ToCppStringLiteral(user_pos)
    emit(' { %s, %d, kConjugation%d },\n'
         % (pos_literal, len(conjugations), table_index))

  # Sentinel row, then close the array.
  emit(' { NULL, 0, NULL },\n};\n')
def WriteHeaderFileForZeroQuery(zero_query_dict, output_file_name, var_name,
                                output_stream):
  """Writes a C++ header defining the zero query tables to output_stream.

  The output is framed by WriteIncludeGuardHeader / WriteIncludeGuardFooter
  and wrapped in namespace mozc. For every key of zero_query_dict, taken in
  sorted order, an anonymous namespace receives a `<var_name>_key<i>` string
  constant and a `<var_name>_values<i>` array of ZeroQueryEntry. After that
  the `<var_name>_data` ZeroQueryList array and the `<var_name>_size`
  constant are written.

  Nothing is returned; all output goes to output_stream.

  Args:
    zero_query_dict: mapping from key to a list of entries; each entry
      provides entry_type, value, emoji_type and emoji_android_pua.
    output_file_name: passed to the include guard helpers.
    var_name: prefix of every generated C++ identifier.
    output_stream: object with a write() method receiving the C++ code.
  """
  emit = output_stream.write

  WriteIncludeGuardHeader(output_file_name, output_stream)
  emit('#include "./prediction/zero_query_list.h"\n')
  emit('namespace mozc {\n')
  emit('namespace {\n')

  ordered_keys = sorted(zero_query_dict.keys())
  for index, key in enumerate(ordered_keys):
    # A blank line separates consecutive key/value groups.
    if index != 0:
      emit('\n')
    emit('const char *%s_key%d = %s; // "%s"\n'
         % (var_name, index, cgu.ToCppStringLiteral(key), key))
    emit('const ZeroQueryEntry %s_values%d[] = {\n' % (var_name, index))

    entry_rows = []
    for entry in zero_query_dict[key]:
      entry_rows.append(
          ' {%s, %s, %s, 0x%x}, // "%s"'
          % (ZeroQueryTypeToString(entry.entry_type),
             cgu.ToCppStringLiteral(entry.value),
             EmojiTypeToString(entry.emoji_type),
             entry.emoji_android_pua,
             entry.value))
    emit('\n'.join(entry_rows) + '\n')
    emit('};\n')
  emit('} // namespace\n')

  emit('const ZeroQueryList %s_data[] = {\n' % var_name)
  list_rows = []
  for index, key in enumerate(ordered_keys):
    list_rows.append(
        ' {%s_key%d, %s_values%d, %d}'
        % (var_name, index, var_name, index, len(zero_query_dict[key])))
  emit(',\n'.join(list_rows) + '\n')
  emit('};\n')

  emit('const size_t %s_size = %d;\n' % (var_name, len(ordered_keys)))
  emit('} // namespace mozc\n')
  WriteIncludeGuardFooter(output_file_name, output_stream)