예제 #1
0
def main():
    """Print the C++ ``kSuffixTokens`` array built from a TSV file.

    The input path is taken from ``sys.argv[1]``. Every line is split on tabs
    and must provide at least five fields, in this order: key, lid, rid, cost,
    value. lid, rid and cost are parsed with ``int()``.

    The entries are printed to stdout sorted by key. Each entry is preceded by
    a ``//`` comment that echoes the input line it came from.

    Raises:
        IndexError: if no path is given, or a line has fewer than five fields.
        ValueError: if lid, rid or cost is not an integer literal.
    """
    result = []
    with open(sys.argv[1], 'r') as stream:
        for line in stream:
            line = line.rstrip('\r\n')
            fields = line.split('\t')
            key = fields[0]
            lid = int(fields[1])
            rid = int(fields[2])
            cost = int(fields[3])
            value = fields[4]

            # A value identical to its key is passed on as None.
            # NOTE(review): presumably ToCppStringLiteral renders None as a
            # NULL pointer in the generated code -- confirm against the helper.
            if key == value:
                value = None

            result.append((line, (key, value, lid, rid, cost)))

    # Sort entries in the key-incremental order.
    result.sort(key=lambda e: e[1][0])

    # Single-argument, parenthesized print produces the same output under the
    # Python 2 print statement and the Python 3 print function, so the script
    # runs on both.
    print('const SuffixToken kSuffixTokens[] = {')
    for (line, (key, value, lid, rid, cost)) in result:
        print('// "%s"' % line)
        print('{ %s, %s, %d, %d, %d },' % (
            code_generator_util.ToCppStringLiteral(key),
            code_generator_util.ToCppStringLiteral(value), lid, rid, cost))
    print('};')
예제 #2
0
def OutputValue(name, input_data, output_stream):
    """Write the mozc::EmbeddedDictionary::Value array to output_stream.

    Keys of input_data are visited in sorted order; the tokens stored under
    each key are written in ascending order of their ``cost`` attribute.

    The generated code looks like:

        static const mozc::EmbeddedDictionary::Value kNAME_value[] = {
          { "value", "description", "additional_description", 10, 10, 300 },
             :
          { NULL, NULL, NULL, 0, 0, 0 }
        };

    Args:
        name: inserted into the array identifier (``k<name>_value``).
        input_data: mapping whose values are lists of token objects exposing
            value, description, additional_description, lid, rid and cost.
        output_stream: object with a ``write`` method receiving the text.
    """
    emit = output_stream.write
    emit('static const mozc::EmbeddedDictionary::Value k%s_value[] = {\n'
         % name)

    for _unused_key, tokens in sorted(input_data.items()):
        cheapest_first = sorted(tokens, key=lambda entry: entry.cost)
        for entry in cheapest_first:
            # The three text fields become C++ string literals, in the same
            # order as the struct members.
            literals = tuple(
                code_generator_util.ToCppStringLiteral(text)
                for text in (entry.value, entry.description,
                             entry.additional_description))
            numbers = (entry.lid, entry.rid, entry.cost)
            emit('  { %s, %s, %s, %d, %d, %d },\n' % (literals + numbers))

    # The all-NULL row terminates the array.
    emit('  { NULL, NULL, NULL, 0, 0, 0 }\n')
    emit('};\n')
예제 #3
0
def OutputEmojiDataList(emoji_data_list, stream):
    """Write the kEmojiDataList C++ array for emoji_data_list to stream.

    Args:
        emoji_data_list: iterable of 6-tuples (emoji, android_pua,
            utf8_description, docomo_description, softbank_description,
            kddi_description). android_pua is formatted with ``%d``; every
            other field is emitted as a C++ string literal.
        stream: object with a ``write`` method receiving the text.
    """
    stream.write(
        'static const mozc::EmojiRewriter::EmojiData kEmojiDataList[] = {\n')
    for record in emoji_data_list:
        (emoji, android_pua, utf8_description, docomo_description,
         softbank_description, kddi_description) = record
        emoji_literal = code_generator_util.ToCppStringLiteral(emoji)
        description_literals = tuple(
            code_generator_util.ToCppStringLiteral(description)
            for description in (utf8_description, docomo_description,
                                softbank_description, kddi_description))
        row = (emoji_literal, android_pua) + description_literals
        stream.write('  {%s, %d, %s, %s, %s, %s},\n' % row)
    stream.write('};\n')
예제 #4
0
def OutputTokenData(name, input_data, output_stream):
    """Write the mozc::EmbeddedDictionary::Token array to output_stream.

    One row is written per key of input_data, in sorted key order. Each row
    holds the key, a pointer into ``k<name>_value`` at the running offset, and
    the number of tokens stored under that key. The final sentinel row carries
    the total token count.

    The generated code looks like:

        static const mozc::EmbeddedDictionary::Token kNAME_token_data[] = {
          { "key1", kNAME_value + 0, 10 },
          { "key2", kNAME_value + 10, 30 },
             :
          { NULL, kNAME_value, 10000 }
        };

    Args:
        name: inserted into the generated identifiers.
        input_data: mapping from key to a sized collection of tokens.
        output_stream: object with a ``write`` method receiving the text.
    """
    emit = output_stream.write
    emit('static const mozc::EmbeddedDictionary::Token k%s_token_data[] = {\n'
         % name)

    consumed = 0  # Number of value rows claimed by the keys written so far.
    for key, tokens in sorted(input_data.items()):
        count = len(tokens)
        key_literal = code_generator_util.ToCppStringLiteral(key)
        emit('  { %s, k%s_value + %d, %d },\n'
             % (key_literal, name, consumed, count))
        consumed += count

    # Sentinel row: NULL key plus the total number of values.
    emit('  { NULL, k%s_value, %d }\n' % (name, consumed))
    emit('};\n')
예제 #5
0
def WriteSortedSuffixArray(output_filename, suffixes):
    """Write the kCounterSuffixes C++ array to output_filename as UTF-8.

    Suffixes are written in sorted order. Each row holds the suffix as a C++
    string literal, the length of its UTF-8 encoding, and a trailing comment
    showing the suffix itself.

    Args:
        output_filename: path of the file to create or overwrite.
        suffixes: iterable of text suffixes.
    """
    with codecs.open(output_filename, 'w', encoding='utf-8') as out:
        out.write('const CounterSuffixEntry kCounterSuffixes[] = {\n')
        for text in sorted(suffixes):
            encoded = text.encode('utf-8')
            byte_count = len(encoded)
            literal = code_generator_util.ToCppStringLiteral(encoded)
            row = u'  {%s, %du}, // "%s"\n' % (literal, byte_count, text)
            out.write(row)
        out.write('};\n')
예제 #6
0
def OutputPosMap(pos_map, output):
  """Write pos_map as C++ table rows between HEADER and FOOTER.

  Rows are written in sorted key order. A value of None is emitted as a cast
  of -1 to UserDictionary::PosType; any other value is prefixed with the
  UserDictionary namespace qualifier.

  Args:
    pos_map: mapping from a POS string to a PosType name or None.
    output: object with a ``write`` method receiving the text.
  """
  output.write(HEADER)
  for pos_name, pos_type in sorted(pos_map.items()):
    name_literal = code_generator_util.ToCppStringLiteral(pos_name)
    if pos_type is not None:
      cpp_value = '::mozc::user_dictionary::UserDictionary::' + pos_type
    else:
      # No PosType is associated with this entry: emit the invalid marker.
      cpp_value = (
          'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
    output.write('  { %s, %s },\n' % (name_literal, cpp_value))
  output.write(FOOTER)
def OutputTokenDict(token_dict, stream):
  """Write the kEmojiTokenList C++ array for token_dict to stream.

  Each row holds a reading, a pointer into kEmojiValueList at the running
  offset, and the number of values stored under that reading.

  Args:
    token_dict: mapping from reading to a sized collection of values.
    stream: object with a ``write`` method receiving the text.
  """
  stream.write(
      'static const mozc::EmojiRewriter::Token kEmojiTokenList[] = {\n')
  # The generated array is binary-searched, so the rows have to appear in
  # the lexicographical order of their readings.
  consumed = 0
  for reading, values in sorted(token_dict.items()):
    count = len(values)
    reading_literal = code_generator_util.ToCppStringLiteral(reading)
    row = '  { %s, kEmojiValueList + %d, %d },\n' % (
        reading_literal, consumed, count)
    stream.write(row)
    consumed += count
  stream.write('};\n')
예제 #8
0
def OutputUserPosDataHeader(user_pos_data, output):
    """Write the contents of user_pos_data.h to output.

    First one ``kConjugation<N>`` array is written per entry of
    user_pos_data, then the ``kPOSToken`` array that refers to those arrays
    by index and ends with a sentinel row.

    Args:
        user_pos_data: sequence of (user_pos, conjugation_list) pairs, where
            conjugation_list is a sized sequence of (value_suffix,
            key_suffix, pos_id) triples. It is iterated twice.
        output: object with a ``write`` method receiving the text.
    """
    # Pass 1: the per-entry conjugation tables.
    for position, entry in enumerate(user_pos_data):
        _unused_pos, conjugations = entry
        output.write(
            'static const UserPOS::ConjugationType kConjugation%d[] = {\n'
            % position)
        for value_suffix, key_suffix, pos_id in conjugations:
            value_literal = code_generator_util.ToCppStringLiteral(
                value_suffix)
            key_literal = code_generator_util.ToCppStringLiteral(key_suffix)
            output.write('  { %s, %s, %d },\n'
                         % (value_literal, key_literal, pos_id))
        output.write('};\n')

    # Pass 2: the POS token table pointing at the tables written above.
    output.write('const UserPOS::POSToken kPOSToken[] = {\n')
    for position, entry in enumerate(user_pos_data):
        user_pos, conjugations = entry
        pos_literal = code_generator_util.ToCppStringLiteral(user_pos)
        output.write('  { %s, %d, kConjugation%d },\n'
                     % (pos_literal, len(conjugations), position))
    # The sentinel row and the closing brace go out in a single write.
    output.write('  { NULL, 0, NULL },\n};\n')
def WriteHeaderFileForZeroQuery(zero_query_dict, output_file_name, var_name,
                                output_stream):
    """Write a C++ header with the zero query tables to output_stream.

    Nothing is returned; all text goes to output_stream. Inside an anonymous
    namespace, one ``<var_name>_key<N>`` string and one
    ``<var_name>_values<N>`` array are written per key, in sorted key order.
    After it come the ``<var_name>_data`` array that ties keys to their value
    arrays and the ``<var_name>_size`` constant holding the number of keys.

    Args:
        zero_query_dict: mapping from key to a sized sequence of entries
            exposing entry_type, value, emoji_type and emoji_android_pua.
        output_file_name: passed to the include guard helpers.
        var_name: prefix of every generated identifier.
        output_stream: object with a ``write`` method receiving the text.
    """
    WriteIncludeGuardHeader(output_file_name, output_stream)
    for preamble in ('#include "./prediction/zero_query_list.h"\n',
                     'namespace mozc {\n',
                     'namespace {\n'):
        output_stream.write(preamble)

    ordered_keys = sorted(zero_query_dict.keys())
    for index, key in enumerate(ordered_keys):
        # A blank line separates consecutive key/value groups.
        if index > 0:
            output_stream.write('\n')
        key_literal = cgu.ToCppStringLiteral(key)
        output_stream.write('const char *%s_key%d = %s;  // "%s"\n' %
                            (var_name, index, key_literal, key))
        output_stream.write('const ZeroQueryEntry %s_values%d[] = {\n' %
                            (var_name, index))
        entry_rows = []
        for entry in zero_query_dict[key]:
            entry_rows.append(
                '  {%s, %s, %s, 0x%x},  // "%s"' %
                (ZeroQueryTypeToString(entry.entry_type),
                 cgu.ToCppStringLiteral(entry.value),
                 EmojiTypeToString(entry.emoji_type),
                 entry.emoji_android_pua, entry.value))
        output_stream.write('\n'.join(entry_rows) + '\n')
        output_stream.write('};\n')

    output_stream.write('} // namespace\n')

    output_stream.write('const ZeroQueryList %s_data[] = {\n' % var_name)
    list_rows = []
    for index, key in enumerate(ordered_keys):
        list_rows.append(
            '  {%s_key%d, %s_values%d, %d}' %
            (var_name, index, var_name, index, len(zero_query_dict[key])))
    output_stream.write(',\n'.join(list_rows) + '\n')
    output_stream.write('};\n')
    output_stream.write('const size_t %s_size = %d;\n' %
                        (var_name, len(ordered_keys)))

    output_stream.write('} // namespace mozc\n')
    WriteIncludeGuardFooter(output_file_name, output_stream)