Ejemplo n.º 1
0
def from_aligned_file(syllabus_name, aligned_file, output_file):
    """Copy the alignments for a syllabus' words out of an aligned file.

    Each word yielded by ``align_core.iter_words(syllabus_name)`` contributes
    a ``(surface, reading)`` pair to the set of expected entries. The entries
    of ``AlignedFile(aligned_file)`` are then scanned, and the first one whose
    ``(grapheme, phoneme)`` matches an expected pair is written to
    ``output_file`` as one line (``alignment.to_line()``); later alignments
    with the same key are skipped because the pair has already been removed
    from the set. Pairs that were never matched are written, sorted, to the
    relative path ``missing.log``.

    Args:
        syllabus_name: Passed through to ``align_core.iter_words``.
        aligned_file: Passed through to ``AlignedFile``.
        output_file: Path opened for writing via ``sopen``.
    """
    # NOTE(review): nSteps=4 is declared but only three _log.log() steps
    # follow -- confirm whether _log.finish() counts as the fourth.
    _log.start('Extracting gp-aligned words', nSteps=4)

    _log.log('Building set of expected words')
    include_set = set((w.surface, w.reading)
                      for w in align_core.iter_words(syllabus_name))

    _log.log('Loading alignments')
    alignments = AlignedFile(aligned_file)

    _log.log('Saving alignments')
    o_stream = sopen(output_file, 'w')
    try:
        for alignment in alignments:
            key = (alignment.grapheme, alignment.phoneme)
            if key in include_set:
                # write() instead of the print-chevron statement so the
                # line is emitted identically on Python 2 and Python 3.
                o_stream.write('%s\n' % alignment.to_line())
                # Drop the key so each expected word is emitted only once
                # and whatever remains afterwards is exactly the missing set.
                include_set.remove(key)
    finally:
        # Close even if iteration or to_line() raises, so the handle is not
        # leaked and already-written lines are flushed.
        o_stream.close()

    if include_set:
        _log.finish('%d entries not found (see missing.log)' %
                    len(include_set))
        o_stream = sopen('missing.log', 'w')
        try:
            for surface, reading in sorted(include_set):
                o_stream.write('%s %s:%s %s\n' % (surface, reading,
                                                  surface, reading))
        finally:
            o_stream.close()
    else:
        _log.finish('All entries found')
Ejemplo n.º 2
0
def from_aligned_file(syllabus_name, aligned_file, output_file):
    """Copy the alignments for a syllabus' words out of an aligned file.

    Each word yielded by ``align_core.iter_words(syllabus_name)`` contributes
    a ``(surface, reading)`` pair to the set of expected entries. The entries
    of ``AlignedFile(aligned_file)`` are then scanned, and the first one whose
    ``(grapheme, phoneme)`` matches an expected pair is written to
    ``output_file`` as one line (``alignment.to_line()``); later alignments
    with the same key are skipped because the pair has already been removed
    from the set. Pairs that were never matched are written, sorted, to the
    relative path ``missing.log``.

    Args:
        syllabus_name: Passed through to ``align_core.iter_words``.
        aligned_file: Passed through to ``AlignedFile``.
        output_file: Path opened for writing via ``sopen``.
    """
    # NOTE(review): nSteps=4 is declared but only three _log.log() steps
    # follow -- confirm whether _log.finish() counts as the fourth.
    _log.start('Extracting gp-aligned words', nSteps=4)

    _log.log('Building set of expected words')
    include_set = set((w.surface, w.reading)
                      for w in align_core.iter_words(syllabus_name))

    _log.log('Loading alignments')
    alignments = AlignedFile(aligned_file)

    _log.log('Saving alignments')
    o_stream = sopen(output_file, 'w')
    try:
        for alignment in alignments:
            key = (alignment.grapheme, alignment.phoneme)
            if key in include_set:
                # write() instead of the print-chevron statement so the
                # line is emitted identically on Python 2 and Python 3.
                o_stream.write('%s\n' % alignment.to_line())
                # Drop the key so each expected word is emitted only once
                # and whatever remains afterwards is exactly the missing set.
                include_set.remove(key)
    finally:
        # Close even if iteration or to_line() raises, so the handle is not
        # leaked and already-written lines are flushed.
        o_stream.close()

    if include_set:
        _log.finish('%d entries not found (see missing.log)' %
                    len(include_set))
        o_stream = sopen('missing.log', 'w')
        try:
            for surface, reading in sorted(include_set):
                o_stream.write('%s %s:%s %s\n' % (surface, reading,
                                                  surface, reading))
        finally:
            o_stream.close()
    else:
        _log.finish('All entries found')
Ejemplo n.º 3
0
def to_alignment_format(syllabus_name, output_file):
    """Write a syllabus' words to a file, one `surface reading` pair per line.

    Only words from ``align_core.iter_words(syllabus_name)`` that have a
    truthy ``reading`` and for which ``has_kanji()`` returns true are
    written; all other words are skipped.

    Args:
        syllabus_name: Passed through to ``align_core.iter_words``.
        output_file: Path opened for writing via ``sopen``.
    """
    o_stream = sopen(output_file, 'w')
    try:
        for word in align_core.iter_words(syllabus_name):
            if word.reading and word.has_kanji():
                # write() instead of the print-chevron statement so the
                # line is emitted identically on Python 2 and Python 3.
                o_stream.write('%s %s\n' % (word.surface, word.reading))
    finally:
        # Close even if the word iterator raises part-way through, so the
        # handle is not leaked.
        o_stream.close()