コード例 #1
0
    if options.testSample:
        mainTest(translator, loadSample(options.testSample), options)
        translator.reportStats(sys.stdout)

    if options.applySample:
        mainApply(translator, options)
        translator.reportStats(sys.stderr)


# ===========================================================================
if __name__ == '__main__':
    import optparse, tool
    optparser = optparse.OptionParser(usage='%prog [OPTION]... FILE...\n' +
                                      str(__doc__),
                                      version='%prog ' + __version__)
    tool.addOptions(optparser)
    SequiturTool.addTrainOptions(optparser)
    optparser.add_option('-e',
                         '--encoding',
                         default='ISO-8859-15',
                         help='use character set encoding ENC',
                         metavar='ENC')
    optparser.add_option('-P',
                         '--phoneme-to-phoneme',
                         action='store_true',
                         help='train/apply a phoneme-to-phoneme converter')
    optparser.add_option(
        '--test-segmental',
        action='store_true',
        help=
        'evaluate only at segmental level, i.e. do not count syllable boundaries and stress marks'
コード例 #2
0
ファイル: g2p.py プロジェクト: giuliopaci/sequitur-g2p
    if options.testSample:
        mainTest(translator, loadSample(options.testSample), options)
        translator.reportStats(sys.stdout)

    if options.applySample:
        mainApply(translator, options)
        translator.reportStats(sys.stderr)

# ===========================================================================
if __name__ == '__main__':
    import optparse, tool
    optparser = optparse.OptionParser(
        usage   = '%prog [OPTION]... FILE...\n' + __doc__,
        version = '%prog ' + __version__)
    tool.addOptions(optparser)
    SequiturTool.addTrainOptions(optparser)
    optparser.add_option(
        '-e', '--encoding', default='ISO-8859-15',
        help='use character set encoding ENC', metavar='ENC')
    optparser.add_option(
        '-P', '--phoneme-to-phoneme', action='store_true',
        help='train/apply a phoneme-to-phoneme converter')
    optparser.add_option(
        '--test-segmental', action='store_true',
        help='evaluate only at segmental level, i.e. do not count syllable boundaries and stress marks')
    optparser.add_option(
        '-B', '--result', dest='testResult',
        help='store test result in table FILE (for use with bootlog or R)', metavar='FILE')
    optparser.add_option(
        '-a', '--apply', dest='applySample',
コード例 #3
0
        lm = makeLmWriter(options)
    else:
        lm = LmDummy()

    builder.build(counts, lm)

    if __debug__ and False:  ### TESTING
        print('verifying normalization ...', file=sys.stdout)
        lm2 = Lm(lm)
        lm2.checkNormalisation()


if __name__ == '__main__':
    # Script entry point: declare the command-line options on an optparse
    # parser.  Note that the parser object itself is bound to the name
    # ``options`` here.
    import optparse, tool
    options = optparse.OptionParser()
    # Let the project-level ``tool`` module register its own options first.
    tool.addOptions(options)
    # Plain string-valued options (no help text is provided for these).
    options.add_option('-v', '--vocabulary')
    options.add_option('-r', '--read')
    # Per-order count cutoffs, given as one string "n_0 n_1 ...".
    options.add_option('-U',
                       '--count-cutoffs',
                       help='set count cutoff values to n_i for order i',
                       metavar='n_0 n_1 ...')
    # File from which counts-of-counts are read.
    options.add_option('-C',
                       '--counts-of-counts',
                       help='read counts-of-counts from FILE',
                       metavar='FILE')
    # Integer model order; defaults to 3 when not given.
    options.add_option('-M', '--order', type='int', default=3)
    # Output language-model format; defaults to 'arpa'.
    options.add_option('-f',
                       '--lm-format',
                       default='arpa',
                       help='valid choices are: arpa, estar')
コード例 #4
0
ファイル: mGramCounts.py プロジェクト: Holzhaus/sequitur-g2p
	counts = mappedCounts

    if options.write:
	countFile = misc.gOpenOut(options.write)
	TextStorage.write(countFile, counts)

    if options.counts_of_counts:
	coc = [ countsOfCounts(mGramReduceToOrder(counts, order))
		for order in range(options.order) ]
	import pprint
	pprint.pprint(coc, misc.gOpenOut(options.counts_of_counts))


if __name__ == '__main__':
    # Script entry point: declare the command-line interface, parse the
    # arguments and hand control to ``main`` through ``tool.run``.
    import optparse
    import tool

    optparser = optparse.OptionParser()
    # Options shared across the project's tools are registered first.
    tool.addOptions(optparser)

    # Inputs: --read uses the 'append' action, so it may be repeated and
    # its values are collected into a list.
    optparser.add_option('-t', '--text')
    optparser.add_option('-r', '--read', action='append')
    optparser.add_option('-v', '--vocabulary')

    # Integer m-gram order (defaults to 3).
    optparser.add_option('-M', '--order', type='int', default=3)

    # Outputs and processing switches.
    optparser.add_option('-w', '--write')
    optparser.add_option('--map-oov', action='store_true')
    optparser.add_option('-C', '--counts-of-counts')

    # Storage configuration.
    optparser.add_option('--storage-class', default='smf')
    optparser.add_option('--memory-limit', type='int')

    options, args = optparser.parse_args()
    tool.run(main, options, args)
コード例 #5
0
ファイル: g2pImpl.py プロジェクト: jigar23/g2pDocker
def getOptParser():
    """Build and return the command-line option parser.

    The parser's usage text is the fixed usage line followed by the module
    docstring, and its version string is derived from ``__version__``.
    Options are registered in this order: the ones contributed by
    ``tool.addOptions``, the ones contributed by
    ``SequiturTool.addTrainOptions``, and then the options declared
    explicitly below (encoding, phoneme-to-phoneme mode, test/apply
    controls, variant generation limits, fake translator, stack limit).

    Returns:
        optparse.OptionParser: the configured parser; the caller is
        responsible for invoking ``parse_args`` on it.
    """
    import optparse
    import tool

    optparser = optparse.OptionParser(
        usage='%prog [OPTION]... FILE...\n' + str(__doc__),
        version='%prog ' + __version__)
    tool.addOptions(optparser)
    SequiturTool.addTrainOptions(optparser)

    optparser.add_option(
        '-e', '--encoding',
        default='ISO-8859-15',
        help='use character set encoding ENC',
        metavar='ENC')
    optparser.add_option(
        '-P', '--phoneme-to-phoneme',
        action='store_true',
        help='train/apply a phoneme-to-phoneme converter')
    optparser.add_option(
        '--test-segmental',
        action='store_true',
        help='evaluate only at segmental level, i.e. do not count syllable boundaries and stress marks')
    optparser.add_option(
        '-B', '--result',
        dest='testResult',
        help='store test result in table FILE (for use with bootlog or R)',
        metavar='FILE')
    optparser.add_option(
        '-a', '--apply',
        dest='applySample',
        help='apply grapheme-to-phoneme conversion to words read from FILE',
        metavar='FILE')
    optparser.add_option(
        '-w', '--word',
        dest='applyWord',
        help='apply grapheme-to-phoneme conversion to word',
        metavar='string')
    # The backslash is escaped explicitly: a bare backslash followed by "s"
    # is not a recognised escape sequence and triggers an invalid-escape
    # warning on modern Python.  The resulting help text is unchanged.
    optparser.add_option(
        '-V', '--variants-mass',
        type='float',
        help='generate pronunciation variants until \\sum_i p(var_i) >= Q (only effective with --apply)',
        metavar='Q')
    optparser.add_option(
        '--variants-number',
        type='int',
        help='generate up to N pronunciation variants (only effective with --apply)',
        metavar='N')
    optparser.add_option(
        '-f', '--fake',
        dest='fakeTranslator',
        help='use a translation memory (read from sample FILE) instead of a genuine model (use in combination with -x to evaluate two files against each other)',
        metavar='FILE')
    optparser.add_option(
        '--stack-limit',
        type='int',
        help='limit size of search stack to N elements',
        metavar='N')
    return optparser