if options.testSample: mainTest(translator, loadSample(options.testSample), options) translator.reportStats(sys.stdout) if options.applySample: mainApply(translator, options) translator.reportStats(sys.stderr) # =========================================================================== if __name__ == '__main__': import optparse, tool optparser = optparse.OptionParser(usage='%prog [OPTION]... FILE...\n' + str(__doc__), version='%prog ' + __version__) tool.addOptions(optparser) SequiturTool.addTrainOptions(optparser) optparser.add_option('-e', '--encoding', default='ISO-8859-15', help='use character set encoding ENC', metavar='ENC') optparser.add_option('-P', '--phoneme-to-phoneme', action='store_true', help='train/apply a phoneme-to-phoneme converter') optparser.add_option( '--test-segmental', action='store_true', help= 'evaluate only at segmental level, i.e. do not count syllable boundaries and stress marks'
if options.testSample: mainTest(translator, loadSample(options.testSample), options) translator.reportStats(sys.stdout) if options.applySample: mainApply(translator, options) translator.reportStats(sys.stderr) # =========================================================================== if __name__ == '__main__': import optparse, tool optparser = optparse.OptionParser( usage = '%prog [OPTION]... FILE...\n' + __doc__, version = '%prog ' + __version__) tool.addOptions(optparser) SequiturTool.addTrainOptions(optparser) optparser.add_option( '-e', '--encoding', default='ISO-8859-15', help='use character set encoding ENC', metavar='ENC') optparser.add_option( '-P', '--phoneme-to-phoneme', action='store_true', help='train/apply a phoneme-to-phoneme converter') optparser.add_option( '--test-segmental', action='store_true', help='evaluate only at segmental level, i.e. do not count syllable boundaries and stress marks') optparser.add_option( '-B', '--result', dest='testResult', help='store test result in table FILE (for use with bootlog or R)', metavar='FILE') optparser.add_option( '-a', '--apply', dest='applySample',
lm = makeLmWriter(options) else: lm = LmDummy() builder.build(counts, lm) if __debug__ and False: ### TESTING print('verifying normalization ...', file=sys.stdout) lm2 = Lm(lm) lm2.checkNormalisation() if __name__ == '__main__': import optparse, tool options = optparse.OptionParser() tool.addOptions(options) options.add_option('-v', '--vocabulary') options.add_option('-r', '--read') options.add_option('-U', '--count-cutoffs', help='set count cutoff values to n_i for order i', metavar='n_0 n_1 ...') options.add_option('-C', '--counts-of-counts', help='read counts-of-counts from FILE', metavar='FILE') options.add_option('-M', '--order', type='int', default=3) options.add_option('-f', '--lm-format', default='arpa', help='valid choices are: arpa, estar')
counts = mappedCounts if options.write: countFile = misc.gOpenOut(options.write) TextStorage.write(countFile, counts) if options.counts_of_counts: coc = [ countsOfCounts(mGramReduceToOrder(counts, order)) for order in range(options.order) ] import pprint pprint.pprint(coc, misc.gOpenOut(options.counts_of_counts)) if __name__ == '__main__': import optparse, tool options = optparse.OptionParser() tool.addOptions(options) options.add_option('-t', '--text') options.add_option('-r', '--read', action='append') options.add_option('-v', '--vocabulary') options.add_option('-M', '--order', type='int', default=3) options.add_option('-w', '--write') options.add_option('--map-oov', action='store_true') options.add_option('-C', '--counts-of-counts') options.add_option('--storage-class', default='smf') options.add_option('--memory-limit', type='int') options, args = options.parse_args() tool.run(main, options, args)
def getOptParser():
    """Build and return the command-line option parser for this tool.

    The parser is an ``optparse.OptionParser`` whose usage text is the
    standard ``%prog [OPTION]... FILE...`` line followed by the module
    docstring (if any), and whose version string comes from the module-level
    ``__version__``.

    Before the options defined here are registered, the parser is handed to
    ``tool.addOptions`` and ``SequiturTool.addTrainOptions`` so they can
    register their own options.  NOTE(review): the ``--fake`` help text
    refers to ``-x``, which is not defined in this function; presumably one
    of those two helpers provides it -- confirm.

    Returns:
        optparse.OptionParser: the fully configured parser.  Parsing is left
        to the caller (``parse_args`` is not invoked here).
    """
    # Kept function-local, as in the original, so importing this module does
    # not require the project-specific ``tool`` module.
    import optparse
    import tool

    # ``__doc__`` is None when the module has no docstring (e.g. under
    # ``python -OO``); fall back to an empty string so the usage message
    # does not end with the literal text "None".
    optparser = optparse.OptionParser(
        usage='%prog [OPTION]... FILE...\n' + (__doc__ or ''),
        version='%prog ' + __version__)

    # Options contributed by shared helpers.
    tool.addOptions(optparser)
    SequiturTool.addTrainOptions(optparser)

    # --- input handling ----------------------------------------------------
    optparser.add_option(
        '-e', '--encoding',
        default='ISO-8859-15',
        help='use character set encoding ENC',
        metavar='ENC')
    optparser.add_option(
        '-P', '--phoneme-to-phoneme',
        action='store_true',
        help='train/apply a phoneme-to-phoneme converter')

    # --- evaluation --------------------------------------------------------
    optparser.add_option(
        '--test-segmental',
        action='store_true',
        help='evaluate only at segmental level, i.e. do not count '
             'syllable boundaries and stress marks')
    optparser.add_option(
        '-B', '--result',
        dest='testResult',
        help='store test result in table FILE (for use with bootlog or R)',
        metavar='FILE')

    # --- applying a model --------------------------------------------------
    optparser.add_option(
        '-a', '--apply',
        dest='applySample',
        help='apply grapheme-to-phoneme conversion to words read from FILE',
        metavar='FILE')
    optparser.add_option(
        '-w', '--word',
        dest='applyWord',
        help='apply grapheme-to-phoneme conversion to word',
        metavar='string')
    optparser.add_option(
        '-V', '--variants-mass',
        type='float',
        # The backslash is escaped explicitly: a bare "\s" is an invalid
        # escape sequence (SyntaxWarning on Python >= 3.12).  The resulting
        # help text is unchanged.
        help='generate pronunciation variants until \\sum_i p(var_i) >= Q '
             '(only effective with --apply)',
        metavar='Q')
    optparser.add_option(
        '--variants-number',
        type='int',
        help='generate up to N pronunciation variants '
             '(only effective with --apply)',
        metavar='N')
    optparser.add_option(
        '-f', '--fake',
        dest='fakeTranslator',
        help='use a translation memory (read from sample FILE) instead of '
             'a genuine model (use in combination with -x to evaluate two '
             'files against each other)',
        metavar='FILE')
    optparser.add_option(
        '--stack-limit',
        type='int',
        help='limit size of search stack to N elements',
        metavar='N')

    return optparser