def main():
    """Evaluate a Phonetisaurus model against a reference dictionary.

    Loads the model named by ``args.model`` and the entries returned by
    ``read_dict(args.dict)``, decodes every word, and compares the first
    hypothesis with the reference pronunciation twice: once verbatim and
    once after ``drop_stress`` has been applied to both sides.  The two
    mismatch rates and the elapsed wall-clock time are printed.

    A word for which the decoder returns no hypothesis is scored as an
    empty prediction rather than aborting the run, and an empty
    dictionary reports rates of 0.0 instead of dividing by zero.
    """
    args = parse_args()
    eval_start = time.time()
    model = phonetisaurus.Phonetisaurus(args.model)
    d = read_dict(args.dict)

    errors = 0
    stressless_errors = 0
    for word, pron in d:
        # token, beam, threshold, write decoded fsts to disc,
        # accum probs across unique pronunciations, target prob mass
        # NOTE(review): the list above names six parameters but seven
        # arguments are passed; presumably the second one (1) is the
        # n-best count -- confirm against the bindings.
        results = model.Phoneticize(word, 1, 500, 10., False, False, 0.0)

        # The default keeps an empty result set from raising StopIteration.
        result = next(iter(results), None)
        if result is None:
            pred_pron = []
        else:
            pred_pron = [model.FindOsym(u) for u in result.Uniques]

        if ' '.join(pron) != ' '.join(pred_pron):
            errors += 1
        if ' '.join(drop_stress(pron)) != ' '.join(drop_stress(pred_pron)):
            stressless_errors += 1

    total = len(d)
    if total:
        # float() keeps true division even under Python 2 semantics.
        wer = errors / float(total)
        stressless_wer = stressless_errors / float(total)
    else:
        # Nothing was evaluated, so there is nothing to divide by.
        wer = 0.0
        stressless_wer = 0.0

    eval_took = time.time() - eval_start
    print(' Eval: wer %f; stressless wer %f; eval took %f' % (wer, stressless_wer, eval_took))
def create_app():
    """Build the Flask application that serves the transliteration models.

    Loads one Phonetisaurus model per supported language from the
    ``model_store`` directory located two levels above this file's
    directory, collects them in a dict keyed by language code, and
    registers ``PhonetisaurusNETransliterator`` at ``/predict`` with that
    dict passed as the ``net_models`` constructor keyword argument.

    Returns:
        The configured Flask application.
    """
    import phonetisaurus
    import pathlib

    path = pathlib.Path(__file__).absolute().parents[2] / 'model_store'
    logger.info("model directory path = '{}'".format(path))

    # (language code, model sub-directory, log message), in load order.
    # Log messages are kept as whole literals so they stay greppable.
    model_specs = (
        ("ara", 'arabic_ps_1', "packed arabic model"),
        ("chi", 'chinese_ps_1', "packed chinese model"),
        ("heb", 'hebrew_ps_1', "packed hebrew model"),
        ("jpn", 'katakana_ps_1', "packed japanese model"),
        ("kor", 'korean_ps_1', "packed korean model"),
        ("rus", 'russian_ps_1', "packed russian model"),
    )

    # start packing
    net_models = {}
    for code, subdir, packed_message in model_specs:
        net_models[code] = phonetisaurus.Phonetisaurus(
            str(path / subdir / 'model.fst'))
        logger.info(packed_message)

    # init flask objects
    app = Flask(__name__)
    api = Api(app)
    # Emit non-ASCII characters as-is in JSON responses instead of \u escapes.
    api.app.config['RESTFUL_JSON'] = {'ensure_ascii': False}
    api.add_resource(PhonetisaurusNETransliterator, '/predict',
                     resource_class_kwargs={"net_models": net_models})

    return app
help="IP to host the service on.", default="localhost") parser.add_argument("--port", "-p", help="Port to use for hosting.", default=8080, type=int) parser.add_argument("--model", "-m", help="Phonetisaurus G2P model.", required=True) parser.add_argument("--lexicon", "-l", help="Reference lexicon.", required=True) parser.add_argument("--verbose", "-v", help="Verbose mode.", default=False, action="store_true") args = parser.parse_args() if args.verbose: for key, val in args.__dict__.iteritems(): print >> sys.stderr, "{0}:\t{1}".format(key, val) _g2pmodel = phonetisaurus.Phonetisaurus(args.model) _loadLexicon(args.lexicon) run(host=args.host, port=args.port, debug=False)