def main(SylCls):
    """Command-line entry point: syllabify pronunciations read from stdin.

    Parses the command line, loads the phoneme set from the JSON file given
    as PHONESETFILE, builds a syllabifier with ``SylCls(phoneset)`` and then
    processes standard input line by line. Each line is split on whitespace:
    the first field is the word, the remaining fields are its pronunciation.
    For every entry one record is printed to stdout, formatted by
    ``dictconv.print_flat`` or ``dictconv.print_nested`` depending on
    ``--oformat``. Blank (or whitespace-only) input lines are skipped.

    Args:
        SylCls: Callable invoked as ``SylCls(phoneset)``. The object it
            returns must provide ``syllabify(pronun)``, returning an
            iterable of syllables that each support ``len()``.

    Raises:
        Exception: If ``--oformat`` is neither "flat" nor "nested". The
            check happens when an entry is about to be printed, so it is
            only raised once a non-blank input line has been read.
    """
    import argparse
    import json
    import sys  # needed for sys.stdin; harmless if also imported at file level

    import dictconv

    parser = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('phonesetfile', metavar='PHONESETFILE', type=str,
                        help="File containing the phoneme set (json utf-8).")
    parser.add_argument('--oformat', metavar='OUTPUTFORMAT',
                        default=dictconv.DEF_OUTFORMAT,
                        help="output format (flat|nested)")
    parser.add_argument('--defstresstone', metavar='DEFSTRESSTONE',
                        default=dictconv.DEFSTRESSTONE,
                        help="default stress/tone")
    args = parser.parse_args()

    # Load the phoneme set.
    with open(args.phonesetfile, encoding="utf-8") as infh:
        phoneset = json.load(infh)
    syllabifier = SylCls(phoneset)

    for line in sys.stdin:
        fields = line.split()
        if not fields:
            # Blank line: there is no word to index, so skip it instead of
            # failing with IndexError on fields[0].
            continue
        word = fields[0]
        pronun = fields[1:]
        syls = syllabifier.syllabify(pronun)
        # The syllable spec is the length of each syllable, as strings.
        sylspec = [str(len(syl)) for syl in syls]
        # Every syllable gets the default stress/tone value.
        stresspat = args.defstresstone * len(sylspec)
        if args.oformat == "flat":
            print(dictconv.print_flat(word, "None", stresspat, sylspec,
                                      pronun, None))
        elif args.oformat == "nested":
            print(dictconv.print_nested(word, "None", stresspat, sylspec,
                                        pronun, phoneset,
                                        args.defstresstone, None))
        else:
            raise Exception("Invalid output format specified")
#print(lexstress) else: with open(args.decomp, encoding="utf-8") as infh: wordlist = infh.read().split() lexstress = LexStresserDecomp(phoneset, wordlist) #print(lexstress) for line in sys.stdin: #input format is "flat" separate fields (current stress pattern is ignored/replaced) fields = line.strip().split() word, pos, stresspat, sylspec = fields[:4] pronun = fields[4:] syls = [] i = 0 for syllen in map(int, sylspec): syls.append(pronun[i:i + syllen]) i += syllen stresspat = "".join(map(str, lexstress.get_stress_word(word, syls))) if args.oformat == "flat": print( dictconv.print_flat(word, "None", stresspat, sylspec, pronun, None)) elif args.oformat == "nested": print( dictconv.print_nested(word, "None", stresspat, sylspec, pronun, phoneset, args.defstresstone, None)) else: raise Exception("Invalid output format specified")