def run_turkish():
    """Run an experiment on the Turkish word list against its gold file.

    Builds a fresh ``Parameter`` object, applies the Turkish settings below,
    and hands the training and gold file paths to ``run_experiment``.
    """
    settings = Parameter()
    overrides = (
        ('UseTransRules', True),
        ('DoPruning', False),
        ('DoCompound', False),
        ('ExcludeUnreliable', False),
        ('BestNCandSuffix', 150),
    )
    for field_name, field_value in overrides:
        setattr(settings, field_name, field_value)

    train_path = r'data/wordlist.2010.tur.utf8.txt'
    gold_path = r'data/mit/gold.tur.txt'
    run_experiment(train_path, gold_path, settings)
def run_finnish():
    """Run an experiment on the Finnish word list against its gold file.

    Builds a fresh ``Parameter`` object, applies the Finnish settings below,
    and hands the training and gold file paths to ``run_experiment``.
    """
    settings = Parameter()
    overrides = (
        ('UseTransRules', False),
        ('DoPruning', True),
        ('DoCompound', True),
        ('ExcludeUnreliable', True),
        ('BestNCandSuffix', 150),
    )
    for field_name, field_value in overrides:
        setattr(settings, field_name, field_value)

    train_path = r'data/wordlist.2010.fin.utf8.txt'
    gold_path = r'data/mit/gold.fin.txt'
    run_experiment(train_path, gold_path, settings)
def _parse_flag(text):
    """Convert a command-line ``1|0`` style value to a bool.

    ``type=bool`` must not be used with argparse: ``bool('0')`` is True, so
    every non-empty value (including ``0``) would switch the option on and
    it could never be disabled from the command line.

    Raises:
        argparse.ArgumentTypeError: if the value is not a recognised flag.
    """
    value = text.strip().lower()
    if value in ('1', 'true', 't', 'yes', 'y'):
        return True
    # The empty string stays False, matching what bool('') returned before.
    if value in ('0', 'false', 'f', 'no', 'n', ''):
        return False
    raise argparse.ArgumentTypeError('expected 1 or 0, got %r' % text)


# Command-line entry: start from the default parameters, let the options
# below override them, then run on the given files.
params = Parameter()

arg_parser = argparse.ArgumentParser()
arg_parser.add_argument(
    'infile',
    help='The input file containing a word list with line format: <word> <freq>')
arg_parser.add_argument(
    'outfile',
    help='The output file to save the segmentation result')

# Boolean switches: parsed with _parse_flag so that "0" really means False.
arg_parser.add_argument(
    '-p', '--prune',
    help='Whether use pruning (1|0, default:%s)' % params.DoPruning,
    type=_parse_flag, default=params.DoPruning)
arg_parser.add_argument(
    '-t', '--trans',
    help='Whether use transformation rules (1|0, default:%s)' % params.UseTransRules,
    type=_parse_flag, default=params.UseTransRules)
arg_parser.add_argument(
    '-c', '--comp',
    help='Whether process compounding (1|0, default:%s)' % params.DoCompound,
    type=_parse_flag, default=params.DoCompound)
arg_parser.add_argument(
    '-e', '--excl',
    help='Whether exclude unreliable roots (1|0, default:%s)' % params.ExcludeUnreliable,
    type=_parse_flag, default=params.ExcludeUnreliable)
arg_parser.add_argument(
    '-n', '--hyphen',
    help='Whether explicitly deal with hyphen words (1|0, default:%s)' % params.DoHyphen,
    type=_parse_flag, default=params.DoHyphen)
arg_parser.add_argument(
    '-a', '--apos',
    help='Whether explicitly deal with apostrophes (1|0, default:%s)' % params.DoApostrophe,
    type=_parse_flag, default=params.DoApostrophe)

# Integer length limits.
arg_parser.add_argument(
    '-r', '--root',
    help='Minimal length of roots that will be possibly segmented (default:%s)' % params.MinStemLen,
    type=int, default=params.MinStemLen)
arg_parser.add_argument(
    '-s', '--suff',
    help='Maximal length of suffixes (default:%s)' % params.MaxSuffixLen,
    type=int, default=params.MaxSuffixLen)

args = arg_parser.parse_args()

# Copy the parsed values back onto the parameter object.
params.DoPruning = args.prune
params.UseTransRules = args.trans
params.DoCompound = args.comp
params.ExcludeUnreliable = args.excl
params.DoHyphen = args.hyphen
params.DoApostrophe = args.apos
params.MinStemLen = args.root
params.MaxSuffixLen = args.suff

params.print_all()
run(args.infile, args.outfile, params)
'--apos', help='Whether explicitly deal with apostrophes (1|0, default:%s)' % parameters.DoApostrophe, type=bool, default=parameters.DoApostrophe) arg_parser.add_argument( '-r', '--root', help= 'Minimal length of roots that will be possibly segmented (default:%s)' % parameters.MinStemLen, type=int, default=parameters.MinStemLen) arg_parser.add_argument('-s', '--suff', help='Maximal length of suffixes (default:%s)' % parameters.MaxSuffixLen, type=int, default=parameters.MaxSuffixLen) args = arg_parser.parse_args() parameters.DoPruning = args.prune parameters.UseTransRules = args.trans parameters.DoCompound = args.comp parameters.ExcludeUnreliable = args.excl parameters.DoHyphen = args.hyphen parameters.DoApostrophe = args.apos parameters.MinStemLen = args.root parameters.MaxSuffixLen = args.suff parameters.print_all() run(args.infile, args.outfile, parameters)