Ejemplo n.º 1
0
def run_turkish():
    """Run an experiment with the Turkish word list and gold file.

    Builds a ``Parameter`` object, overrides the options listed below, and
    hands the training word list, the gold test file, and the parameters to
    ``run_experiment``.
    """
    train_path = r'data/wordlist.2010.tur.utf8.txt'
    gold_path = r'data/mit/gold.tur.txt'

    settings = Parameter()
    # Applied in a fixed order, one attribute at a time.
    overrides = (
        ('UseTransRules', True),
        ('DoPruning', False),
        ('DoCompound', False),
        ('ExcludeUnreliable', False),
        ('BestNCandSuffix', 150),
    )
    for option, value in overrides:
        setattr(settings, option, value)

    run_experiment(train_path, gold_path, settings)
Ejemplo n.º 2
0
def run_finnish():
    """Run an experiment on Finnish data against gold standard results.

    Builds a ``Parameter`` object, overrides the options listed below, and
    hands the training word list, the gold test file, and the parameters to
    ``run_experiment``.
    """
    train_path = r'data/wordlist.2010.fin.utf8.txt'
    gold_path = r'data/mit/gold.fin.txt'

    settings = Parameter()
    # Applied in a fixed order, one attribute at a time.
    overrides = (
        ('UseTransRules', False),
        ('DoPruning', True),
        ('DoCompound', True),
        ('ExcludeUnreliable', True),
        ('BestNCandSuffix', 150),
    )
    for option, value in overrides:
        setattr(settings, option, value)

    run_experiment(train_path, gold_path, settings)
Ejemplo n.º 3
0
    def _flag(text):
        """Parse a command-line on/off switch (documented as 1|0) into a bool.

        ``type=bool`` would apply ``bool()`` to the raw argument string, and
        every non-empty string -- including '0' and 'False' -- is truthy, so
        an option could never be switched off from the command line.
        """
        normalized = text.strip().lower()
        if normalized in ('1', 'true', 't', 'yes', 'y', 'on'):
            return True
        # The empty string stays False, matching what bool('') returned before.
        if normalized in ('0', 'false', 'f', 'no', 'n', 'off', ''):
            return False
        raise argparse.ArgumentTypeError('expected 1 or 0, got %r' % text)

    # Parameter() supplies the defaults shown in the help text; the parsed
    # command-line values are copied back onto it before running.
    params = Parameter()
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('infile', help='The input file containing a word list with line format: <word> <freq>')
    arg_parser.add_argument('outfile', help='The output file to save the segmentation result')
    # On/off switches: parsed with _flag so that "0" really means off.
    arg_parser.add_argument('-p', '--prune', help='Whether use pruning (1|0, default:%s)' % params.DoPruning, type=_flag, default=params.DoPruning)
    arg_parser.add_argument('-t', '--trans', help='Whether use transformation rules (1|0, default:%s)' % params.UseTransRules, type=_flag, default=params.UseTransRules)
    arg_parser.add_argument('-c', '--comp', help='Whether process compounding (1|0, default:%s)' % params.DoCompound, type=_flag, default=params.DoCompound)
    arg_parser.add_argument('-e', '--excl', help='Whether exclude unreliable roots (1|0, default:%s)' % params.ExcludeUnreliable, type=_flag, default=params.ExcludeUnreliable)
    arg_parser.add_argument('-n', '--hyphen', help='Whether explicitly deal with hyphen words (1|0, default:%s)' % params.DoHyphen, type=_flag, default=params.DoHyphen)
    arg_parser.add_argument('-a', '--apos', help='Whether explicitly deal with apostrophes (1|0, default:%s)' % params.DoApostrophe, type=_flag, default=params.DoApostrophe)
    # Integer-valued limits.
    arg_parser.add_argument('-r', '--root', help='Minimal length of roots that will be possibly segmented (default:%s)' % params.MinStemLen, type=int, default=params.MinStemLen)
    arg_parser.add_argument('-s', '--suff', help='Maximal length of suffixes (default:%s)' % params.MaxSuffixLen, type=int, default=params.MaxSuffixLen)
    args = arg_parser.parse_args()
    params.DoPruning = args.prune
    params.UseTransRules = args.trans
    params.DoCompound = args.comp
    params.ExcludeUnreliable = args.excl
    params.DoHyphen = args.hyphen
    params.DoApostrophe = args.apos
    params.MinStemLen = args.root
    params.MaxSuffixLen = args.suff
    params.print_all()
    run(args.infile, args.outfile, params)

    

    



Ejemplo n.º 4
0
        '--apos',
        help='Whether explicitly deal with apostrophes (1|0, default:%s)' %
        parameters.DoApostrophe,
        type=bool,
        default=parameters.DoApostrophe)
    arg_parser.add_argument(
        '-r',
        '--root',
        help=
        'Minimal length of roots that will be possibly segmented (default:%s)'
        % parameters.MinStemLen,
        type=int,
        default=parameters.MinStemLen)
    arg_parser.add_argument('-s',
                            '--suff',
                            help='Maximal length of suffixes (default:%s)' %
                            parameters.MaxSuffixLen,
                            type=int,
                            default=parameters.MaxSuffixLen)
    args = arg_parser.parse_args()
    parameters.DoPruning = args.prune
    parameters.UseTransRules = args.trans
    parameters.DoCompound = args.comp
    parameters.ExcludeUnreliable = args.excl
    parameters.DoHyphen = args.hyphen
    parameters.DoApostrophe = args.apos
    parameters.MinStemLen = args.root
    parameters.MaxSuffixLen = args.suff
    parameters.print_all()
    run(args.infile, args.outfile, parameters)