コード例 #1
0
        nAccuracy=args.nAccuracy,
        globalNN=args.globalNN,
        similarityThreshold=args.similarityThreshold,
        prototype_file=args.prototypeFile)

    if args.mode == "lattices":
        for line in sys.stdin:
            print(
                base_decompounder.get_decompound_lattice(
                    line.decode('utf8').rstrip('\n').title(), ))
    elif args.mode == "w2v_dict":
        for word in base_decompounder.model.vocab.keys():
            print word.encode('utf-8')
    elif args.mode in ["1-best", "dict_w2v"]:
        vit = ViterbiDecompounder()
        vit.load_weights(modelSetup["WEIGHTS"])

        words = []
        if args.mode == "1-best":
            words = map(lambda line: line.decode('utf8').strip(), sys.stdin)
        else:
            words = base_decompounder.model.vocab.keys()

        print >> sys.stderr, "# words: %d" % len(words)

        def process_word(word):
            lattice = Lattice(base_decompounder.get_decompound_lattice(word))
            viterbi_path = vit.viterbi_decode(Compound(word, None, lattice))
            return [
                word.encode('utf-8'),
                print_path(viterbi_path).encode('utf-8')
コード例 #2
0
    print >> sys.stderr, "Loading gensim model..."
    model = gensim.models.Word2Vec.load_word2vec_format(args.model_folder + '/w2v.bin', binary=True)

    print >> sys.stderr, "Done."

    if args.mode == "lattices":
        for line in sys.stdin:
            print(
                get_decompound_lattice(
                    line.decode('utf8').rstrip('\n').title(),
                    args.nAccuracy,
                    args.similarityThreshold
                )
            )

    elif args.mode in ["1-best", "dict_w2v"]:
        vit = ViterbiDecompounder()
        vit.load_weights(args.weightsFile)

        if args.mode == "1-best":
            words = map(lambda line: line.decode('utf8').strip(), fileinput.input())
        else:
            words = list(model.vocab.keys())

        for word in words:
            lattice = Lattice(get_decompound_lattice(word, args.nAccuracy, args.similarityThreshold))
            viterbi_path = vit.viterbi_decode(Compound(word, None, lattice))
            print " ".join(map(lambda p: "%d,%d" % p, viterbi_path)

コード例 #3
0
            similarityThreshold=args.similarityThreshold,
            prototype_file=args.prototypeFile)

    if args.mode == "lattices":
        for line in sys.stdin:
            print(
                base_decompounder.get_decompound_lattice(
                    line.decode('utf8').rstrip('\n').title(),
                )
            )
    elif args.mode == "w2v_dict":
        for word in base_decompounder.model.vocab.keys():
            print word.encode('utf-8')
    elif args.mode in ["1-best", "dict_w2v"]:
        vit = ViterbiDecompounder()
        vit.load_weights(modelSetup["WEIGHTS"])

        words = []
        if args.mode == "1-best":
            words = map(lambda line: line.decode('utf8').strip(),
                    sys.stdin)
        else:
            words = base_decompounder.model.vocab.keys()

        print >>sys.stderr, "# words: %d" % len(words)

        def process_word(word):
            """Decode the Viterbi path for a single word.

            Builds a ``Lattice`` from the base decompounder's output for
            ``word``, wraps it in a ``Compound`` and decodes it with ``vit``.

            Returns a two-element list: the UTF-8 encoded word and the UTF-8
            encoded result of ``print_path`` for the decoded path.
            """
            candidates = base_decompounder.get_decompound_lattice(word)
            compound = Compound(word, None, Lattice(candidates))
            best_path = vit.viterbi_decode(compound)
            encoded_word = word.encode('utf-8')
            encoded_path = print_path(best_path).encode('utf-8')
            return [encoded_word, encoded_path]