# NOTE(review): this chunk arrived whitespace-collapsed onto one line; the
# line breaks and nesting below are reconstructed from the syntax -- confirm
# against the original layout (in particular the base indentation level).
# The next line is the tail of a call whose opening lies before this chunk.
nAccuracy=args.nAccuracy, globalNN=args.globalNN, similarityThreshold=args.similarityThreshold, prototype_file=args.prototypeFile)

# Dispatch on the requested mode (Python 2 syntax throughout).
if args.mode == "lattices":
    # Print one decompound lattice per stdin line. Each line is decoded from
    # UTF-8, stripped of its trailing newline and title-cased before lookup.
    for line in sys.stdin:
        print(
            base_decompounder.get_decompound_lattice(
                line.decode('utf8').rstrip('\n').title(),
            ))
elif args.mode == "w2v_dict":
    # Dump every key of the model vocabulary, one UTF-8 encoded entry per line.
    for word in base_decompounder.model.vocab.keys():
        print word.encode('utf-8')
elif args.mode in ["1-best", "dict_w2v"]:
    # Both modes decode with a Viterbi decompounder whose weights come from
    # the "WEIGHTS" entry of modelSetup.
    vit = ViterbiDecompounder()
    vit.load_weights(modelSetup["WEIGHTS"])

    words = []
    if args.mode == "1-best":
        # Words come from stdin, decoded from UTF-8 and whitespace-stripped.
        words = map(lambda line: line.decode('utf8').strip(), sys.stdin)
    else:
        # "dict_w2v": take the words from the model vocabulary instead.
        words = base_decompounder.model.vocab.keys()

    # len() on the map() result relies on Python 2, where map returns a list.
    print >> sys.stderr, "# words: %d" % len(words)

    def process_word(word):
        # Build the lattice for `word`, decode it with the Viterbi
        # decompounder, and return the UTF-8 encoded word together with the
        # UTF-8 encoded output of print_path() for the decoded path.
        lattice = Lattice(base_decompounder.get_decompound_lattice(word))
        viterbi_path = vit.viterbi_decode(Compound(word, None, lattice))
        return [
            word.encode('utf-8'),
            print_path(viterbi_path).encode('utf-8')
            # NOTE(review): the visible text ends here with the list literal
            # still open; the remainder lies outside this chunk.
# NOTE(review): this chunk arrived whitespace-collapsed onto one line; the
# line breaks and nesting below are reconstructed from the syntax -- confirm
# against the original layout (in particular the base indentation level).

# Load the word2vec vectors from <model_folder>/w2v.bin (binary format),
# reporting progress on stderr so stdout stays clean for results.
print >> sys.stderr, "Loading gensim model..."
model = gensim.models.Word2Vec.load_word2vec_format(args.model_folder + '/w2v.bin', binary=True)
print >> sys.stderr, "Done."

# Dispatch on the requested mode (Python 2 syntax throughout).
if args.mode == "lattices":
    # Print one decompound lattice per stdin line. Each line is decoded from
    # UTF-8, stripped of its trailing newline and title-cased before lookup.
    for line in sys.stdin:
        print(
            get_decompound_lattice(
                line.decode('utf8').rstrip('\n').title(),
                args.nAccuracy,
                args.similarityThreshold
            )
        )
elif args.mode in ["1-best", "dict_w2v"]:
    # Both modes decode with a Viterbi decompounder whose weights are loaded
    # from args.weightsFile.
    vit = ViterbiDecompounder()
    vit.load_weights(args.weightsFile)

    if args.mode == "1-best":
        # Words come from fileinput.input(), decoded from UTF-8 and
        # whitespace-stripped.
        words = map(lambda line: line.decode('utf8').strip(), fileinput.input())
    else:
        # "dict_w2v": take the words from the model vocabulary instead.
        words = list(model.vocab.keys())

    for word in words:
        # Build the lattice for the word and decode it with the Viterbi
        # decompounder.
        lattice = Lattice(get_decompound_lattice(word, args.nAccuracy, args.similarityThreshold))
        viterbi_path = vit.viterbi_decode(Compound(word, None, lattice))
        # Emit the path as space-separated "%d,%d" items, one per path element.
        print " ".join(map(lambda p: "%d,%d" % p, viterbi_path)
        # NOTE(review): the visible text ends here with join( still open;
        # the remainder lies outside this chunk.
# NOTE(review): this chunk arrived whitespace-collapsed onto one line; the
# line breaks and nesting below are reconstructed from the syntax -- confirm
# against the original layout (in particular the base indentation level).
# The next line is the tail of a call whose opening lies before this chunk.
similarityThreshold=args.similarityThreshold, prototype_file=args.prototypeFile)

# Dispatch on the requested mode (Python 2 syntax throughout).
if args.mode == "lattices":
    # Print one decompound lattice per stdin line. Each line is decoded from
    # UTF-8, stripped of its trailing newline and title-cased before lookup.
    for line in sys.stdin:
        print(
            base_decompounder.get_decompound_lattice(
                line.decode('utf8').rstrip('\n').title(),
            )
        )
elif args.mode == "w2v_dict":
    # Dump every key of the model vocabulary, one UTF-8 encoded entry per line.
    for word in base_decompounder.model.vocab.keys():
        print word.encode('utf-8')
elif args.mode in ["1-best", "dict_w2v"]:
    # Both modes decode with a Viterbi decompounder whose weights come from
    # the "WEIGHTS" entry of modelSetup.
    vit = ViterbiDecompounder()
    vit.load_weights(modelSetup["WEIGHTS"])

    words = []
    if args.mode == "1-best":
        # Words come from stdin, decoded from UTF-8 and whitespace-stripped.
        words = map(lambda line: line.decode('utf8').strip(), sys.stdin)
    else:
        # "dict_w2v": take the words from the model vocabulary instead.
        words = base_decompounder.model.vocab.keys()

    # len() on the map() result relies on Python 2, where map returns a list.
    print >>sys.stderr, "# words: %d" % len(words)

    def process_word(word):
        """Decompound a single word and return it with its decoded path.

        Builds a Lattice from the base decompounder's lattice for `word`,
        runs the Viterbi decoder on a Compound wrapping the word and that
        lattice, and returns a two-element list: the UTF-8 encoded word and
        the UTF-8 encoded result of print_path() for the decoded path.
        """
        lattice = Lattice(base_decompounder.get_decompound_lattice(word))
        viterbi_path = vit.viterbi_decode(Compound(word, None, lattice))
        return [word.encode('utf-8'), print_path(viterbi_path).encode('utf-8')]