def phrase2vec(txt, topn):
    # Return the topn most similar phrases for the whitespace-separated
    # phrases in txt, serialized as a JSON string.
    global phrase2vector
    if phrase2vector is None:
        # Lazily load the phrase-level model on first use.
        from pynlpini.word2vec.word2vector import Word2Vector
        phrase2vector = Word2Vector.get_phrase_model()
    # Keep only the phrases present in the model vocabulary
    # (membership test on the vocab dict, not on the unbound keys method).
    words = txt.split() | where(lambda word: word in phrase2vector.vocab)
    return json.dumps(phrase2vector.most_similar(words, topn=topn), ensure_ascii=False)
def word2vec(txt, topn):
    global word2vector
    if word2vector is None:
        from pynlpini.word2vec.word2vector import Word2Vector
        word2vector = Word2Vector.get_word_model()
    words = txt.split()
    return json.dumps(word2vector.most_similar(words, topn=topn), ensure_ascii=False)
import json
import os
from collections import defaultdict

from flask import Flask, request, abort
from pipe import *

from pynlpini.word2vec.word2vector import Word2Vector
from pynlpini.keyword.keyword_extractor import KeywordExtractor
from pynlpini.seg.seg_tagger import SegTagger

app = Flask(__name__)
base_dir = os.path.dirname(__file__)

# Load the phrase-level model and keyword extractor once at startup.
phrase2vector = Word2Vector.get_phrase_model()
phrase_vocabs = phrase2vector.vocab.keys()
keyword_extractor = KeywordExtractor(SegTagger())

# Build the tag dictionary from tag.csv: each non-empty line holds two
# whitespace-separated tokens; the second token becomes the key and the
# first is added to its set, keeping only pairs found in the model vocabulary.
tag_dict = defaultdict(set)
with open(base_dir + "/tag.csv") as tag_file:
    tmp_dict = tag_file.readlines() | select(lambda x: x.decode("utf-8").strip()) | where(
        lambda x: len(x) > 0) | select(
        lambda x: (x.split()[0].strip(), x.split()[1].strip())) | as_dict
for k, v in tmp_dict.iteritems():
    if k in phrase_vocabs and v in phrase_vocabs:
        tag_dict[v].add(k)


@app.route('/tag', methods=['GET', 'POST'])
def tag():