Example #1
0
 def train_with_gensim(self):
     """Load the word vectors from disk into ``self.tc_wv_model``.

     ``self.status`` is set to 1 while the file is being loaded and to 2
     once the model is available. A notification is pushed before and
     after the load.
     """
     vectors_path = './Tencent_AILab_ChineseEmbedding.txt'

     self.status = 1
     start_notice = push.generate_rtx_markdown("gensim转子引擎开始加热")
     push.push_to_rtx(start_notice)

     # The embedding file is read in the text (non-binary) word2vec format.
     model = KeyedVectors.load_word2vec_format(vectors_path, binary=False)
     self.tc_wv_model = model

     done_notice = push.generate_rtx_markdown("gensim转子引擎加热完毕")
     push.push_to_rtx(done_notice)
     self.status = 2
Example #2
0
 def switch_log_to_bot():
     """Flip the module-level ``log_to_bot`` flag and announce its new state.

     Returns:
         The literal string ``"succ"``.
     """
     global log_to_bot
     log_to_bot = not log_to_bot
     # Pick the "enabled" or "disabled" announcement for the new state.
     notice = "bot调用日志已开启" if log_to_bot else "bot调用日志已关闭"
     push.push_to_rtx(push.generate_rtx_markdown(notice))
     return "succ"
Example #3
0
 def __init__(self):
     """Start in the untrained state and push a start-up notification.

     ``self.status`` takes the following values:
         0 - not trained
         1 - gensim model is being trained
         2 - gensim model is available
         3 - annoy index is being trained
         4 - annoy index is available
     """
     self.status = 0
     greeting = push.generate_rtx_markdown("wordcalc出仓状态良好")
     push.push_to_rtx(greeting)
Example #4
0
 def do_word_calc():
     """Answer a word-arithmetic query and return the reply as a JSON string.

     The query is the raw request body for POST requests and the ``q`` query
     parameter for every other method. It is parsed by ``parse_formular`` into
     positive and negative terms, evaluated with ``word_calc.calc``, and one
     of several canned phrasings of the result is picked at random.

     Returns:
         A JSON document (str) with the keys ``ret``, ``msgtype``, ``text``
         and ``pic``.
     """
     global word_calc, log_to_bot

     def make_reply(text):
         # Every reply uses the same envelope; only the text differs.
         resp = {
             "ret": 0,
             "msgtype": "markdown",
             "text": text,
             "pic": "nopic"
         }
         return json.dumps(resp, indent=4, ensure_ascii=False)

     # Both GET and POST are accepted.
     if request.method == 'POST':
         post_body = request.data.decode('utf-8')
     else:
         # Default to "" so a missing "q" is handled as an empty query rather
         # than as the literal string "None". Using ``else`` also keeps
         # ``post_body`` bound for methods such as HEAD.
         post_body = request.args.get("q", "")
     if not post_body:
         return make_reply("喵喵喵?")

     # Parse the query into positive and negative terms.
     pos, neg = parse_formular(post_body)
     # Evaluate with gensim/annoy.
     result = word_calc.calc(pos, neg)
     if log_to_bot:
         push.push_to_rtx(
             push.generate_rtx_markdown(post_body + "=\r\n" + str(result)))

     # Drop results that merely repeat one of the query terms.
     filtered_result = [
         item[0] for item in result
         if item[0] not in pos and item[0] not in neg
     ]

     # Build the candidate reply texts.
     if not filtered_result:
         resp_choices = [
             "臣妾实在算不出啊",
             "算晕了,今天天气不错,用云计算试试?",
             "程序已崩溃",
             "爆炸倒计时: 3...2...1...",
         ]
     elif len(filtered_result) < 3:
         item = random.choice(filtered_result)
         resp_choices = [
             item,
             "也许等于" + item + "?",
             "答案是" + item + "~",
         ]
     else:
         # Sample without replacement so one reply never lists the same
         # candidate position twice.
         items = random.sample(filtered_result, 3)
         resp_choices = [
             "大概也许是{0}、{1}或{2}".format(items[0], items[1], items[2]),
             "等于{0}".format(items[0]),
             "我算出来的结果是{0}".format(items[0]),
             "大概也许是{0}".format(items[0]),
             "答案是{0}、{1}或{2}".format(items[0], items[1], items[2]),
             "你是想说{0}和{1}吗?".format(items[0], items[1]),
         ]

     # Return the result in the agreed response format.
     return make_reply(random.choice(resp_choices))
Example #5
0
 def train_with_annoy(self):
     """Build an Annoy index over ``self.tc_wv_model`` and save it to disk.

     ``self.status`` is set to 3 while the index is being built and to 4 once
     it has been saved. A notification is pushed before and after.
     """
     index_path = 'tc_index_genoy.index'

     self.status = 3
     begin_notice = push.generate_rtx_markdown("annoy向量空间开始注水")
     push.push_to_rtx(begin_notice)

     indexer = AnnoyIndexer(self.tc_wv_model, 200)
     self.annoy_index = indexer
     # Export the trained index so it can simply be loaded later:
     #   annoy_index = AnnoyIndexer()
     #   annoy_index.load(fname)
     #   annoy_index.model = tc_wv_model
     indexer.save(index_path)

     end_notice = push.generate_rtx_markdown("annoy向量空间注水完毕")
     push.push_to_rtx(end_notice)
     self.status = 4
Example #6
0
def do_push():
    """Fetch today's joke page, parse it, and push it as a card and as markdown.

    The page URL comes from ``spider.get_today_joke_url`` and the parsed
    content from ``utils.get_qa_from_html``. If either the fetch or the parse
    yields ``None``, a message is printed and nothing is pushed.
    """
    ori_url = spider.get_today_joke_url()
    myhtml = spider.get_html_from_url(ori_url)
    if myhtml is None:
        print("spider failed")
        # Without the page there is nothing to parse or push.
        return
    myjokes = utils.get_qa_from_html(myhtml)
    if myjokes is None:
        print("parse failed")
        # Do not push messages built from missing content.
        return
    rtx_md = push.generate_rtx_markdown(myjokes)
    rtx_card = push.generate_rtx_cardinfo(myjokes, ori_url)
    # The card is sent first, followed by the markdown message.
    push.push_to_rtx(rtx_card)
    push.push_to_rtx(rtx_md)
Example #7
0
 def train_with_annoy():
     """Run ``word_calc.train_with_annoy`` and report progress before and after.

     Returns:
         The literal string ``"succ"``.
     """
     global word_calc
     received_notice = push.generate_rtx_markdown("收到训练annoy请求")
     push.push_to_rtx(received_notice)
     word_calc.train_with_annoy()
     finished_notice = push.generate_rtx_markdown("annoy请求处理完毕")
     push.push_to_rtx(finished_notice)
     return "succ"
Example #8
0
    # 处理开关日志推送请求
    @app.route('/switch', methods=('GET', 'POST'))
    def switch_log_to_bot():
        """Toggle the module-level ``log_to_bot`` flag and push its new state.

        Returns:
            The literal string ``"succ"``.
        """
        global log_to_bot
        log_to_bot = not log_to_bot
        # Announcement text keyed by the flag's new value.
        notices = {
            True: "bot调用日志已开启",
            False: "bot调用日志已关闭",
        }
        announcement = push.generate_rtx_markdown(notices[bool(log_to_bot)])
        push.push_to_rtx(announcement)
        return "succ"

    # 处理训练annoy有损匹配模型请求
    @app.route('/annoy', methods=('GET', 'POST'))
    def train_with_annoy():
        """Train the annoy index through ``word_calc`` and report progress.

        A notification is pushed when the request arrives and another once
        ``word_calc.train_with_annoy`` has returned.

        Returns:
            The literal string ``"succ"``.
        """
        global word_calc
        request_notice = push.generate_rtx_markdown("收到训练annoy请求")
        push.push_to_rtx(request_notice)
        word_calc.train_with_annoy()
        done_notice = push.generate_rtx_markdown("annoy请求处理完毕")
        push.push_to_rtx(done_notice)
        return "succ"

    return app


if __name__ == "__main__":
    # Load the word-vector data into gensim before serving any request.
    word_calc.train_with_gensim()
    # Announce readiness, then start the Flask web service on port 5000,
    # listening on all interfaces.
    ready_notice = push.generate_rtx_markdown("flask初号机已就位")
    push.push_to_rtx(ready_notice)
    app = create_app()
    app.run(host='0.0.0.0', port=5000)
Example #9
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from gensim.models import KeyedVectors
from collections import OrderedDict
import json
import time
import push

# AnnoyIndexer is required by this module. Try the legacy module path first,
# then the path used by gensim 4.x; if neither works, report the failure and
# abort the import of this module.
try:
    from gensim.similarities.index import AnnoyIndexer
except ImportError:
    try:
        # gensim 4.x moved AnnoyIndexer to gensim.similarities.annoy.
        from gensim.similarities.annoy import AnnoyIndexer
    except ImportError as err:
        print('import gensim.annoy error')
        push.push_to_rtx(push.generate_rtx_markdown("gensim引导失败"))
        # Keep raising ValueError as before, but chain the underlying
        # ImportError so the real cause shows up in the traceback.
        raise ValueError("anny indexer 加载失败") from err


class WordCalc:
    def __init__(self):
        """Initialise the calculator as untrained and push a start-up notice.

        ``self.status`` takes the following values:
            0 - not trained
            1 - gensim model is being trained
            2 - gensim model is available
            3 - annoy index is being trained
            4 - annoy index is available
        """
        self.status = 0
        startup_notice = push.generate_rtx_markdown("wordcalc出仓状态良好")
        push.push_to_rtx(startup_notice)

    def train_with_gensim(self):
        self.status = 1
        push.push_to_rtx(push.generate_rtx_markdown("gensim转子引擎开始加热"))
        self.tc_wv_model = KeyedVectors.load_word2vec_format(
            './Tencent_AILab_ChineseEmbedding.txt', binary=False)
Example #10
0
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Smoke-test script: print a greeting to stdout first, so there is output
# even if the import below fails.
print('hello python')

# Imported after the print on purpose of ordering; keep this statement order.
import push

# Send a test notification through the push module.
push.push_to_rtx(push.generate_rtx_markdown("python姿态测试正常"))