def vecCompare(cs):
    """Find the two stored questions most similar to the query ``cs``.

    Every document of ``spider_data.baiduqa_`` is scored with
    ``vector_similarity(item['pureQstr'], cs)``. The highest-scoring
    document fills ``matchQ1`` / ``bestAnswer`` and the second
    highest-scoring document fills ``matchQ2`` / ``secAnswer``.

    Args:
        cs: The query passed as the second argument to
            ``vector_similarity``.

    Returns:
        An ``Answer`` instance. Attributes are only assigned for ranks
        that were actually filled (score strictly greater than 0), so an
        empty collection returns an untouched ``Answer()``.
    """
    client = MG()
    db = client.spider_data
    collect = db.baiduqa_
    data = collect.find()

    # Scores start at 0, so only strictly positive similarities can rank.
    sim_best = 0
    sim_sec = 0
    best = None    # (title, answer) of the current best match
    second = None  # (title, answer) of the current runner-up

    for item in data:
        # Similarity between the stored question and the query.
        sim_num = vector_similarity(item['pureQstr'], cs)
        if sim_num > sim_best:
            # The previous best becomes the runner-up instead of being
            # discarded; otherwise an ascending sequence of scores would
            # leave the second slot empty or stale.
            if best is not None:
                second = best
                sim_sec = sim_best
            best = (item['title'], item['answer'])
            sim_best = sim_num
        elif sim_num > sim_sec:
            second = (item['title'], item['answer'])
            sim_sec = sim_num

    answer = Answer()
    if best is not None:
        answer.matchQ1, answer.bestAnswer = best
    if second is not None:
        answer.matchQ2, answer.secAnswer = second
    return answer
def Keyword_Compare(cs):
    """Rank stored Q&A documents by keyword overlap with the query ``cs``.

    The query is tokenised with ``jieba.lcut(cs, cut_all=True)``. Each
    document of ``spider_data.baiduqa_`` and then of
    ``spider_data.baiduQaFormal`` is scored by the number of query tokens
    found in the document's ``keyword`` field.

    Args:
        cs: The query string to tokenise and match.

    Returns:
        A new ``Answer`` instance with ``matchQ1`` / ``bestAnswer`` /
        ``best`` for the top match and ``matchQ2`` / ``secAnswer`` /
        ``sec`` for the runner-up. Unfilled ranks keep ``''`` and ``0``.
    """
    client = MG()
    db = client.spider_data
    # Both cursors are created up front, as before; they are consumed in
    # order below.
    data = db.baiduqa_.find()
    data2 = db.baiduQaFormal.find()
    csx = jieba.lcut(cs, cut_all=True)

    best = 0
    sec = 0
    bestAns = ''
    matchQ1 = ''
    secAns = ''
    matchQ2 = ''

    # One shared loop replaces the two copy-pasted scans.
    for cursor in (data, data2):
        for item in cursor:
            lx = item['keyword']
            # Count query tokens (duplicates included) present in the
            # document's keyword field.
            tmplen = len([x for x in csx if x in lx])
            if tmplen > best:
                # Demote the previous best to runner-up rather than
                # dropping it, so the second slot is the true second.
                matchQ2, secAns, sec = matchQ1, bestAns, best
                matchQ1 = item['title']
                bestAns = item['answer']
                best = tmplen
            elif tmplen > sec:
                matchQ2 = item['title']
                secAns = item['answer']
                sec = tmplen

    # Instantiate Answer: assigning onto the class itself would share the
    # result between every caller.
    answer = Answer()
    answer.bestAnswer = bestAns
    answer.matchQ1 = matchQ1
    answer.best = best
    answer.secAnswer = secAns
    answer.matchQ2 = matchQ2
    answer.sec = sec
    return answer
def build():
    """Populate the ``keyword`` field of every ``baiduQaFormal`` document.

    Each document's ``title`` is printed, tokenised with
    ``jieba.lcut(title, cut_all=True)``, and the resulting token list is
    stored on that document under ``keyword``.
    """
    client = MG()
    db = client.spider_data
    collect = db.baiduQaFormal
    for item in collect.find():
        precut = item['title']
        print(precut)
        cutted = jieba.lcut(precut, cut_all=True)
        # update_one replaces the legacy Collection.update, which was
        # deprecated in PyMongo 3.0 and removed in 4.0. The filter is a
        # unique _id, so exactly one document is targeted either way.
        collect.update_one(
            {"_id": item["_id"]},
            {"$set": {"keyword": cutted}},
        )
def __init__(self):
    """Create the AIML kernel, load its resources and bind the MongoDB collection.

    Sets ``self.mybot`` to an ``aiml.Kernel`` that has learned the three
    template files under ``COMM_robot``, loads the jieba user dictionary
    from the same directory, and stores the
    ``spider_data.baidu_baike_3_test`` collection on ``self.collect``.
    """
    self.mybot = aiml.Kernel()

    # Parent of the directory that contains this file.
    pathZ = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
    print(pathZ)

    # Load the bot's conversation template files, in their original order.
    for template in (
        '/COMM_robot/std-startup.xml',
        '/COMM_robot/Common_conversation.aiml',
        '/COMM_robot/question_analyse.aiml',
    ):
        self.mybot.learn(pathZ + template)

    # Load the predefined segmentation dictionary.
    jieba.load_userdict(pathZ + '/COMM_robot/text_dict.txt')

    # MongoDB configuration.
    client = MG()
    db = client.spider_data
    self.collect = db.baidu_baike_3_test
def __init__(self):
    """Initialise the search-index settings and the MongoDB handles.

    Attributes set:
        _index: Index name, ``"law_data"``.
        es: ``ES`` client built for 127.0.0.1:9200.
        doc_type: Document type, ``"line"``.
        MGclient: ``MG`` client for the local MongoDB URI.
        db: The ``spider_data`` database.
        collect: The ``LAW`` collection.
    """
    # Adjustable: index name and document type.
    self._index = "law_data"
    # Adjustable, though rarely needed: ES server settings.
    es_nodes = [{"host": "127.0.0.1", "port": 9200}]
    self.es = ES(es_nodes)
    self.doc_type = "line"

    # No change needed: MongoDB connection.
    mongo = MG("mongodb://*****:*****@localhost:27017")
    self.MGclient = mongo
    # Adjustable: database name, then collection name.
    database = mongo.spider_data
    self.db = database
    self.collect = database.LAW
def __init__(self):
    """Initialise the search-index settings and the MongoDB handles.

    Attributes set:
        _index: Index name, ``"news_case"``.
        es: ``ES`` client built for 127.0.0.1:9200.
        doc_type: Document type, ``"case"``.
        MGclient: ``MG`` client created with default arguments.
        db: The ``spider_data`` database.
        collect: The ``tagged_case`` collection.
    """
    # Adjustable: index name and document type.
    self._index = "news_case"
    # Adjustable, though rarely needed: ES server settings.
    es_nodes = [{"host": "127.0.0.1", "port": 9200}]
    self.es = ES(es_nodes)
    self.doc_type = "case"

    # No change needed: MongoDB connection.
    mongo = MG()
    self.MGclient = mongo
    # Adjustable: database name, then collection name.
    database = mongo.spider_data
    self.db = database
    self.collect = database.tagged_case
def __init__(self):
    """Initialise the search-index settings and the MongoDB handles.

    Attributes set:
        _index: Index name, ``"baike_data_abstract"``.
        es: ``ES`` client built for localhost:9200.
        doc_type: Document type, ``"knowledge"``.
        MGclient: ``MG`` client for the local MongoDB URI.
        db: The ``spider_data`` database.
        collect: The ``baidu_baike_BIG`` collection.
    """
    # Adjustable: index name and document type.
    self._index = "baike_data_abstract"
    # Adjustable, though rarely needed: ES server settings.
    es_nodes = [{"host": "localhost", "port": 9200}]
    self.es = ES(es_nodes)
    self.doc_type = "knowledge"

    # No change needed: MongoDB connection.
    mongo = MG("mongodb://*****:*****@localhost:27017")
    self.MGclient = mongo
    # Adjustable: database name, then collection name.
    database = mongo.spider_data
    self.db = database
    self.collect = database.baidu_baike_BIG
def __init__(self):
    """Initialise the search-index settings and the MongoDB handles.

    Attributes set:
        _index: Index name, ``"qa_data"``.
        es: ``ES`` client built for localhost:9200.
        doc_type: Document type, ``"qa"``.
        MGclient: ``MG`` client for the local MongoDB URI.
        db: The ``spider_data`` database.
        collect: The ``qa_byHand`` collection.
    """
    # Adjustable: index name and document type.
    self._index = "qa_data"
    # Adjustable, though rarely needed: ES server settings.
    es_nodes = [{"host": "localhost", "port": 9200}]
    self.es = ES(es_nodes)
    self.doc_type = "qa"

    # No change needed: MongoDB connection. Credentials are carried in
    # the URI; no separate authenticate call is made.
    mongo = MG("mongodb://*****:*****@localhost:27017")
    self.MGclient = mongo
    # Adjustable: database name, then collection name.
    database = mongo.spider_data
    self.db = database
    self.collect = database.qa_byHand
#导入法条法规 刑法 from pymongo import MongoClient as MG import re file = open( 'D:\\Google Download\\#按日期管理的下载\\190404\\法律法规大全\\程序法\\中华人民共和国仲裁法.txt') line = file.readline() client = MG() db = client.spider_data collect = db.LAW lawDoc = dict() lawDoc['name'] = line lawDoc['content'] = list() #re规则 bian = re.compile('第.*编!') zhang = re.compile('第.*章!') jie = re.compile('第.*节!') tiaoT = re.compile('第.*?条!') tiao = re.compile('第.*?条') fuze = re.compile('附!则') #根据法条编制规则控制进程 ctrlcount = 0 am_i_in_tiao = 0 #写入进程开始 while line: print(line) pureline = line.replace(u'\u3000', '!')
def __init__(self):
    """Create an ``MG`` client with default arguments and keep it on ``self.client``."""
    mongo = MG()
    self.client = mongo
from pymongo import MongoClient as MG
import re
import datetime

# Connect to the local MongoDB server and select the NseDb database.
client = MG("mongodb://127.0.0.1:27017")
db = client.NseDb
#db.NseCodesCollection.insert({"_id":"NseCodes","Codes":[]})
# Input files, opened at import time from hard-coded local paths.
NseCodesFile = open("C:\\Users\\Ranjith\\Desktop\\NSE Scripts\\CompleteList.txt")
NseHistoryFile = open("C:\\Users\\Ranjith\\Desktop\\NSE Scripts\\NseHistory.txt")


class InsertNseData :
    """Parse the NSE history file; constructing an instance runs the import."""

    def __init__(self):
        """Run ``InsertData`` immediately on construction."""
        self.InsertData()

    def InsertData(self):
        """Parse history lines from ``NseHistoryFile``.

        NOTE(review): the visible source ends inside the inner loop; the
        rest of this method lies outside this view.
        """
        # Only the first line of the file is processed ([:1]).
        for Line in NseHistoryFile.readlines()[:1]:
            # The code is the text before the first ':' with single quotes removed.
            Code = re.sub("'","",Line.split(':')[0]); print (Code)
            CodeLen = len(Code)
            # Skip the code plus two more characters, and drop the last
            # two characters of the line.
            History = Line[CodeLen+2:-2]
            # Split the remainder on the literal text "datetime.date".
            History = History.split("datetime.date")
            print (History)
            # The first piece (text before the first "datetime.date") is skipped.
            for Data in History[1:]:
                #print (Data)
                # Skip pieces that contain a single quote.
                if "'" in Data:
                    continue
                # Requires exactly one ':' in the piece, otherwise the
                # unpacking raises ValueError.
                Date, ClosePrice = Data.split(':')
def __init__(self):
    """Create an ``MG`` client for the local MongoDB URI and keep it on ``self.client``."""
    uri = "mongodb://*****:*****@localhost:27017"
    self.client = MG(uri)