Beispiel #1
0
def vecCompare(cs):
    """Find the two stored questions most similar to ``cs``.

    Every document of ``spider_data.baiduqa_`` is scored with
    ``vector_similarity(item['pureQstr'], cs)``; the best and the
    second-best scoring documents are copied onto a fresh ``Answer``.

    Args:
        cs: The query text passed as second argument to
            ``vector_similarity``.

    Returns:
        An ``Answer`` instance. ``matchQ1``/``bestAnswer`` are set from the
        top-scoring document and ``matchQ2``/``secAnswer`` from the
        runner-up. Attributes are left untouched when no document scores
        above 0 (or, for the runner-up, when only one does).
    """
    client = MG()
    db = client.spider_data
    collect = db.baiduqa_
    data = collect.find()

    # Result object to fill in.
    answer = Answer()

    # Running top-two scores and the documents that produced them.
    sim_best = 0
    sim_sec = 0
    best_item = None
    sec_item = None

    for item in data:
        # Similarity between the stored question and the query.
        sim_num = vector_similarity(item['pureQstr'], cs)

        if sim_num > sim_best:
            # The previous best becomes the runner-up instead of being
            # discarded; otherwise the true second-best match is lost
            # whenever a better candidate shows up later in the scan.
            sim_sec, sec_item = sim_best, best_item
            sim_best, best_item = sim_num, item
        elif sim_num > sim_sec:
            sim_sec, sec_item = sim_num, item

    if best_item is not None:
        answer.matchQ1 = best_item['title']
        answer.bestAnswer = best_item['answer']
    if sec_item is not None:
        answer.matchQ2 = sec_item['title']
        answer.secAnswer = sec_item['answer']

    return answer
def Keyword_Compare(cs):
    """Find the two stored questions sharing the most keywords with ``cs``.

    ``cs`` is segmented with ``jieba.lcut(cs, cut_all=True)``. Each document
    of ``spider_data.baiduqa_`` and then ``spider_data.baiduQaFormal`` is
    scored by how many of those segments occur in its ``'keyword'`` field.

    Args:
        cs: The query text to segment and compare.

    Returns:
        A new ``Answer`` instance with ``bestAnswer``/``matchQ1``/``best``
        describing the top-scoring document and ``secAnswer``/``matchQ2``/
        ``sec`` describing the runner-up. Text fields default to ``''`` and
        counts to ``0`` when no document shares a keyword.
    """
    client = MG()
    db = client.spider_data
    # Both collections are scanned in this order with the same rule.
    cursors = (db.baiduqa_.find(), db.baiduQaFormal.find())

    csx = jieba.lcut(cs, cut_all=True)

    best = 0
    sec = 0
    best_item = None
    sec_item = None

    for cursor in cursors:
        for item in cursor:
            lx = item['keyword']
            # Number of query segments that appear in this document's keywords.
            overlap = sum(1 for x in csx if x in lx)

            if overlap > best:
                # Demote the previous best to runner-up rather than dropping
                # it, so the second-best match is not lost.
                sec, sec_item = best, best_item
                best, best_item = overlap, item
            elif overlap > sec:
                sec, sec_item = overlap, item

    # Instantiate the result: assigning onto the Answer class itself would
    # share (and overwrite) the results across every call.
    answer = Answer()
    answer.bestAnswer = best_item['answer'] if best_item is not None else ''
    answer.matchQ1 = best_item['title'] if best_item is not None else ''
    answer.best = best
    answer.secAnswer = sec_item['answer'] if sec_item is not None else ''
    answer.matchQ2 = sec_item['title'] if sec_item is not None else ''
    answer.sec = sec

    return answer
Beispiel #3
0
def build():
    """Store a jieba segmentation of each title as that document's keywords.

    For every document in ``spider_data.baiduQaFormal`` the ``'title'`` is
    printed, segmented with ``jieba.lcut(..., cut_all=True)``, and the
    resulting list is written back to the same document under ``'keyword'``.
    """
    client = MG()
    db = client.spider_data
    collect = db.baiduQaFormal

    for item in collect.find():
        precut = item['title']

        print(precut)
        cutted = jieba.lcut(precut, cut_all=True)

        # update_one is the single-document equivalent of the legacy
        # Collection.update call (deprecated in PyMongo 3, removed in 4).
        collect.update_one(
            {"_id": item["_id"]},
            {"$set": {"keyword": cutted}},
        )
Beispiel #4
0
    def __init__(self):
        """Create the AIML kernel, load its templates and open the Mongo collection.

        All resource paths are resolved against the parent of the directory
        that contains this file; that base path is printed once.
        """
        self.mybot = aiml.Kernel()

        parent_dir = os.path.dirname(os.path.dirname(__file__))
        base_dir = os.path.abspath(parent_dir)
        print(base_dir)

        # Load the bot's dialogue template files, in this order.
        template_files = (
            '/COMM_robot/std-startup.xml',
            '/COMM_robot/Common_conversation.aiml',
            '/COMM_robot/question_analyse.aiml',
        )
        for template in template_files:
            self.mybot.learn(base_dir + template)

        # Load the prepared user dictionary into jieba.
        jieba.load_userdict(base_dir + '/COMM_robot/text_dict.txt')

        # MongoDB: keep a handle on spider_data.baidu_baike_3_test.
        self.collect = MG().spider_data.baidu_baike_3_test
Beispiel #5
0
    def __init__(self):
        """Initialise the Elasticsearch and MongoDB settings for this instance.

        Every value is a fixed setting; edit it here to point at a different
        index, document type, database or collection.
        """
        # Elasticsearch server settings (adjustable, but rarely needed).
        es_hosts = [{"host": "127.0.0.1", "port": 9200}]

        # Adjustable: name of the index.
        self._index = "law_data"
        self.es = ES(es_hosts)
        # Adjustable: document type.
        self.doc_type = "line"

        # MongoDB connection (no change needed).
        mongo = MG("mongodb://*****:*****@localhost:27017")
        # Adjustable: database name.
        database = mongo.spider_data

        self.MGclient = mongo
        self.db = database
        # Adjustable: collection name.
        self.collect = database.LAW
Beispiel #6
0
    def __init__(self):
        """Initialise the Elasticsearch and MongoDB settings for this instance.

        Every value is a fixed setting; edit it here to point at a different
        index, document type, database or collection.
        """
        # Elasticsearch server settings (adjustable, but rarely needed).
        es_hosts = [{"host": "127.0.0.1", "port": 9200}]

        # Adjustable: name of the index.
        self._index = "news_case"
        self.es = ES(es_hosts)
        # Adjustable: document type.
        self.doc_type = "case"

        # MongoDB connection (no change needed); MG() is called without arguments.
        mongo = MG()
        # Adjustable: database name.
        database = mongo.spider_data

        self.MGclient = mongo
        self.db = database
        # Adjustable: collection name.
        self.collect = database.tagged_case
Beispiel #7
0
    def __init__(self):
        """Initialise the Elasticsearch and MongoDB settings for this instance.

        Every value is a fixed setting; edit it here to point at a different
        index, document type, database or collection.
        """
        # Elasticsearch server settings (adjustable, but rarely needed).
        es_hosts = [{"host": "localhost", "port": 9200}]

        # Adjustable: name of the index.
        self._index = "baike_data_abstract"
        self.es = ES(es_hosts)
        # Adjustable: document type.
        self.doc_type = "knowledge"

        # MongoDB connection (no change needed).
        mongo = MG("mongodb://*****:*****@localhost:27017")
        # Adjustable: database name.
        database = mongo.spider_data

        self.MGclient = mongo
        self.db = database
        # Adjustable: collection name.
        self.collect = database.baidu_baike_BIG
    def __init__(self):
        """Initialise the Elasticsearch and MongoDB settings for this instance.

        Every value is a fixed setting; edit it here to point at a different
        index, document type, database or collection.
        """
        # Elasticsearch server settings (adjustable, but rarely needed).
        es_hosts = [{"host": "localhost", "port": 9200}]

        # Adjustable: name of the index.
        self._index = "qa_data"
        self.es = ES(es_hosts)
        # Adjustable: document type.
        self.doc_type = "qa"

        # MongoDB connection (no change needed). Credentials travel in the
        # URI; a separate self.db.authenticate("reader","reader") call was
        # left disabled here.
        mongo = MG("mongodb://*****:*****@localhost:27017")
        # Adjustable: database name.
        database = mongo.spider_data

        self.MGclient = mongo
        self.db = database
        # Adjustable: collection name.
        self.collect = database.qa_byHand
Beispiel #9
0
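# Import statutes and regulations (originally labelled "Criminal Law"; the path opened below points to the Arbitration Law text)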

from pymongo import MongoClient as MG
import re

# Open the statute's text file and read its first line, which is stored
# as the document name below.
file = open(
    'D:\\Google Download\\#按日期管理的下载\\190404\\法律法规大全\\程序法\\中华人民共和国仲裁法.txt')
line = file.readline()

# MongoDB target: database spider_data, collection LAW.
client = MG()
db = client.spider_data
collect = db.LAW
# Document being assembled: 'name' is the first line of the file,
# 'content' starts out as an empty list.
lawDoc = dict()
lawDoc['name'] = line
lawDoc['content'] = list()

# Regex rules. Each pattern matches a heading marker: "第…编", "第…章",
# "第…节", "第…条" and "附则", most of them followed by (or containing) '!'.
# NOTE(review): the '!' presumably stands for the U+3000 space that the loop
# below replaces with '!' -- confirm against the rest of the script.
bian = re.compile('第.*编!')
zhang = re.compile('第.*章!')
jie = re.compile('第.*节!')
tiaoT = re.compile('第.*?条!')
tiao = re.compile('第.*?条')
fuze = re.compile('附!则')
# State used to steer processing according to the statute's layout rules.
ctrlcount = 0
am_i_in_tiao = 0

# Start of the write process: runs while `line` is non-empty.
while line:
    print(line)
    # Replace every ideographic space (U+3000) in the line with '!'.
    pureline = line.replace(u'\u3000', '!')
Beispiel #10
0
    def __init__(self):
        """Create a client by calling MG() with no arguments and keep it on self.client."""

        self.client = MG()
Beispiel #11
0
from pymongo import MongoClient as MG
import re
import datetime

# Connect to MongoDB at 127.0.0.1:27017 and select the NseDb database.
client = MG("mongodb://127.0.0.1:27017")
db = client.NseDb
# Disabled: insert of an NseCodes document with an empty Codes list.
#db.NseCodesCollection.insert({"_id":"NseCodes","Codes":[]})

# Input files, opened at module load and kept as module-level handles;
# nothing in the code shown here closes them.
NseCodesFile = open("C:\\Users\\Ranjith\\Desktop\\NSE Scripts\\CompleteList.txt")
NseHistoryFile = open("C:\\Users\\Ranjith\\Desktop\\NSE Scripts\\NseHistory.txt")

class InsertNseData :
	"""Reads NseHistoryFile and splits its entries; constructing an instance calls InsertData()."""
	
	def __init__(self):
		"""Call InsertData() immediately."""
		self.InsertData()

	def InsertData(self):
		"""Split a line of NseHistoryFile into a code and its per-date pieces.

		NOTE(review): each line presumably looks like
		"'CODE': {datetime.date(...): price, ...}" -- confirm against the
		actual file format.
		"""

		# The [:1] slice means only the first line of the file is processed.
		for Line in NseHistoryFile.readlines()[:1]:
			
			# Code: the text before the first ':' with single quotes removed.
			Code = re.sub("'","",Line.split(':')[0]); print (Code)
			CodeLen = len(Code)
			# Rest of the line, starting CodeLen+2 characters in and dropping
			# the final two characters.
			History = Line[CodeLen+2:-2]
			# Split on the literal text "datetime.date"; the piece before the
			# first occurrence is skipped by the [1:] slice below.
			History = History.split("datetime.date")
			print (History)
			for Data in History[1:]:
				#print (Data)
				# Pieces containing a single quote are skipped.
				if "'" in Data:
					continue
				
				# Raises ValueError unless the piece contains exactly one ':'.
				Date, ClosePrice = Data.split(':')
    def __init__(self):
        """Create a client for the MongoDB URI below and keep it on self.client."""

        # The credentials in the URI appear masked ("*****") in this source.
        self.client = MG("mongodb://*****:*****@localhost:27017")