Beispiel #1
0
def Begin_to_identify(request):
    """Render the index page, linking the entities found in submitted text.

    When the request carries POST data, the ``user_text`` field is segmented
    with the preloaded THULAC instance and passed to ``get_NE``. Two HTML
    fragments are then stored in the template context:

    * ``rlt``: the text rebuilt from the ``get_NE`` pairs, with entities
      wrapped in ``<a>`` tags;
    * ``seg_word``: every token followed by its part-of-speech tag.

    Every value interpolated into the markup is HTML-escaped, because the
    words originate from user input and would otherwise allow markup
    injection wherever these fragments are rendered unescaped.

    :param request: the incoming Django request.
    :return: the rendered ``index.html`` response.
    """
    import html
    from urllib.parse import quote

    ctx = {}
    if request.POST:
        # Treat a missing field as an empty submission instead of letting the
        # lookup raise and surface as a server error.
        key = request.POST.get("user_text", "").strip()

        # With text=False each item is a [word, part-of-speech] pair.
        tag_list = pre_load_thu.cut(key, text=False)

        popover = (
            "<a href='#'  data-original-title='{title}'  data-placement='top'"
            " data-trigger='hover' data-content='{detail}' class='popovers'>"
            "{word}</a>"
        )
        stock_link = (
            "<a href='http://stockdata.stock.hexun.com/gszl/s{code}.shtml'>"
            "{word}</a>"
        )

        pieces = []
        for pair in get_NE(key):
            word = html.escape(str(pair[0]))
            if pair[1] == 0:
                # Pairs tagged 0 are emitted as plain text, without a link.
                pieces.append(word)
                continue

            tag = str(pair[1])
            if temporaryok(pair[1]):
                # Popover-only anchor; "(暂无资料)" reads "no data available yet".
                label = html.escape(tag)
                pieces.append(popover.format(
                    title=label + "(暂无资料)", detail=label, word=word))
                continue

            # Otherwise the tag is used as the code in the external stock URL.
            pieces.append(stock_link.format(
                code=html.escape(quote(tag, safe="")), word=word))

        ctx['rlt'] = "".join(pieces)

        # Token sequence with the part-of-speech tag shown after each word.
        ctx['seg_word'] = "".join(
            "{0} <strong><small>[{1}]</small></strong> ".format(
                html.escape(str(t[0])), html.escape(str(t[1])))
            for t in tag_list
        )

    return render(request, "index.html", ctx)
Beispiel #2
0
def ER_post2(request):
    """Render the index page with entity links produced by the sentence parser.

    When the request carries POST data, the ``user_text`` field is parsed
    with the shared ``parse_util`` instance (created on first use) and two
    HTML fragments are stored in the template context:

    * ``rlt``: the text rebuilt from ``get_NE_List``, with entities wrapped
      in popover ``<a>`` tags;
    * ``seg_word``: every word followed by its part-of-speech tag.

    Requests without POST data render the page with an empty context.
    Every value interpolated into the markup is HTML-escaped, because the
    words originate from user input.

    :param request: the incoming Django request.
    :return: the rendered ``index.html`` response.
    """
    global parse_util
    import html
    from urllib.parse import quote

    ctx = {}
    if request.POST:
        # Treat a missing field as an empty submission instead of letting the
        # lookup raise and surface as a server error.
        org_text = request.POST.get('user_text', '')

        # The parser is created lazily and then reused across requests.
        if parse_util is None:
            parse_util = ParseUtil.Parse_Util()
        words, postags, netags, _arcs = parse_util.parse_sentence(org_text)

        anchor = (
            "<a href='{href}'  data-original-title='{title}'  data-placement='top'"
            " data-trigger='hover' data-content='{detail}' class='popovers'>"
            "{word}</a>"
        )

        pieces = []
        for pair in get_NE_List(words, netags):
            word = html.escape(str(pair[0]))
            if pair[1] == 'O':
                # Pairs tagged 'O' are emitted as plain text, without a link.
                pieces.append(word)
                continue

            if temporaryok(pair[1]):
                # Popover only; "(暂无资料)" reads "no data available yet".
                href, suffix = "#", "(暂无资料)"
            else:
                # Percent-encode the word so '&', '#', quotes etc. cannot
                # break out of the query string or the attribute.
                href = "detail.html?title=" + quote(str(pair[0]), safe="")
                suffix = ""

            pieces.append(anchor.format(
                href=href,
                title=html.escape(get_explain(pair[1])) + suffix,
                detail=html.escape(get_detail_explain(pair[1])),
                word=word,
            ))
        ctx['rlt'] = "".join(pieces)

        # Built inside the POST branch: words/postags only exist once a
        # sentence has been parsed, so a plain GET must not touch them.
        ctx['seg_word'] = "".join(
            "{0} <strong><small>[{1}]</small></strong> ".format(
                html.escape(str(word)), html.escape(str(postag)))
            for word, postag in zip(words, postags)
        )

    return render(request, "index.html", ctx)
Beispiel #3
0
def ER_post(request):
    """Render the index page with popover links for recognised entities.

    When the request carries POST data, the ``user_text`` field is segmented
    with the preloaded THULAC instance and passed to ``get_NE``. Two HTML
    fragments are then stored in the template context:

    * ``rlt``: the text rebuilt from the ``get_NE`` pairs, with entities
      wrapped in popover ``<a>`` tags;
    * ``seg_word``: every token followed by its part-of-speech tag.

    Every value interpolated into the markup is HTML-escaped, because the
    words originate from user input.

    :param request: the incoming Django request.
    :return: the rendered ``index.html`` response.
    """
    import html
    from urllib.parse import quote

    ctx = {}
    if request.POST:
        # Treat a missing field as an empty submission instead of letting the
        # lookup raise and surface as a server error.
        key = request.POST.get('user_text', '').strip()

        # With text=False, item i holds the word at [0] and its
        # part-of-speech tag at [1].
        tag_list = pre_load_thu.cut(key, text=False)

        anchor = (
            "<a href='{href}'  data-original-title='{title}'  data-placement='top'"
            " data-trigger='hover' data-content='{detail}' class='popovers'>"
            "{word}</a>"
        )

        pieces = []
        for pair in get_NE(key):
            word = html.escape(str(pair[0]))
            if pair[1] == 0:
                # Pairs tagged 0 are emitted as plain text, without a link.
                pieces.append(word)
                continue

            if temporaryok(pair[1]):
                # Popover only; "(暂无资料)" reads "no data available yet".
                href, suffix = "#", "(暂无资料)"
            else:
                # Percent-encode the word so '&', '#', quotes etc. cannot
                # break out of the query string or the attribute.
                href = "detail.html?title=" + quote(str(pair[0]), safe="")
                suffix = ""

            pieces.append(anchor.format(
                href=href,
                title=html.escape(get_explain(pair[1])) + suffix,
                detail=html.escape(get_detail_explain(pair[1])),
                word=word,
            ))
        ctx['rlt'] = "".join(pieces)

        # Token sequence with the part-of-speech tag shown after each word.
        ctx['seg_word'] = "".join(
            "{0} <strong><small>[{1}]</small></strong> ".format(
                html.escape(str(t[0])), html.escape(str(t[1])))
            for t in tag_list
        )

    return render(request, "index.html", ctx)
Beispiel #4
0
def ER_post(request):
    """Render the index page with popover links for recognised entities.

    When the request carries POST data, the ``user_text`` field is encoded
    to UTF-8, segmented with the preloaded THULAC instance and passed to
    ``get_NE``. Two HTML fragments are then stored in the template context:

    * ``rlt``: the text rebuilt from the ``get_NE`` pairs, with entities
      wrapped in popover ``<a>`` tags;
    * ``seg_word``: every token followed by its part-of-speech tag.

    :param request: the incoming Django request.
    :return: the rendered ``index.html`` response.
    """
    ctx = {}
    if request.POST:
        # Treat a missing field as an empty submission instead of letting the
        # lookup raise and surface as a server error.
        key = request.POST.get('user_text', '').encode('utf-8').strip()

        # With text=False, item i holds the word at [0] and its
        # part-of-speech tag at [1].
        tag_list = pre_load_thu.cut(key, text=False)

        # NOTE(review): the words below come from user input and are placed
        # into HTML without escaping. This variant works on UTF-8 encoded
        # strings, so the right escaping helper depends on the runtime's
        # string type -- confirm and add escaping before exposing this view.
        pieces = []
        for pair in get_NE(key):
            if pair[1] == 0:
                # Pairs tagged 0 are emitted as plain text, without a link.
                pieces.append(pair[0])
                continue

            if temporaryok(pair[1]):
                # Popover only; "(暂无资料)" reads "no data available yet".
                pieces.append(
                    "<a href='#'  data-original-title='"
                    + get_explain(pair[1])
                    + "(暂无资料)'  data-placement='top' data-trigger='hover' data-content='"
                    + get_detail_explain(pair[1])
                    + "' class='popovers'>" + pair[0] + "</a>"
                )
                continue

            pieces.append(
                "<a href='detail.html?title=" + pair[0]
                + "'  data-original-title='"
                + get_explain(pair[1])
                + "'  data-placement='top' data-trigger='hover' data-content='"
                + get_detail_explain(pair[1])
                + "' class='popovers'>" + pair[0] + "</a>"
            )
        ctx['rlt'] = "".join(pieces)

        # An earlier, disabled approach merged adjacent tokens and looked the
        # result up via db.matchHudongItembyTitle; get_NE has replaced it.

        # Token sequence with the part-of-speech tag shown after each word.
        ctx['seg_word'] = "".join(
            t[0] + " <strong><small>[" + t[1] + "]</small></strong> "
            for t in tag_list
        )

    return render(request, "index.html", ctx)
def ER_post(request):
    """Render the index page with popover links for recognised entities.

    When the request carries POST data, the ``user_text`` field is segmented
    with the preloaded THULAC instance and passed to ``get_NE``. Two HTML
    fragments are then stored in the template context:

    * ``rlt``: the text rebuilt from the ``get_NE`` pairs, with entities
      wrapped in popover ``<a>`` tags;
    * ``seg_word``: every token followed by its part-of-speech tag.

    Every value interpolated into the markup is HTML-escaped, because the
    words originate from user input.

    :param request: the incoming Django request.
    :return: the rendered ``index.html`` response.
    """
    import html
    from urllib.parse import quote

    ctx = {}
    if request.POST:
        # Treat a missing field as an empty submission instead of letting the
        # lookup raise and surface as a server error.
        key = request.POST.get('user_text', '').strip()

        # With text=False, item i holds the word at [0] and its
        # part-of-speech tag at [1].
        tag_list = pre_load_thu.cut(key, text=False)

        anchor = (
            "<a href='{href}'  data-original-title='{title}'  data-placement='top'"
            " data-trigger='hover' data-content='{detail}' class='popovers'>"
            "{word}</a>"
        )

        pieces = []
        for pair in get_NE(key):
            word = html.escape(str(pair[0]))
            if pair[1] == 0:
                # Pairs tagged 0 are emitted as plain text, without a link.
                pieces.append(word)
                continue

            if temporaryok(pair[1]):
                # Popover only; "(暂无资料)" reads "no data available yet".
                href, suffix = "#", "(暂无资料)"
            else:
                # Percent-encode the word so '&', '#', quotes etc. cannot
                # break out of the query string or the attribute.
                href = "detail.html?title=" + quote(str(pair[0]), safe="")
                suffix = ""

            pieces.append(anchor.format(
                href=href,
                title=html.escape(get_explain(pair[1])) + suffix,
                detail=html.escape(get_detail_explain(pair[1])),
                word=word,
            ))
        ctx['rlt'] = "".join(pieces)

        # An earlier, disabled approach merged adjacent tokens and looked the
        # result up via db.matchHudongItembyTitle; get_NE has replaced it.

        # Token sequence with the part-of-speech tag shown after each word.
        ctx['seg_word'] = "".join(
            "{0} <strong><small>[{1}]</small></strong> ".format(
                html.escape(str(t[0])), html.escape(str(t[1])))
            for t in tag_list
        )

    return render(request, "index.html", ctx)
                            #过滤掉<doc >  </doc> 等无用行
                            if (len(line) < 2 or line[0:4] == '<doc'
                                    or line[0:6] == "</doc>"):
                                continue
                            #分句
                            statements = CutStatements(line)
                            for statement in statements:
                                #分词
                                cutResult = get_NE(statement.strip())
                                #得到每句话的实体列表后,两两匹配查询是否具有某种关系,如果有的话就写到文件中
                                #entityList 存储实体列表和实体出现的位置,entity1存储实体名称,entity1Index存储实体位置
                                entityList = []
                                nowIndex = -1
                                for word in cutResult:
                                    if (word[1] != 0
                                            and not temporaryok(word[1])):
                                        entity1Index = statement.index(
                                            word[0], nowIndex + 1)
                                        entityList.append({
                                            'entity1':
                                            word[0],
                                            'entity1Index':
                                            entity1Index
                                        })
                                        nowIndex = entity1Index + len(
                                            word[0]) - 1

                                entityNumber = len(entityList)
                                for i in range(entityNumber):
                                    answer = None
                                    #answer = entityRelationDict.get(entityList[i].get('entity1'))
Beispiel #7
0
							if(count%100 == 0):
								print(filePath+"  "+str(count))
							#过滤掉<doc >  </doc> 等无用行
							if(len(line)< 2 or line[0:4] == '<doc' or line[0:6] == "</doc>"):
								continue
							#分句
							statements = CutStatements(line)
							for statement in statements:
								#分词
								cutResult = get_NE(statement.strip())
								#得到每句话的实体列表后,两两匹配查询是否具有某种关系,如果有的话就写到文件中
								#entityList 存储实体列表和实体出现的位置,entity1存储实体名称,entity1Index存储实体位置
								entityList = []
								nowIndex = -1
								for word in cutResult:
									if(word[1]!=0 and not temporaryok(word[1])):
										entity1Index = statement.index(word[0],nowIndex+1)
										entityList.append({'entity1':word[0],'entity1Index':entity1Index})
										nowIndex = entity1Index+len(word[0])-1

								entityNumber = len(entityList)
								for i in range(entityNumber):
									answer = None
									#answer = entityRelationDict.get(entityList[i].get('entity1'))
									#if(entityRelationDict.get(entityList[i].get('entity1')) is None):
									answer = db.findRelationBetweenEntities(entityList[i].get('entity1'))
										#entityRelationDict[entityList[i].get('entity1')] = answer
									for relation in answer:
										#对neo4j的返回值进行处理,原来的返回值中包含一些没用的字符,最终得到的关系是rel,实体是entity2
										if(len(str(relation['rel']).split("\"")) < 2):
											continue