Python get_NE 예제들, toolkit.NER.get_NE Python 예제들

예제 #1

0

파일 보기

def Begin_to_identify(request):
    """Render ``index.html``, annotating the submitted text with entity links.

    Anything the index page needs on first load belongs in this view.

    When the request carries POST data, ``user_text`` is segmented with the
    preloaded thulac model and passed to ``get_NE``.  Two HTML fragments are
    placed in the template context:

    * ``rlt`` -- the text rebuilt from the ``get_NE`` pairs.  A pair whose
      second item equals ``0`` is emitted as plain text; a pair for which
      ``temporaryok`` is true becomes a ``#`` popover anchor marked
      "(暂无资料)"; any other pair links to the hexun stock page built from
      its second item.
    * ``seg_word`` -- every segmented word followed by its tag.

    Every dynamic value is HTML-escaped (and the value placed in the link
    path is URL-quoted), because it originates from user input and is
    concatenated into markup.

    Args:
        request: HTTP request whose ``POST`` mapping may hold ``user_text``.

    Returns:
        Whatever ``render`` returns for ``index.html`` with the context.
    """
    from html import escape
    from urllib.parse import quote

    ctx = {}
    if not request.POST:
        return render(request, "index.html", ctx)

    key = request.POST["user_text"].strip()
    # thulac segmentation result: [[word, tag], [word, tag], ...]
    TagList = pre_load_thu.cut(key, text=False)
    NE_List = get_NE(key)  # entity list of [text, tag-or-0] pairs

    parts = []
    for pair in NE_List:
        word = escape(str(pair[0]))
        tag = pair[1]
        if tag == 0:
            # Not an entity: emit the (escaped) text unchanged.
            parts.append(word)
            continue
        if temporaryok(tag):
            label = escape(str(tag))
            parts.append(
                "<a href='#'  data-original-title='" + label
                + "(暂无资料)'  data-placement='top' data-trigger='hover' data-content='"
                + label + "' class='popovers'>" + word + "</a>"
            )
            continue
        # quote() leaves only URL-safe characters, so the result cannot
        # break out of the single-quoted href attribute.
        parts.append(
            "<a href='http://stockdata.stock.hexun.com/gszl/s"
            + quote(str(tag), safe="") + ".shtml'>" + word + "</a>"
        )

    ctx['rlt'] = "".join(parts)

    # Word / tag listing used to display the segmentation result.
    ctx['seg_word'] = "".join(
        escape(str(t[0])) + " <strong><small>[" + escape(str(t[1]))
        + "]</small></strong> "
        for t in TagList
    )

    return render(request, "index.html", ctx)

예제 #2

0

파일 보기

def ER_post(request):
    """Render ``index.html`` with the submitted text annotated by entity links.

    When the request carries POST data, ``user_text`` is segmented with the
    preloaded thulac model and passed to ``get_NE``.  Two HTML fragments are
    placed in the template context:

    * ``rlt`` -- the text rebuilt from the ``get_NE`` pairs.  A pair whose
      second item equals ``0`` is emitted as plain text; a pair for which
      ``temporaryok`` is true becomes a ``#`` popover anchor marked
      "(暂无资料)"; any other pair links to ``detail.html?title=<text>``.
    * ``seg_word`` -- every segmented word followed by its tag.

    Every dynamic value is HTML-escaped, and the ``title`` query value is
    URL-quoted, because the text originates from user input and is
    concatenated into markup.

    Args:
        request: HTTP request whose ``POST`` mapping may hold ``user_text``.

    Returns:
        Whatever ``render`` returns for ``index.html`` with the context.
    """
    from html import escape
    from urllib.parse import quote

    ctx = {}
    if not request.POST:
        return render(request, "index.html", ctx)

    key = request.POST['user_text'].strip()
    # TagList[i][0] is the i-th word, TagList[i][1] is its tag.
    TagList = pre_load_thu.cut(key, text=False)
    NE_List = get_NE(key)  # entity list of [text, tag-or-0] pairs

    parts = []
    for pair in NE_List:
        raw_word = str(pair[0])
        tag = pair[1]
        if tag == 0:
            # Not an entity: emit the (escaped) text unchanged.
            parts.append(escape(raw_word))
            continue

        if temporaryok(tag):
            href = "#"
            title_suffix = "(暂无资料)"
        else:
            # quote() output contains only URL-safe characters, so it cannot
            # terminate the single-quoted href attribute.
            href = "detail.html?title=" + quote(raw_word)
            title_suffix = ""

        parts.append(
            "<a href='" + href
            + "'  data-original-title='" + escape(str(get_explain(tag)))
            + title_suffix
            + "'  data-placement='top' data-trigger='hover' data-content='"
            + escape(str(get_detail_explain(tag)))
            + "' class='popovers'>" + escape(raw_word) + "</a>"
        )

    ctx['rlt'] = "".join(parts)

    # Word / tag listing used to display the segmentation result.
    ctx['seg_word'] = "".join(
        escape(str(t[0])) + " <strong><small>[" + escape(str(t[1]))
        + "]</small></strong> "
        for t in TagList
    )

    return render(request, "index.html", ctx)

예제 #3

0

파일 보기

def ER_post(request):
    """Build the entity-annotated HTML for ``index.html`` and render it.

    With POST data present, ``user_text`` is UTF-8 encoded and stripped,
    segmented by the preloaded thulac model, and passed to ``get_NE``.  The
    context then receives:

    * ``rlt`` -- the ``get_NE`` pairs joined back into markup: plain text when
      the second item equals ``0``, a ``#`` popover anchor marked
      "(暂无资料)" when ``temporaryok`` accepts the tag, otherwise a popover
      anchor pointing at ``detail.html?title=<text>``.
    * ``seg_word`` -- each segmented word followed by its tag.

    Without POST data the page is rendered with an empty context.
    """
    ctx = {}
    if not request.POST:
        return render(request, "index.html", ctx)

    submitted = request.POST['user_text']
    segmenter = pre_load_thu  # loaded ahead of time
    key = submitted.encode('utf-8').strip()
    # Each item of TagList is indexable: [0] is the word, [1] is its tag.
    TagList = segmenter.cut(key, text=False)
    NE_List = get_NE(key)  # entity list

    # Attribute text shared by both popover anchor variants.
    popover_mid = "'  data-placement='top' data-trigger='hover' data-content='"
    popover_end = "' class='popovers'>"

    fragments = []
    for pair in NE_List:
        tag = pair[1]
        if tag == 0:
            fragments.append(pair[0])
        elif temporaryok(tag):
            title = get_explain(tag)
            detail = get_detail_explain(tag)
            fragments.append("".join((
                "<a href='#'  data-original-title='", title, "(暂无资料)",
                popover_mid, detail, popover_end, pair[0], "</a>",
            )))
        else:
            title = get_explain(tag)
            detail = get_detail_explain(tag)
            fragments.append("".join((
                "<a href='detail.html?title=", pair[0],
                "'  data-original-title='", title,
                popover_mid, detail, popover_end, pair[0], "</a>",
            )))
    ctx['rlt'] = "".join(fragments)

    # Word / tag sequence shown for inspection of the segmentation.
    ctx['seg_word'] = "".join(
        [t[0] + " <strong><small>[" + t[1] + "]</small></strong> "
         for t in TagList]
    )

    return render(request, "index.html", ctx)

예제 #4

0

파일 보기

파일: index_ERform_view.py 프로젝트: dimkang/Agriculture_KnowledgeGraph

def ER_post(request):
    """Render ``index.html`` with the submitted text annotated by entity links.

    When the request carries POST data, ``user_text`` is segmented with the
    preloaded thulac model and passed to ``get_NE``.  Two HTML fragments are
    placed in the template context:

    * ``rlt`` -- the text rebuilt from the ``get_NE`` pairs.  A pair whose
      second item equals ``0`` is emitted as plain text; a pair for which
      ``temporaryok`` is true becomes a ``#`` popover anchor marked
      "(暂无资料)"; any other pair links to ``detail.html?title=<text>``.
    * ``seg_word`` -- every segmented word followed by its tag.

    Every dynamic value is HTML-escaped, and the ``title`` query value is
    URL-quoted, because the text originates from user input and is
    concatenated into markup.

    Args:
        request: HTTP request whose ``POST`` mapping may hold ``user_text``.

    Returns:
        Whatever ``render`` returns for ``index.html`` with the context.
    """
    from html import escape
    from urllib.parse import quote

    ctx = {}
    if not request.POST:
        return render(request, "index.html", ctx)

    key = request.POST['user_text'].strip()
    # TagList[i][0] is the i-th word, TagList[i][1] is its tag.
    TagList = pre_load_thu.cut(key, text=False)
    NE_List = get_NE(key)  # entity list of [text, tag-or-0] pairs

    def popover_link(href, title, detail, label):
        # All arguments must already be safe for a single-quoted attribute.
        return (
            "<a href='" + href
            + "'  data-original-title='" + title
            + "'  data-placement='top' data-trigger='hover' data-content='"
            + detail + "' class='popovers'>" + label + "</a>"
        )

    parts = []
    for pair in NE_List:
        raw_word = str(pair[0])
        tag = pair[1]
        if tag == 0:
            # Not an entity: emit the (escaped) text unchanged.
            parts.append(escape(raw_word))
            continue

        no_detail_page = temporaryok(tag)
        title = escape(str(get_explain(tag)))
        detail = escape(str(get_detail_explain(tag)))
        if no_detail_page:
            parts.append(
                popover_link("#", title + "(暂无资料)", detail, escape(raw_word))
            )
        else:
            # quote() output contains only URL-safe characters, so it cannot
            # terminate the single-quoted href attribute.
            parts.append(
                popover_link(
                    "detail.html?title=" + quote(raw_word),
                    title,
                    detail,
                    escape(raw_word),
                )
            )

    ctx['rlt'] = "".join(parts)

    # Word / tag listing used to display the segmentation result.
    ctx['seg_word'] = "".join(
        escape(str(t[0])) + " <strong><small>[" + escape(str(t[1]))
        + "]</small></strong> "
        for t in TagList
    )

    return render(request, "index.html", ctx)

예제 #5

0

파일 보기

파일: extractTrainingData.py 프로젝트: zongqitadie/Agriculture_KnowledgeGraph

 # Excerpt: process one input file whose name ends in 'zh_hans', collecting
 # the entities found in each sentence together with their positions.
 # `file` and `filePath` are defined outside this excerpt.
 if (len(file) > 7 and file[-7:] == 'zh_hans'):
     with open(filePath, 'r') as fr:
         count = 0
         for line in fr:
             count += 1
             # Progress output every 100 lines.
             if (count % 100 == 0):
                 print(filePath + "  " + str(count))
             # Skip useless lines such as <doc ...> and </doc>, and
             # lines shorter than two characters.
             if (len(line) < 2 or line[0:4] == '<doc'
                     or line[0:6] == "</doc>"):
                 continue
             # Split the line into sentences.
             statements = CutStatements(line)
             for statement in statements:
                 # Segment the sentence; each item is indexed as
                 # word[0] (text) and word[1] (tag, or 0) below.
                 cutResult = get_NE(statement.strip())
                 # After obtaining the entity list of a sentence, entities are
                 # matched pairwise to check whether they have some relation;
                 # if so it is written to a file (that step is not visible in
                 # this excerpt).
                 # entityList stores the entities and where they occur:
                 # 'entity1' is the entity name, 'entity1Index' its position.
                 entityList = []
                 # NOTE(review): nowIndex is never updated within this
                 # excerpt, so every search below starts at offset 0 --
                 # confirm against the full file.
                 nowIndex = -1
                 for word in cutResult:
                     # Keep only items that have a non-zero tag which
                     # temporaryok() does not accept.
                     if (word[1] != 0
                             and not temporaryok(word[1])):
                         entity1Index = statement.index(
                             word[0], nowIndex + 1)
                         entityList.append({
                             'entity1':
                             word[0],
                             'entity1Index':
                             entity1Index
                         })