Python assemble_extracts 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: process_parse

메소드/함수: assemble_extracts

hotexamples.com에서의 예제들: 2

Python assemble_extracts - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 process_parse.assemble_extracts에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

예제 #1

파일 보기

파일: pattern_extractor.py 프로젝트: hausdorf/terr

def get_targets(sent,patt_lines):

	pot_target_list = []
	matched_patt_word = []
	for patt in patt_lines:
		if (not patt):
			continue
		patt = re.sub(COLL_SPACES,SPACES_REPL,patt)

		m = re.findall(patt,sent)
		if m:
			# check forward or backward 
			if(DEBUG):
				print "pattern matched ",m,"for patt ",patt,"and sent",sent
			# Now parse this line
			parsed_sent = parse_file(sent)
			if (not parsed_sent):
				print "could not parse line"+parsed_sent
				continue 
			# NOW NP CHUNK THE SENTENCE
			# First make sense of parsed input  	
			pos_dict,parse_dict,_ = process_parse.pprocess_pline(parsed_sent)
			# the above might return multiple lines 
			for i in xrange(len(pos_dict.keys())):
				pos_sent = pos_dict[i]
				parsed_sent = parse_dict[i]
				# NP chunking algo
				np_sent= process_parse.extract_np(pos_sent,parsed_sent)
				np_chunk_sent = process_parse.assemble_extracts(np_sent)
				# MATCHES BACK PATTERN 
				# use the first word in the patt to split the parsed sentence 
				patt = patt.strip()
				if(not patt):
					print "patt was empty line move to next"
					continue
				split_patt = patt.split()
				split_word = split_patt[0]
				split_word = split_word.strip()
				if split_word in matched_patt_word:
					if(DEBUG):
						print "###not matching back pattern since back pattern with same key word was matched ,back key word =",split_word
					continue
				m_temp  = re.search(split_word,np_chunk_sent)
				if(not m_temp):
					print "split word=",split_word,"not in sent"
					continue 
				pot_target_list = get_np(split_word,np_chunk_sent,BACK,'target')
					 
	# search for AND IN THE np if it exists divide the np into two parts 		
	new_list = and_detector(pot_target_list)	
	return new_list

예제 #2

파일 보기

파일: pattern_extractor.py 프로젝트: hausdorf/terr

def get_perpi(sent,patt_lines):

	pot_perpi_list = []
	matched_patt_word = []
	for patt in patt_lines:

		if (not patt):
			continue
		#m2 = re.search('MURDERED',patt)
		#if m2:
		#	print "patt",patt
		# collapse multiple white spaces 
		patt = re.sub(COLL_SPACES,SPACES_REPL,patt)
		# check if any of the victim patterns exist for this line
		m = re.findall(patt,sent)
		if m:
			# check forward or backward 
			if(DEBUG):
				print "pattern matched ",m,"for patt ",patt,"and sent",sent
			# Now parse this line
			parsed_sent = parse_file(sent)
			if (not parsed_sent):
				print "could not parse line"+parsed_sent
				continue 
			# NOW NP CHUNK THE SENTENCE
			# First make sense of parsed input  	
			pos_dict,parse_dict,_ = process_parse.pprocess_pline(parsed_sent)
			# the above might return multiple lines 
			for i in xrange(len(pos_dict.keys())):
				pos_sent = pos_dict[i]
				parsed_sent = parse_dict[i]
				# NP chunking algo
				np_sent= process_parse.extract_np(pos_sent,parsed_sent)
				np_chunk_sent = process_parse.assemble_extracts(np_sent)

				if(is_front(patt)):
					#perpi just have one word ( as of now) so just split by word
					patt = patt.strip()
					if(not patt):
						print "patt was empty line move to next"
						continue
					split_patt = patt.split()
					split_word = split_patt[0]
					split_word = split_word.strip()
					# THIS MAKES SURE THAT a FONT PATT IS NOT MATCHED AGAIN BY BACK PATT 
					#matched_patt_word.append(split_word)
					m_temp  = re.search(split_word,np_chunk_sent)
					if(not m_temp):
						print "split word=",split_word,"not in sent"
						continue 
					pot_perpi_list = get_np(split_word,np_chunk_sent,FRONT,'perpi')
					if(len(pot_perpi_list) > 0): 
						# THIS MAKES SURE THAT a FONT PATT IS NOT MATCHED AGAIN BY BACK PATT 
						matched_patt_word.append(split_word)
				else:
					# MATCHES BACK PATTERN 
					# Back patterns have three words ..second last word is the main word / split word  
					patt = patt.strip()
					if(not patt):
						print "patt was empty line move to next"
						continue
					split_patt = patt.split()
					# second last word or second word is the main word  
					split_word = split_patt[1]
					split_word = split_word.strip()
					if split_word in matched_patt_word:
						print "###not matching back pattern since back pattern with same key word was matched ,back key word =",split_word
						continue
					m_temp  = re.search(split_word,np_chunk_sent)
					if(not m_temp):
						print "split word=",split_word,"not in sent"
						continue 
					pot_perpi_list = get_np(split_word,np_chunk_sent,BACK,'perpi')
					 
	# search for AND IN THE np if it exists divide the np into two parts 		
	new_list = and_detector(pot_perpi_list)	
	
	return new_list