Esempio n. 1
0
File: demo.py Progetto: 52nlp/WikiKB
def initTries(trie_dict):
	"""Populate the module-level tries_all mapping from trie_dict.

	For every (label, entries) pair in trie_dict the entries are copied into
	a list, passed through pyannotate.toLowerCase, and the result is used to
	build a marisa_trie.Trie that is stored in tries_all under the same label.

	Args:
		trie_dict: mapping of label -> iterable of entry strings.
	"""
	for label, entries in trie_dict.iteritems():
		# Hand the helper its own list copy rather than the caller's iterable.
		lowered = pyannotate.toLowerCase(list(entries))
		tries_all[label] = marisa_trie.Trie(lowered)
Esempio n. 2
0
def initTries(trie_dict):
	"""Populate the module-level tries_all mapping from trie_dict.

	Each label and each entry is converted with unicode(..., errors="ignore");
	the converted entries are passed through pyannotate.toLowerCase and turned
	into a marisa_trie.Trie stored under the converted label. Afterwards the
	"PERSON" slot is always (re)assigned from the file "person.marisa".

	Args:
		trie_dict: mapping of label -> iterable of byte-string entries.
	"""
	for label, entries in trie_dict.iteritems():
		decoded = [unicode(entry, errors="ignore") for entry in entries]
		trie = marisa_trie.Trie(pyannotate.toLowerCase(decoded))
		tries_all[unicode(label, errors="ignore")] = trie

	# Whatever was built for PERSON above is replaced by the trie stored on disk.
	tries_all["PERSON"] = marisa_trie.Trie().load("person.marisa")
Esempio n. 3
0
def initTries(trie_dict):
	"""Populate the module-level tries_all mapping from trie_dict.

	First, every (label, entries) pair of trie_dict is converted with
	unicode(..., errors="ignore"), passed through pyannotate.toLowerCase and
	stored in tries_all as a marisa_trie.Trie. Then, for every label present
	in tries_all, a trie is loaded from "<label>.marisa"; when loading
	succeeds it replaces the in-memory trie, otherwise the in-memory trie is
	kept (best effort).

	Args:
		trie_dict: mapping of label -> iterable of byte-string entries.
	"""
	for k, v in trie_dict.iteritems():
		v_arr = [unicode(value, errors="ignore") for value in v]
		v_arr = pyannotate.toLowerCase(v_arr)
		tries_all[unicode(k, errors="ignore")] = marisa_trie.Trie(v_arr)

	# Iterate over a snapshot of the keys because entries are reassigned
	# inside the loop.
	for name in list(tries_all):
		try:
			loaded = marisa_trie.Trie().load(name + ".marisa")
		except Exception:
			# A missing or unreadable file is expected: keep the trie built
			# above. Unlike a bare "except:", this lets KeyboardInterrupt and
			# SystemExit propagate.
			continue
		tries_all[name] = loaded
Esempio n. 4
0
File: demo.py Progetto: 52nlp/WikiKB
def resolveRedirection(sent, title):
	"""Replace a leading pronoun or partial title mention in sent by the title.

	The first space-separated token of sent is compared case-insensitively.
	If it is one of the pronouns he/she/it/we/they, or equals one of the
	words of title, it is replaced by title with underscores turned into
	spaces. Otherwise sent is returned unchanged.

	Args:
		sent: sentence text whose tokens are separated by single spaces.
		title: title whose words are separated by "_" (or by " " when it
			contains no underscore).

	Returns:
		The rewritten sentence, or sent itself when nothing matched.
	"""
	pronouns = frozenset(["he", "she", "it", "we", "they"])

	tokens = sent.split(" ")
	first = tokens[0].lower()
	if not first:
		# Empty sentence or leading space: nothing to resolve, and an empty
		# token must not match an empty title word (e.g. a trailing "_").
		return sent

	separator = "_" if "_" in title else " "
	# The first token was lowercased above, so the title words must be
	# lowercased too; comparing against the raw title misses every
	# capitalized word.
	title_words = [word.lower() for word in title.split(separator)]

	if first in pronouns or first in title_words:
		# Always emit the title with spaces, whichever rule matched.
		return " ".join(title.split("_")) + " " + " ".join(tokens[1:])
	return sent
Esempio n. 5
0
				#json.dumps(extractions,indent = 4,separators=(',', ': '))

if __name__ == '__main__':	
	sent = "Charles Dickens wrote books like A Christmas Carol, Anthony and Mayan, A Chritmas Carol"
	sent1 = "hello from book A Christmas Carol, Anthony to kill me"

	persons = ["Bill Gates"]
	authors = ["Charles Dickens"]
	books = ["A Christmas Carol",""]
	location = ["Seattle"]
	company = ["Microsoft","Apple Inc","Apple","Mcafee","Amazon.com","Intel","Google","Nvidia","AMD","Oracle","Sun Microsystems"]

	trie = {"PERSON":persons, "BOOK":books, "AUTHOR":authors, "COMPANY":company, "LOCATION":location}
	
	#test annotation
	ptrie = marisa_trie.Trie(pyannotate.toLowerCase(persons))
	btrie = marisa_trie.Trie(pyannotate.toLowerCase(books))
	#pyannotate.annotate(sent,ptrie,1)

	#test pattern indices
	start,end = getIndices(sent,"[C|c]arol")
	#print start, end

	#read JSON
	exp = readJson()

	#main
	initTries(trie)
	
	"""
	sent = "Bill Gates was born in August 14, 1897 - December 14, 1945. He founded Microsoft"