def initTries(trie_dict):
    """Fill ``tries_all`` with one marisa trie per entry of ``trie_dict``.

    For every ``(key, values)`` pair the values are copied into a list,
    passed through ``pyannotate.toLowerCase`` and the result is handed to
    ``marisa_trie.Trie``.  The trie is stored in ``tries_all`` (defined
    outside this function) under the same key.

    Args:
        trie_dict: mapping from a label to an iterable of trie keys.
    """
    for label, entries in trie_dict.iteritems():
        normalised = pyannotate.toLowerCase(list(entries))
        tries_all[label] = marisa_trie.Trie(normalised)
def initTries(trie_dict):
    """Fill ``tries_all`` from ``trie_dict`` and load the PERSON trie from disk.

    For every ``(key, values)`` pair each value is converted with
    ``unicode(value, errors="ignore")``, the converted list is passed through
    ``pyannotate.toLowerCase`` and a ``marisa_trie.Trie`` built from the
    result is stored in ``tries_all`` (defined outside this function) under
    the ``unicode`` form of the key.

    Afterwards the ``"PERSON"`` entry is set to the trie loaded from the file
    ``person.marisa``, replacing any PERSON trie built from ``trie_dict``.

    Args:
        trie_dict: mapping from a label to an iterable of byte strings.
    """
    for label, entries in trie_dict.iteritems():
        decoded = [unicode(entry, errors="ignore") for entry in entries]
        lowered = pyannotate.toLowerCase(decoded)
        tries_all[unicode(label, errors="ignore")] = marisa_trie.Trie(lowered)

    # The PERSON trie always comes from the file, whatever trie_dict held.
    tries_all["PERSON"] = marisa_trie.Trie().load("person.marisa")
def initTries(trie_dict):
    """Fill ``tries_all`` from ``trie_dict``, preferring tries saved on disk.

    For every ``(key, values)`` pair each value is converted with
    ``unicode(value, errors="ignore")``, the converted list is passed through
    ``pyannotate.toLowerCase`` and a ``marisa_trie.Trie`` built from the
    result is stored in ``tries_all`` (defined outside this function) under
    the ``unicode`` form of the key.

    Then, for every name present in ``tries_all``, an attempt is made to load
    ``<name>.marisa``.  When loading succeeds the loaded trie replaces the
    stored one; when it fails the stored trie is kept.

    Args:
        trie_dict: mapping from a label to an iterable of byte strings.
    """
    for label, entries in trie_dict.iteritems():
        decoded = [unicode(entry, errors="ignore") for entry in entries]
        lowered = pyannotate.toLowerCase(decoded)
        tries_all[unicode(label, errors="ignore")] = marisa_trie.Trie(lowered)

    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    for name in tries_all:
        try:
            loaded = marisa_trie.Trie().load(name + ".marisa")
        except Exception:
            # Best effort: no usable "<name>.marisa", keep the current trie.
            # "Exception" (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate instead of being silently discarded.
            continue
        tries_all[name] = loaded
def resolveRedirection(sent, title):
    """Replace a leading reference to the subject of ``title`` with the title.

    The first space-separated word of ``sent`` is lower-cased and examined:

    * if it is one of the pronouns he/she/it/we/they, it is replaced by
      ``title`` with every "_" turned into a space;
    * otherwise, if it equals one of the words of ``title`` (split on "_"
      when the title contains one, else on " "), it is replaced by ``title``
      exactly as given.

    In every other case, including a sentence whose first word is empty
    (empty string or leading space), ``sent`` is returned unchanged.

    Args:
        sent: sentence text whose words are separated by single spaces.
        title: subject title, e.g. "Charles_Dickens".

    Returns:
        The sentence with its first word substituted, or ``sent`` itself.
    """
    # Lower-case literals, so they are compared directly with the
    # lower-cased first word.
    pronouns = ("he", "she", "it", "we", "they")

    first_word, _, remainder = sent.partition(" ")
    first_word = first_word.lower()

    # An empty first word refers to nothing.  Without this guard it would
    # match the empty fragments produced by splitting titles such as "",
    # "Foo_" or "A__B" and the title would be prepended by mistake.
    if not first_word:
        return sent

    if first_word in pronouns:
        return title.replace("_", " ") + " " + remainder

    separator = "_" if "_" in title else " "
    # NOTE(review): the first word is lower-cased but the title words are
    # not, so "Dickens" does not match "Charles_Dickens".  Confirm whether
    # callers pass lower-case titles or whether this should ignore case.
    if first_word in title.split(separator):
        return title + " " + remainder

    return sent
#json.dumps(extractions,indent = 4,separators=(',', ': ')) if __name__ == '__main__': sent = "Charles Dickens wrote books like A Christmas Carol, Anthony and Mayan, A Chritmas Carol" sent1 = "hello from book A Christmas Carol, Anthony to kill me" persons = ["Bill Gates"] authors = ["Charles Dickens"] books = ["A Christmas Carol",""] location = ["Seattle"] company = ["Microsoft","Apple Inc","Apple","Mcafee","Amazon.com","Intel","Google","Nvidia","AMD","Oracle","Sun Microsystems"] trie = {"PERSON":persons, "BOOK":books, "AUTHOR":authors, "COMPANY":company, "LOCATION":location} #test annotation ptrie = marisa_trie.Trie(pyannotate.toLowerCase(persons)) btrie = marisa_trie.Trie(pyannotate.toLowerCase(books)) #pyannotate.annotate(sent,ptrie,1) #test pattern indices start,end = getIndices(sent,"[C|c]arol") #print start, end #read JSON exp = readJson() #main initTries(trie) """ sent = "Bill Gates was born in August 14, 1897 - December 14, 1945. He founded Microsoft"