def process_word_E_long(question):
    """Return the best entity-index hit for each candidate term of a question.

    The question is stripped of punctuation and possessive markers, split
    into candidate terms by the project helpers, and every surviving term is
    looked up with ``searchIndex.entitySearch``. For each term that has hits,
    the hit with the highest value at index 2 is kept.

    Args:
        question: The question text.

    Returns:
        A list of ``[hit[1], term]`` pairs, one per term that produced at
        least one hit. An empty list is returned when nothing is left of the
        question after stripping, or when the entity lookup raises.
    """
    k = 1  # number of top-ranked hits kept per term
    originalQuestion = question

    # Same removals, in the same order, as the previous chain of replace()
    # calls ("'s" must be removed before the bare apostrophe).
    for token in ("?", ".", "!", "'s", "'", "\\", "#"):
        question = question.replace(token, "")
    if not question:
        # Empty or punctuation-only input: indexing question[0] below would
        # raise IndexError, and there is nothing to link anyway.
        return []
    question = question[0].lower() + question[1:]

    questionStopWords = stopwords.extract_stop_words_question(
        question, stopWordsList)
    combinations = get_question_combinatios(question, questionStopWords)
    combinations = split_base_on_verb(combinations, originalQuestion)

    # Drop lower-case terms that match the ontology/property indexes. A new
    # list is built instead of deleting while enumerating, which used to skip
    # the term that followed every deleted one.
    kept = []
    for term in combinations:
        if len(term) != 0 and not term[0].istitle():
            ontologyResults = searchIndex.ontologySearch(term)
            propertyResults = searchIndex.propertySearch(term)
            if len(ontologyResults) > 2 or len(propertyResults) > 0:
                continue
        kept.append(term)
    combinations = kept

    combinations = merge_comb_stop_words(combinations, question,
                                         questionStopWords)
    combinations = sort_combinations(combinations, question)
    combinations = merge_entity_prefix(question, combinations,
                                       originalQuestion)
    # The comparison flag returned alongside the terms is not needed here.
    combinations, _ = split_bas_on_comparison(combinations)
    combinations = extract_abbreviation(combinations)

    entities = []
    try:
        for term in combinations:
            entityResults = searchIndex.entitySearch(term)
            if len(entityResults) > 0:
                # Tag every hit with the term that produced it.
                entities.append([entity + [term] for entity in entityResults])
    except Exception:
        # Best effort: a failing lookup yields no entities at all. Only
        # Exception is caught so KeyboardInterrupt/SystemExit still propagate.
        return []

    results = []
    for candidates in entities:
        results.extend(
            sorted(candidates, reverse=True, key=lambda x: x[2])[:k])

    # NOTE(review): index 4 is the appended term only if entitySearch hits
    # have exactly four fields -- confirm against searchIndex.
    return [[entity[1], entity[4]] for entity in results]
def _evaluate_is_entity_term(term, originalQuestion):
    """Decide whether ``term`` goes to the entity index rather than the relation indexes."""
    if word_is_verb(term, originalQuestion):
        return False
    # Checks are kept in their original short-circuit order so the ontology
    # lookup only happens when the cheaper tests fail.
    return (term[0].istitle()
            or len(term.split(' ')) > 2
            or (len(term) > 1 and len(searchIndex.ontologySearch(term)) < 2)
            or any(x.isupper() for x in term))


def _evaluate_entity_candidates(term):
    """Return the entity hits for ``term`` (and its and/or parts), unique on index 1, tagged with the term."""
    entityResults = searchIndex.entitySearch(term)
    if " and " in term:
        for word in term.split(' and '):
            entityResults.extend(searchIndex.entitySearch(word.strip()))
    if " or " in term:
        for word in term.split(' or '):
            entityResults.extend(searchIndex.entitySearch(word.strip()))

    entities_term = []
    seen = set()  # values of hit[1] already emitted; first occurrence wins
    for result in entityResults:
        if result[1] not in seen:
            seen.add(result[1])
            entities_term.append(result + [term])
    return entities_term


def _evaluate_relation_candidates(term):
    """Return the ontology hits followed by the property hits for ``term``, tagged with the term."""
    relations = []
    for result in searchIndex.ontologySearch(term):
        uri = result[1]
        # Skip hits whose last path segment starts with a capital letter.
        if not uri[uri.rfind('/') + 1:][0].istitle():
            relations.append(result + [term])
    properties = [result + [term]
                  for result in searchIndex.propertySearch(term)]
    return relations + properties


def _evaluate_link(originalQuestion, questionWord, k):
    """Run the linking pipeline on a non-empty question and return ``(relations, entities)``."""
    question = originalQuestion[0].lower() + originalQuestion[1:]
    for token in ("?", ".", "!", "\\", "#"):
        question = question.replace(token, "")

    questionStopWords = stopwords.extract_stop_words_question(
        question, stopWordsList)
    combinations = get_question_combinatios(question, questionStopWords)
    combinations = split_base_on_verb(combinations, originalQuestion)
    combinations = split_base_on_s(combinations)

    # Lower-case terms unknown to both relation indexes are capitalised, in
    # the term list and in the question text, so they are treated as entity
    # mentions further down.
    for idx, term in enumerate(combinations):
        if len(term) == 0 or term[0].istitle():
            continue
        ontologyResults = searchIndex.ontologySearch(term)
        propertyResults = searchIndex.propertySearch(term)
        if len(ontologyResults) == 0 and len(propertyResults) == 0:
            combinations[idx] = term.capitalize()
            question = question.replace(term, term.capitalize())

    combinations = merge_comb_stop_words(combinations, question,
                                         questionStopWords)
    combinations = sort_combinations(combinations, question)
    combinations = merge_entity_prefix(question, combinations,
                                       originalQuestion)
    combinations, compare_found = split_bas_on_comparison(combinations)
    combinations = extract_abbreviation(combinations)

    entities = []
    mixedRelations = []
    nationalityFlag = False
    for term in combinations:
        if len(term) == 0:
            continue
        countryImprovement = realtions_entities_country_improvement(term)
        if countryImprovement != "":
            nationalityFlag = True
            entities.append([["country", countryImprovement, 0, 20, term]])

        if _evaluate_is_entity_term(term, originalQuestion):
            entities_term = _evaluate_entity_candidates(term)
            if entities_term:
                entities.append(entities_term)
        else:
            # One candidate list per relation term, possibly empty.
            mixedRelations.append(_evaluate_relation_candidates(term))

    questionRelationsNumber = len(mixedRelations)
    # Fall back to the generic "date" relations for "when" questions without
    # any relation term, and whenever a comparison was detected.
    if (len(mixedRelations) == 0 and questionWord.lower() == "when") \
            or compare_found:
        mixedRelations.append([
            ["date", "http://dbpedia.org/ontology/date", 0, 20],
            ["date", "http://dbpedia.org/property/date", 0, 20],
        ])

    for i, candidates in enumerate(mixedRelations):
        mixedRelations[i], entities = reRank_relations(
            entities, distinct_relations(candidates), questionWord,
            questionRelationsNumber, question, k)

    mixedRelations = mix_list_items(mixedRelations, k)
    entities = mix_list_items_entities(entities, k)
    mixedRelations.extend(relations_improvement_country(entities))
    if nationalityFlag:
        mixedRelations.append(
            ["country", "http://dbpedia.org/ontology/country", 20])
    return mixedRelations, entities


def evaluate(raw):
    """Link one question and score the result against its gold annotations.

    Args:
        raw: Mutable sequence where ``raw[0]`` is the question text,
            ``raw[2]`` the gold relation URIs and ``raw[3]`` the gold entity
            URIs. The elapsed time in seconds is appended to it.

    Returns:
        ``(entities, relations)``: two lists holding index 1 of every
        predicted entity record and of every predicted relation record.

    Side effects:
        Resets the module-level counters ``correctRelations``,
        ``wrongRelations``, ``correctEntities`` and ``wrongEntities`` to zero
        and then counts the gold items of this question into them; leaves
        ``count`` at 2; prints ``raw[0]`` once for every gold entity that was
        not predicted.
    """
    global correctRelations, wrongRelations
    global correctEntities, wrongEntities, count

    startTime = time.time()
    correctRelations = 0
    wrongRelations = 0
    correctEntities = 0
    wrongEntities = 0
    count = 1
    k = 1  # number of top-ranked candidates kept per term

    originalQuestion = raw[0]
    questionWord = originalQuestion.strip().split(' ')[0]

    if originalQuestion:
        mixedRelations, entities = _evaluate_link(
            originalQuestion, questionWord, k)
    else:
        # An empty question used to raise IndexError on question[0]; it has
        # nothing to link, so it is scored against empty predictions.
        mixedRelations, entities = [], []

    relations_falcon = [tup[1] for tup in mixedRelations]
    entities_falcon = [tup[1] for tup in entities]

    # Relations are matched on the last path segment only, so the same name
    # under a different URI prefix counts as a match.
    predicted_relation_names = {uri[uri.rfind('/') + 1:]
                                for uri in relations_falcon}
    for relation in raw[2]:
        if relation[relation.rfind('/') + 1:] in predicted_relation_names:
            correctRelations = correctRelations + 1
        else:
            wrongRelations = wrongRelations + 1

    # Entities must match on the full value.
    predicted_entities = set(entities_falcon)
    for entity in raw[3]:
        if entity in predicted_entities:
            correctEntities = correctEntities + 1
        else:
            wrongEntities = wrongEntities + 1
            print(raw[0])

    count = count + 1
    raw.append(time.time() - startTime)
    return entities_falcon, relations_falcon