def process_word_E_long(question):
    """Return the top entity candidate for each entity-like span of *question*.

    The question is stripped of punctuation, split into candidate surface
    forms by the module's combination helpers, pruned of spans that look like
    relations, and every remaining span is looked up with
    ``searchIndex.entitySearch``.

    Args:
        question: The natural-language question as a string.

    Returns:
        A list of ``[hit[1], surface_form]`` pairs, one per surface form that
        produced at least one search hit (``hit[1]`` is presumably the entity
        URI -- confirm against ``searchIndex``).  An empty list is returned
        when nothing is left of the question after punctuation stripping, or
        when the entity search raises.
    """
    k = 1  # number of best-scoring hits kept per surface form

    originalQuestion = question
    for token in ("?", ".", "!", "'s", "'", "\\", "#"):
        question = question.replace(token, "")
    if not question:
        # Nothing to link; also avoids an IndexError on question[0] below.
        return []
    question = question[0].lower() + question[1:]

    questionStopWords = stopwords.extract_stop_words_question(
        question, stopWordsList)
    combinations = get_question_combinatios(question, questionStopWords)
    combinations = split_base_on_verb(combinations, originalQuestion)

    # Drop lower-case spans that match the relation indexes.  A new list is
    # built instead of deleting while iterating, which used to skip the span
    # that followed every deleted one.
    entity_candidates = []
    for term in combinations:
        if term and not term[0].istitle():
            ontologyResults = searchIndex.ontologySearch(term)
            propertyResults = searchIndex.propertySearch(term)
            if len(ontologyResults) > 2 or len(propertyResults) > 0:
                continue
        entity_candidates.append(term)
    combinations = entity_candidates

    combinations = merge_comb_stop_words(combinations, question,
                                         questionStopWords)
    combinations = sort_combinations(combinations, question)
    combinations = merge_entity_prefix(question, combinations,
                                       originalQuestion)
    combinations, _compare_found = split_bas_on_comparison(combinations)
    combinations = extract_abbreviation(combinations)

    entities = []
    try:
        for term in combinations:
            entityResults = searchIndex.entitySearch(term)
            if len(entityResults) > 0:
                # Remember which surface form produced each hit (index 4).
                entities.append([hit + [term] for hit in entityResults])
    except Exception:
        # Best effort: a failing search yields "no entities", not a crash.
        return []

    results = []
    for hits in entities:
        # hit[2] is the sort key; the k highest values are kept.
        results.extend(sorted(hits, reverse=True, key=lambda hit: hit[2])[:k])
    return [[hit[1], hit[4]] for hit in results]
def evaluate(raw):
    """Link one question against the DBpedia indexes and score the result.

    ``raw`` is a mutable record: ``raw[0]`` is the question text, ``raw[2]``
    holds the gold relation URIs and ``raw[3]`` the gold entity URIs.  The
    elapsed wall-clock time in seconds is appended to ``raw``.

    Side effects: the module-level counters ``correctRelations``,
    ``wrongRelations``, ``correctEntities`` and ``wrongEntities`` are reset to
    zero and then updated; ``count`` is reset to 1 and incremented once.

    Returns:
        ``(entities_falcon, relations_falcon)`` -- element 1 of every ranked
        entity row and of every ranked relation row.
    """
    global correctRelations, wrongRelations, correctEntities, wrongEntities
    global count, questions_labels

    startTime = time.time()
    evaluation = True
    oneQuestion = False

    correctRelations = 0
    wrongRelations = 0
    correctEntities = 0
    wrongEntities = 0
    count = 1

    p_entity = r_entity = p_relation = r_relation = 0
    k = 1
    correct = True

    originalQuestion = raw[0]
    questionWord = originalQuestion.strip().split(' ')[0]

    # Lower-case the first character, then drop punctuation.
    question = originalQuestion[0].lower() + originalQuestion[1:]
    for token in ("?", ".", "!", "\\", "#"):
        question = question.replace(token, "")

    questionStopWords = stopwords.extract_stop_words_question(
        question, stopWordsList)
    combinations = get_question_combinatios(question, questionStopWords)
    combinations = split_base_on_verb(combinations, originalQuestion)
    combinations = split_base_on_s(combinations)

    # A lower-case span with no hit in either relation index is capitalised,
    # both in the span list and in the working question.
    for position, span in enumerate(combinations):
        if not span or span[0].istitle():
            continue
        ontology_hits = searchIndex.ontologySearch(span)
        property_hits = searchIndex.propertySearch(span)
        if len(ontology_hits) == 0 and len(property_hits) == 0:
            capitalised = span.capitalize()
            combinations[position] = capitalised
            question = question.replace(span, capitalised)

    combinations = merge_comb_stop_words(combinations, question,
                                         questionStopWords)
    combinations = sort_combinations(combinations, question)
    combinations = merge_entity_prefix(question, combinations,
                                       originalQuestion)
    combinations, compare_found = split_bas_on_comparison(combinations)
    combinations = extract_abbreviation(combinations)

    entities = []
    mixedRelations = []
    nationalityFlag = False
    for span in combinations:
        if not span:
            continue

        country = realtions_entities_country_improvement(span)
        if country != "":
            nationalityFlag = True
            entities.append([["country", country, 0, 20, span]])

        is_entity_span = (not word_is_verb(span, originalQuestion)) and (
            span[0].istitle()
            or len(span.split(' ')) > 2
            or (len(span) > 1 and len(searchIndex.ontologySearch(span)) < 2)
            or any(ch.isupper() for ch in span)
        )

        if is_entity_span:
            hits = searchIndex.entitySearch(span)
            # Conjoined spans are additionally searched part by part.
            for joiner in (" and ", " or "):
                if joiner in span:
                    for part in span.split(joiner):
                        hits.extend(searchIndex.entitySearch(part.strip()))
            if hits:
                unique_hits = []
                for hit in hits:
                    # Keep the first hit for every distinct hit[1].
                    if hit[1] not in [kept[1] for kept in unique_hits]:
                        unique_hits.append(hit + [span])
                entities.append(unique_hits)
            continue

        # Relation span: ontology hits whose last URI segment does not start
        # with a capital letter, followed by all property hits.
        # NOTE(review): SOURCE has lost its indentation; one relation list per
        # relation span (not per span) is assumed here -- confirm.
        relation_rows = [
            hit + [span]
            for hit in searchIndex.ontologySearch(span)
            if not hit[1][hit[1].rfind('/') + 1:][0].istitle()
        ]
        property_rows = [hit + [span]
                         for hit in searchIndex.propertySearch(span)]
        mixedRelations.append(relation_rows + property_rows)

    questionRelationsNumber = len(mixedRelations)

    wants_date = (not mixedRelations) and questionWord.lower() == "when"
    if wants_date or compare_found:
        mixedRelations.append([
            ["date", "http://dbpedia.org/ontology/date", 0, 20],
            ["date", "http://dbpedia.org/property/date", 0, 20],
        ])
        compare_found = False

    for position, group in enumerate(mixedRelations):
        deduplicated = distinct_relations(group)
        mixedRelations[position], entities = reRank_relations(
            entities, deduplicated, questionWord, questionRelationsNumber,
            question, k)

    mixedRelations = mix_list_items(mixedRelations, k)
    entities = mix_list_items_entities(entities, k)
    mixedRelations.extend(relations_improvement_country(entities))
    if nationalityFlag:
        mixedRelations.append(
            ["country", "http://dbpedia.org/ontology/country", 20])

    if oneQuestion:
        return

    if evaluation:
        gold_relations = raw[2]
        numberSystemRelations = len(gold_relations)
        intersection = set(gold_relations).intersection(
            [row[1] for row in mixedRelations])
        if numberSystemRelations != 0 and len(mixedRelations) != 0:
            p_relation = len(intersection) / len(mixedRelations)
            r_relation = len(intersection) / numberSystemRelations

        # Relations are compared on the last URI segment only.
        predicted_tails = [row[1][row[1].rfind('/') + 1:]
                           for row in mixedRelations]
        for gold in gold_relations:
            if gold[gold.rfind('/') + 1:] in predicted_tails:
                correctRelations = correctRelations + 1
            else:
                wrongRelations = wrongRelations + 1
                correct = False

        gold_entities = raw[3]
        numberSystemEntities = len(gold_entities)
        predicted_entities = [row[1] for row in entities]
        intersection = set(gold_entities).intersection(predicted_entities)
        if numberSystemEntities != 0 and len(entities) != 0:
            p_entity = len(intersection) / len(entities)
            r_entity = len(intersection) / numberSystemEntities

        for gold in gold_entities:
            if gold in predicted_entities:
                correctEntities = correctEntities + 1
            else:
                wrongEntities = wrongEntities + 1
                correct = False
                # NOTE(review): nesting of this print is not recoverable from
                # SOURCE; printing per missed entity is assumed -- confirm.
                print(raw[0])

    count = count + 1
    raw.append(time.time() - startTime)

    relations_falcon = [row[1] for row in mixedRelations]
    entities_falcon = [row[1] for row in entities]
    return entities_falcon, relations_falcon
def evaluate(raw, rules, evaluation=True):
    """Run the rule-driven Falcon 2.0 pipeline on one question.

    The pipeline is a forward chain of rules based on English morphology; the
    caller selects which rules are applied through ``rules``.

    Args:
        raw: Mutable record.  ``raw[0]`` is the question text; when
            ``evaluation`` is true ``raw[1]`` holds the gold entity ids.
        rules: Collection of rule numbers to apply (membership is tested with
            ``in``):
            1  stop words cannot be entities or relations;
            2  adjacent words without a stop word between them form one
               compound;
            3  entities separated only by stop words are merged;
            4  verbs are never entities and split the sentence;
            5  a token without a relation candidate is treated as an entity;
            8  comparison words split tokens/entities;
            9  abbreviations are separate entities;
            10 split a surface form already recognised as a person;
            12 merge a determiner that precedes an entity;
            13 use the question word to re-weight relations;
            14 relation lookup through ``get_relations_seachindex``.
        evaluation: When true, the module-level counters ``correctEntities``
            and ``wrongEntities`` are updated from the gold entities.

    Returns:
        ``raw`` with six values appended -- ``[uri, surface]`` pairs for the
        relations, ``[uri, surface]`` pairs for the entities, then entity
        precision, entity recall, relation precision and relation recall.
        ``None`` is returned (after printing ``"error"``) when any step
        raises.

    Side effects: prints and increments the module-level ``count``; appends
    ``raw`` to the module-level ``results`` when the module-level
    ``threading`` flag equals ``True``.
    """
    global correctEntities, wrongEntities, count, threading, results
    try:
        relations_flag = False  # relation scoring is switched off here
        print(count)
        p_entity = 0
        r_entity = 0
        p_relation = 0
        r_relation = 0
        k = 1
        entities = []
        mixedRelations = []

        questionWord = raw[0].strip().split(' ')[0]  # the query head word
        question = raw[0]
        if question.strip()[-1] != "?":
            question = question + "?"
        originalQuestion = question
        question = question[0].lower() + question[1:]
        for token in ("?", ".", "!", "\\", "#"):
            question = question.replace(token, "")

        questionStopWords = []
        combinations = question.split(' ')
        combinations_relations = []

        if 1 in rules:
            questionStopWords = extract_stop_words_question(question)
        if 2 in rules:
            combinations = get_question_combinatios(question,
                                                    questionStopWords)
        if 4 in rules:
            combinations, combinations_relations = split_base_on_verb(
                combinations, combinations_relations, originalQuestion)
        # NOTE(review): SOURCE has lost its indentation; this call and
        # upper_all_entities below are assumed to be unconditional -- confirm.
        combinations = split_base_on_s(combinations)
        if 3 in rules:
            combinations, combinations_relations = merge_comb_stop_words(
                combinations, combinations_relations, question,
                questionStopWords)
        if 5 in rules:
            for idx, term in enumerate(combinations):
                if len(term) == 0 or term[0].istitle():
                    continue
                if len(wiki_search_elastic.propertySearch(term)) == 0:
                    combinations[idx] = term.capitalize()
                    question = question.replace(term, term.capitalize())
        if 3 in rules:
            combinations = sort_combinations(combinations, question)
        if 8 in rules:
            combinations, _compare_found = split_bas_on_comparison(
                combinations)
        if 9 in rules:
            combinations = extract_abbreviation(combinations)
        if 10 in rules:
            combinations, combinations_relations = split_base_on_entities(
                combinations, combinations_relations, originalQuestion)
        if 14 in rules:
            combinations, combinations_relations = get_relations_seachindex(
                combinations, combinations_relations)
        combinations = upper_all_entities(combinations, originalQuestion)
        if 12 in rules:
            combinations = merge_comb_det(combinations, originalQuestion)

        # Rules applied during/after the index search.
        nationalityFlag = False  # never set in this variant
        for term in combinations:
            if len(term) == 0:
                continue
            if check_entities_in_text(originalQuestion, term):
                term = term.capitalize()
            entityResults = wiki_search_elastic.entitySearch(term)
            # Conjoined terms are additionally searched part by part.
            for joiner in (" and ", " or "):
                if joiner in term:
                    for word in term.split(joiner):
                        entityResults.extend(
                            wiki_search_elastic.entitySearch(word.strip()))
            if len(entityResults) != 0:
                entities_term = []
                for result in entityResults:
                    # Keep the first hit for every distinct result[1].
                    if result[1] not in [e[1] for e in entities_term]:
                        entities_term.append(result + [term])
                entities.append(entities_term)

        for term in combinations_relations:
            propertyResults = wiki_search_elastic.propertySearch(term)
            # One (possibly empty) candidate list per relation term.
            mixedRelations.append(
                [result + [term] for result in propertyResults])

        questionRelationsNumber = len(mixedRelations)
        if len(mixedRelations) == 0 and questionWord.lower() == "when":
            mixedRelations.append([[
                "time", "<http://www.wikidata.org/wiki/Property:P569>", 0, 20,
                "when"
            ]])

        # Rule 13: let the question word re-weight relations whose range
        # matches the expected answer.  Loop-invariant, so computed once.
        head_rule = 13 in rules
        for i, group in enumerate(mixedRelations):
            mixedRelations[i] = distinct_relations(group)
            # Re-ranking is tried twice; if both attempts fail the group is
            # left de-duplicated but un-ranked.
            for _attempt in range(2):
                try:
                    mixedRelations[i], entities = reRank_relations(
                        entities, mixedRelations[i], questionWord,
                        questionRelationsNumber, question, k, head_rule)
                except Exception:
                    continue
                break

        mixedRelations = mix_list_items(mixedRelations, k)
        entities = mix_list_items_entities(entities, k)
        if nationalityFlag:
            # Five fields, like every other row, because index 4 is read
            # when the output pairs are built below.
            mixedRelations.append([
                "country", "<https://www.wikidata.org/wiki/Property:P17>", 0,
                20, "country"
            ])

        # If the evaluation flag is set, score the output against the dataset.
        if evaluation:
            if relations_flag:
                numberSystemRelations = len(raw[2])
                intersection = set(raw[2]).intersection([
                    tup[1][tup[1].rfind('/') + 1:-1] for tup in mixedRelations
                ])
                if numberSystemRelations != 0 and len(mixedRelations) != 0:
                    p_relation = len(intersection) / len(mixedRelations)
                    r_relation = len(intersection) / numberSystemRelations

            true_entity = list(raw[1])
            numberSystemEntities = len(true_entity)
            # Id = text after the last '/', minus the final character.
            predicted_ids = {
                tup[1][tup[1].rfind('/') + 1:-1] for tup in entities
            }
            intersection = set(true_entity).intersection(predicted_ids)
            if numberSystemEntities != 0 and len(entities) != 0:
                p_entity = len(intersection) / len(entities)
                r_entity = len(intersection) / numberSystemEntities
            for e in true_entity:
                if e in predicted_ids:
                    correctEntities = correctEntities + 1
                else:
                    wrongEntities = wrongEntities + 1

        count = count + 1
        raw.append([[tup[1], tup[4]] for tup in mixedRelations])
        raw.append([[tup[1], tup[4]] for tup in entities])
        raw.append(p_entity)
        raw.append(r_entity)
        raw.append(p_relation)
        raw.append(r_relation)
        # `== True` is deliberate: the global flag shares its name with the
        # stdlib module, and a module object is truthy but not equal to True.
        if threading == True:
            results.append(raw)
        return raw
    except Exception:
        # Best effort per question; KeyboardInterrupt/SystemExit propagate.
        print("error")
        return None
def evaluate(raw):
    """Link one question against the Wikidata indexes.

    ``raw[0]`` is the question text.  Two lists are appended to ``raw``:
    ``[uri, surface]`` pairs for the ranked relations and ``[uri, surface]``
    pairs for the ranked entities (up to ``k = 5`` candidates are requested
    from the ranking helpers).

    Scoring against gold labels is switched off by the local ``evaluation``
    flag.  When it is enabled, ``raw[1]`` is read as the gold entity ids and
    ``raw[2][0]`` as the gold property id, and the module-level counters
    ``correctRelations``, ``wrongRelations``, ``correctEntities`` and
    ``wrongEntities`` are updated.

    Side effects: the module-level ``count`` is reset to 1 and incremented.

    Returns:
        The same ``raw`` list, with the two result lists appended.
    """
    global correctRelations, wrongRelations, correctEntities, wrongEntities
    global count, questions_labels

    evaluation = False
    count = 1
    p_entity = 0
    r_entity = 0
    p_relation = 0
    r_relation = 0
    k = 5
    entities = []
    mixedRelations = []

    questionWord = raw[0].strip().split(' ')[0]
    question = raw[0]
    originalQuestion = question
    question = question[0].lower() + question[1:]
    for token in ("?", ".", "!", "\\", "#"):
        question = question.replace(token, "")

    questionStopWords = wiki_stopwords.extract_stop_words_question(
        question, stopWordsList)
    combinations = get_question_combinatios(question, questionStopWords)
    combinations = merge_comb_stop_words(combinations, question,
                                         questionStopWords)
    combinations = split_base_on_verb(combinations, originalQuestion)
    combinations = split_base_on_s(combinations)

    # A lower-case term without any property hit is capitalised, both in the
    # term list and in the working question.
    for idx, term in enumerate(combinations):
        if len(term) == 0 or term[0].istitle():
            continue
        if len(searchIndex.propertySearch(term)) == 0:
            combinations[idx] = term.capitalize()
            question = question.replace(term, term.capitalize())

    combinations = merge_comb_stop_words(combinations, question,
                                         questionStopWords)
    combinations = sort_combinations(combinations, question)
    combinations = merge_entity_prefix(question, combinations,
                                       originalQuestion)
    combinations, compare_found = split_bas_on_comparison(combinations)
    combinations = extract_abbreviation(combinations)

    nationalityFlag = False  # never set in this variant
    for term in combinations:
        if len(term) == 0:
            continue
        if (not word_is_verb(term, originalQuestion)) and (
                term[0].istitle() or len(term.split(' ')) > 2 or
                any(x.isupper() for x in term)):
            entityResults = wiki_search_elastic.entitySearch(term)
            # Conjoined terms are additionally searched part by part.
            for joiner in (" and ", " or "):
                if joiner in term:
                    for word in term.split(joiner):
                        entityResults.extend(
                            wiki_search_elastic.entitySearch(word.strip()))
            if len(entityResults) != 0:
                entities_term = []
                for result in entityResults:
                    # Keep the first hit for every distinct result[1].
                    if result[1] not in [e[1] for e in entities_term]:
                        entities_term.append(result + [term])
                entities.append(entities_term)
        else:
            # NOTE(review): SOURCE has lost its indentation; one relation
            # list per non-entity term is assumed -- confirm.
            propertyResults = wiki_search_elastic.propertySearch(term)
            mixedRelations.append(
                [result + [term] for result in propertyResults])

    questionRelationsNumber = len(mixedRelations)
    if (len(mixedRelations) == 0 and
            questionWord.lower() == "when") or compare_found:
        # Five fields like every searched row (hit + [term]): index 4 is read
        # when the output pairs are built below.  The lower-cased head word
        # stands in as the surface form ("when" for the when-fallback).
        mixedRelations.append([[
            "time", "http://www.wikidata.org/wiki/Property:P569", 0, 20,
            questionWord.lower()
        ]])
        compare_found = False

    for i, group in enumerate(mixedRelations):
        mixedRelations[i] = distinct_relations(group)
        mixedRelations[i], entities = reRank_relations(
            entities, mixedRelations[i], questionWord,
            questionRelationsNumber, question, k)

    mixedRelations = mix_list_items(mixedRelations, k)
    entities = mix_list_items_entities(entities, k)
    if nationalityFlag:
        # Padded to five fields for the same reason as the fallback above.
        mixedRelations.append([
            "country", "https://www.wikidata.org/wiki/Property:P17", 0, 20,
            "country"
        ])

    if evaluation:
        # Gold property, wrapped the way the evaluation expects URIs.
        prop = "<http://www.wikidata.org/wiki/Property:" + raw[2][0] + ">"
        numberSystemRelations = 1
        # NOTE(review): this strips predicted URIs to the text after the last
        # '/', which for property URIs of the form above still carries the
        # "Property:" prefix -- confirm the format of raw[2].
        intersection = set(raw[2]).intersection(
            [tup[1][tup[1].rfind('/') + 1:-1] for tup in mixedRelations])
        if numberSystemRelations != 0 and len(mixedRelations) != 0:
            p_relation = len(intersection) / len(mixedRelations)
            r_relation = len(intersection) / numberSystemRelations
        # The gold property is compared on its last URI segment.  This used
        # to reference an undefined name instead of `prop`.
        predicted_tails = [
            tup[1][tup[1].rfind('/') + 1:] for tup in mixedRelations
        ]
        if prop[prop.rfind('/') + 1:] in predicted_tails:
            correctRelations = correctRelations + 1
        else:
            wrongRelations = wrongRelations + 1

        # Gold entities stay the list from raw[1]; they used to be replaced
        # by a string built from the question text, so the loop below
        # iterated over single characters.
        true_entity = list(raw[1])
        numberSystemEntities = len(true_entity)
        predicted_ids = {tup[1][tup[1].rfind('/') + 1:-1] for tup in entities}
        intersection = set(true_entity).intersection(predicted_ids)
        if numberSystemEntities != 0 and len(entities) != 0:
            p_entity = len(intersection) / len(entities)
            r_entity = len(intersection) / numberSystemEntities
        for e in true_entity:
            if e in predicted_ids:
                correctEntities = correctEntities + 1
            else:
                wrongEntities = wrongEntities + 1

    count = count + 1
    raw.append([[tup[1], tup[4]] for tup in mixedRelations])
    raw.append([[tup[1], tup[4]] for tup in entities])
    return raw
entityResults=wiki_search_elastic.entitySearch(term) if " and " in term: for word in term.split(' and '): entityResults.extend(wiki_search_elastic.entitySearch(word.strip())) if " or " in term: for word in term.split(' or '): entityResults.extend(wiki_search_elastic.entitySearch(word.strip())) if len(entityResults)!=0: for result in entityResults: if result[1] not in [e[1] for e in entities_term]: entities_term.append(result+[term]) #print(len(entities_term)) entities.append(entities_term) #print(entities) else: propertyResults=wiki_search_elastic.propertySearch(term) if len(propertyResults)!=0: propertyResults=[result+[term] for result in propertyResults] properties=properties+propertyResults mixedRelations.append("") mixedRelations[i]=properties i=i+1 questionRelationsNumber=len(mixedRelations) oldEnities=entities if (len(mixedRelations)==0 and questionWord.lower()=="when") or compare_found: mixedRelations.append([["time","http://www.wikidata.org/wiki/Property:P569",0,20]]) compare_found=False for i in range(len(mixedRelations)): #print(i)