Exemple #1
0
def process_word_E_long(question):
    #print(question)
    #startTime=time.time()
    global count
    k = 1

    entities = []

    #question=question[0].lower() + question[1:]
    originalQuestion = question
    question = question.replace("?", "")
    question = question.replace(".", "")
    question = question.replace("!", "")
    question = question.replace("'s", "")
    question = question.replace("'", "")
    question = question.replace("\\", "")
    question = question.replace("#", "")
    question = question[0].lower() + question[1:]
    questionStopWords = stopwords.extract_stop_words_question(
        question, stopWordsList)
    combinations = get_question_combinatios(question, questionStopWords)
    combinations = split_base_on_verb(combinations, originalQuestion)
    for idx, term in enumerate(combinations):
        if len(term) == 0:
            continue
        if term[0].istitle():
            continue
        ontologyResults = searchIndex.ontologySearch(term)
        propertyResults = searchIndex.propertySearch(term)
        if len(ontologyResults) > 2 or len(propertyResults) > 0:
            del combinations[idx]

    combinations = merge_comb_stop_words(combinations, question,
                                         questionStopWords)
    combinations = sort_combinations(combinations, question)
    combinations = merge_entity_prefix(question, combinations,
                                       originalQuestion)
    combinations, compare_found = split_bas_on_comparison(combinations)
    combinations = extract_abbreviation(combinations)
    try:
        for term in combinations:
            #print(term)
            entityResults = searchIndex.entitySearch(term)
            if len(entityResults) > 0:
                entities.append([entity + [term] for entity in entityResults])

    except:
        return []
    results = []
    for raw in entities:
        for entity in sorted(raw, reverse=True, key=lambda x: x[2])[:k]:
            results.append(entity)

    #print("Entities:")
    #print(entities)
    return [[entity[1], entity[4]] for entity in results]
Exemple #2
0
def evaluate(raw):
    
    evaluation=True
    startTime=time.time()
    oneQuestion=False
    global correctRelations
    global wrongRelations
    global correctEntities
    global wrongEntities
    global count
    correctRelations = 0
    wrongRelations = 0
    correctEntities = 0
    wrongEntities = 0
    count = 1
    p_entity=0
    r_entity=0
    p_relation=0
    r_relation=0
    k=1
    correct=True
    questionRelationsNumber=0
    entities=[]
    questionWord=raw[0].strip().split(' ')[0]
    mixedRelations=[]
    #beforeMixRelations=[]
    question=raw[0]
    originalQuestion=question
    question=question[0].lower() + question[1:]
    question=question.replace("?","")
    question=question.replace(".","")
    question=question.replace("!","")
    #question=question.replace("'s","")
    #question=question.replace("'","")
    question=question.replace("\\","")
    question=question.replace("#","")
    questionStopWords=stopwords.extract_stop_words_question(question,stopWordsList)
    combinations=get_question_combinatios(question,questionStopWords)
    #combinations=merge_comb_stop_words(combinations,question,questionStopWords)
    #print(combinations)
    combinations=split_base_on_verb(combinations,originalQuestion)
    #combinations=split_base_on_titles(combinations)
    #print(combinations)
    combinations=split_base_on_s(combinations)
    oldCombinations=combinations
    
    for idx,term in enumerate(combinations):
        if len(term)==0:
            continue
        if term[0].istitle():
            continue;
        ontologyResults=searchIndex.ontologySearch(term)
        propertyResults=searchIndex.propertySearch(term)
        if len(ontologyResults) == 0 and len(propertyResults) == 0:
            combinations[idx]=term.capitalize()
            question=question.replace(term,term.capitalize())
            
    combinations=merge_comb_stop_words(combinations,question,questionStopWords)
    combinations=sort_combinations(combinations,question)
    combinations=merge_entity_prefix(question,combinations,originalQuestion)
    combinations,compare_found=split_bas_on_comparison(combinations)
    combinations=extract_abbreviation(combinations)
    #print(combinations)
    i=0
    nationalityFlag=False
    for term in combinations:
        #print(term)
        relations=[]
        properties=[]
        entities_term=[]
        if len(term)==0:
            continue
        #relations=reRank_results(term,relations)
        countryImprovement=realtions_entities_country_improvement(term)
        if countryImprovement != "":
            #print("hi")
            nationalityFlag=True
            entities.append([["country",countryImprovement,0,20,term]])
        if (not word_is_verb(term,originalQuestion)) and (term[0].istitle() or len(term.split(' ')) > 2 or (len(term)>1 and  len(searchIndex.ontologySearch(term)) < 2 ) or (any(x.isupper() for x in term))) :
            #print("hi")
            
            entityResults=searchIndex.entitySearch(term)
            if " and " in term:
                for word in term.split(' and '):
                    entityResults.extend(searchIndex.entitySearch(word.strip()))
            if " or " in term:
                for word in term.split(' or '):
                    entityResults.extend(searchIndex.entitySearch(word.strip()))
            if len(entityResults)!=0:
                for result in entityResults:
                    if result[1] not in [e[1] for e in entities_term]:
                        entities_term.append(result+[term])
                #print(len(entities_term))
                entities.append(entities_term)
                    #print(entities)
        else:
            ontologyResults=searchIndex.ontologySearch(term)
            if len(ontologyResults)!=0:
                for result in ontologyResults:
                    if not (result[1][result[1].rfind('/')+1:][0].istitle()):
                        relations.append(result+[term])
            propertyResults=searchIndex.propertySearch(term)
            if len(propertyResults)!=0:
                    propertyResults=[result+[term] for result in propertyResults]
                    properties=properties+propertyResults 
            mixedRelations.append("")
            mixedRelations[i]=relations+properties

            i=i+1

    questionRelationsNumber=len(mixedRelations)
    oldEnities=entities
    if (len(mixedRelations)==0 and questionWord.lower()=="when") or compare_found:
        mixedRelations.append([["date","http://dbpedia.org/ontology/date",0,20],["date","http://dbpedia.org/property/date",0,20]])
        compare_found=False

    for i in range(len(mixedRelations)):
        #print(i)
        mixedRelations[i]=distinct_relations(mixedRelations[i])


        mixedRelations[i],entities=reRank_relations(entities,mixedRelations[i],questionWord,questionRelationsNumber,question,k)


            
            
    mixedRelations=mix_list_items(mixedRelations,k)

    entities=mix_list_items_entities(entities,k)
    mixedRelations.extend(relations_improvement_country(entities))
    
    if nationalityFlag:
        mixedRelations.append(["country","http://dbpedia.org/ontology/country",20])
    
    if oneQuestion:
        #print("Relations:")
        #print(mixedRelations)
        #print("Entities:")
        #print(entities)
        return
    if(evaluation):
        numberSystemRelations=len(raw[2])
        intersection= set(raw[2]).intersection([tup[1] for tup in mixedRelations])
        if numberSystemRelations!=0 and len(mixedRelations)!=0:
            p_relation=len(intersection)/len(mixedRelations)
            r_relation=len(intersection)/numberSystemRelations
        for relation in raw[2]:
  
            if relation[relation.rfind('/')+1:] in [tup[1][tup[1].rfind('/')+1:] for tup in mixedRelations]:
                #p_relation=1/numberSystemRelations
                correctRelations=correctRelations+1
                #print(raw[0])
            else:
                
                wrongRelations=wrongRelations+1
                correct=False
                global questions_labels
 
                
  
        numberSystemEntities=len(raw[3])
        intersection= set(raw[3]).intersection([tup[1] for tup in entities])
        if numberSystemEntities!=0 and len(entities)!=0 :
            p_entity=len(intersection)/len(entities)
            r_entity=len(intersection)/numberSystemEntities
        for entity in raw[3]:
            if entity in [tup[1] for tup in entities]:
                
                correctEntities=correctEntities+1
            else:
                wrongEntities=wrongEntities+1
                correct=False
                print(raw[0])
                
      
                        
 
                
        #print(count)
        #print(p_entity)
        count=count+1
    endTime=time.time()
    raw.append(endTime-startTime)
    
    
    #print(mixedRelations)
    #print(entities)
    ############        
    #raw.append([[tup[1],tup[4]] for tup in mixedRelations])
    #raw.append([[tup[1],tup[4]] for tup in entities])
    #############
    #raw.append(p_relation)
    #raw.append(r_relation)
    #return raw
    relations_falcon = [tup[1] for tup in mixedRelations]
    entities_falcon = [tup[1] for tup in entities]
    return entities_falcon, relations_falcon