Beispiel #1
0
def process_word_E_long(question):
    """Return the top entity hit for every candidate phrase of a question.

    The question is stripped of punctuation, split into candidate phrases by
    the module's combination helpers, and each phrase is looked up with
    ``searchIndex.entitySearch``.  For every phrase that produced hits, the
    ``k`` (= 1) hits with the highest value at index 2 are kept.

    Args:
        question: The natural-language question text.

    Returns:
        A list of ``[hit[1], hit[4]]`` pairs, one per kept hit.  Index 4 is
        the matched phrase when a search hit has four fields (the phrase is
        appended to each hit) -- TODO confirm the hit layout against
        ``searchIndex.entitySearch``.  An empty list is returned for an empty
        question, for a question consisting only of stripped characters, and
        when the entity lookup raises.
    """
    k = 1  # number of best-scoring hits kept per phrase

    # Nothing to analyse; also avoids indexing into an empty string below.
    if not question:
        return []

    originalQuestion = question
    # Order matters: "'s" has to be removed before the bare apostrophe.
    for token in ("?", ".", "!", "'s", "'", "\\", "#"):
        question = question.replace(token, "")
    if not question:
        return []
    question = question[0].lower() + question[1:]

    questionStopWords = stopwords.extract_stop_words_question(
        question, stopWordsList)
    combinations = get_question_combinatios(question, questionStopWords)
    combinations = split_base_on_verb(combinations, originalQuestion)

    # Drop lower-case phrases that match ontology/property entries.  A new
    # list is built instead of deleting from the list being iterated, which
    # used to skip the element following every deletion.
    kept = []
    for term in combinations:
        if term and not term[0].istitle():
            ontologyResults = searchIndex.ontologySearch(term)
            propertyResults = searchIndex.propertySearch(term)
            if len(ontologyResults) > 2 or len(propertyResults) > 0:
                continue
        kept.append(term)
    combinations = kept

    combinations = merge_comb_stop_words(combinations, question,
                                         questionStopWords)
    combinations = sort_combinations(combinations, question)
    combinations = merge_entity_prefix(question, combinations,
                                       originalQuestion)
    # The comparison flag returned alongside the phrases is not needed here.
    combinations, _ = split_bas_on_comparison(combinations)
    combinations = extract_abbreviation(combinations)

    entities = []
    try:
        for term in combinations:
            entityResults = searchIndex.entitySearch(term)
            if len(entityResults) > 0:
                # Remember which phrase produced each hit.
                entities.append([entity + [term] for entity in entityResults])
    except Exception:
        # Best effort: a failing lookup yields "no entities" rather than an
        # error, but KeyboardInterrupt/SystemExit are no longer swallowed.
        return []

    results = []
    for hits in entities:
        results.extend(
            sorted(hits, key=lambda hit: hit[2], reverse=True)[:k])

    return [[entity[1], entity[4]] for entity in results]
Beispiel #2
0
def evaluate(raw):
    """Extract entities and relations for one question and score them.

    ``raw`` is indexed as a sequence: ``raw[0]`` is the question text,
    ``raw[2]`` holds the expected relation strings (compared on the text after
    their last ``/``) and ``raw[3]`` the expected entity identifiers.

    Side effects:
        * Resets the module-level counters ``correctRelations``,
          ``wrongRelations``, ``correctEntities`` and ``wrongEntities`` to 0
          and ``count`` to 1 on every call, then updates them while scoring.
        * Prints the question once for every expected entity that was not
          found.
        * Appends the elapsed wall-clock time (seconds) to ``raw``.

    Returns:
        A tuple ``(entities_falcon, relations_falcon)``: the values at index 1
        of every final entity record and of every final relation record.
        Returns ``None`` when ``oneQuestion`` is true (it is hard-coded to
        ``False`` here).
    """
    
    evaluation=True
    startTime=time.time()
    oneQuestion=False
    global correctRelations
    global wrongRelations
    global correctEntities
    global wrongEntities
    global count
    # The counters are module-level but start from scratch on each call.
    correctRelations = 0
    wrongRelations = 0
    correctEntities = 0
    wrongEntities = 0
    count = 1
    # Precision/recall values are computed below but never returned or stored.
    p_entity=0
    r_entity=0
    p_relation=0
    r_relation=0
    k=1
    correct=True
    questionRelationsNumber=0
    entities=[]
    # First whitespace-separated word of the question (e.g. "When").
    questionWord=raw[0].strip().split(' ')[0]
    mixedRelations=[]
    #beforeMixRelations=[]
    question=raw[0]
    originalQuestion=question
    # NOTE(review): an empty question raises IndexError on the next line.
    question=question[0].lower() + question[1:]
    question=question.replace("?","")
    question=question.replace(".","")
    question=question.replace("!","")
    #question=question.replace("'s","")
    #question=question.replace("'","")
    question=question.replace("\\","")
    question=question.replace("#","")
    questionStopWords=stopwords.extract_stop_words_question(question,stopWordsList)
    combinations=get_question_combinatios(question,questionStopWords)
    #combinations=merge_comb_stop_words(combinations,question,questionStopWords)
    #print(combinations)
    combinations=split_base_on_verb(combinations,originalQuestion)
    #combinations=split_base_on_titles(combinations)
    #print(combinations)
    combinations=split_base_on_s(combinations)
    # Alias of the same list (not a copy); not read again in this function.
    oldCombinations=combinations
    
    # Capitalise every lower-case term that matches neither an ontology nor a
    # property entry, both in the term list and in the question text.  A
    # title-cased first character is one of the entity criteria used below.
    for idx,term in enumerate(combinations):
        if len(term)==0:
            continue
        if term[0].istitle():
            continue;
        ontologyResults=searchIndex.ontologySearch(term)
        propertyResults=searchIndex.propertySearch(term)
        if len(ontologyResults) == 0 and len(propertyResults) == 0:
            combinations[idx]=term.capitalize()
            # NOTE(review): str.replace rewrites every occurrence of the term,
            # including ones inside longer words -- confirm this is intended.
            question=question.replace(term,term.capitalize())
            
    combinations=merge_comb_stop_words(combinations,question,questionStopWords)
    combinations=sort_combinations(combinations,question)
    combinations=merge_entity_prefix(question,combinations,originalQuestion)
    combinations,compare_found=split_bas_on_comparison(combinations)
    combinations=extract_abbreviation(combinations)
    #print(combinations)
    # i counts the relation slots appended to mixedRelations so far.
    i=0
    nationalityFlag=False
    for term in combinations:
        #print(term)
        relations=[]
        properties=[]
        entities_term=[]
        if len(term)==0:
            continue
        #relations=reRank_results(term,relations)
        # A non-empty result adds a synthetic "country" entity record for the
        # term and later triggers the extra country relation.
        countryImprovement=realtions_entities_country_improvement(term)
        if countryImprovement != "":
            #print("hi")
            nationalityFlag=True
            entities.append([["country",countryImprovement,0,20,term]])
        # Entity candidate: not a verb, and either title-cased, longer than two
        # words, matching fewer than two ontology entries, or containing an
        # upper-case character.  Everything else is treated as a relation.
        if (not word_is_verb(term,originalQuestion)) and (term[0].istitle() or len(term.split(' ')) > 2 or (len(term)>1 and  len(searchIndex.ontologySearch(term)) < 2 ) or (any(x.isupper() for x in term))) :
            #print("hi")
            
            entityResults=searchIndex.entitySearch(term)
            # Also look up each side of an "and"/"or" conjunction separately.
            if " and " in term:
                for word in term.split(' and '):
                    entityResults.extend(searchIndex.entitySearch(word.strip()))
            if " or " in term:
                for word in term.split(' or '):
                    entityResults.extend(searchIndex.entitySearch(word.strip()))
            if len(entityResults)!=0:
                # Keep the first hit per distinct value at index 1 and tag it
                # with the term that produced it.
                for result in entityResults:
                    if result[1] not in [e[1] for e in entities_term]:
                        entities_term.append(result+[term])
                #print(len(entities_term))
                entities.append(entities_term)
                    #print(entities)
        else:
            ontologyResults=searchIndex.ontologySearch(term)
            if len(ontologyResults)!=0:
                for result in ontologyResults:
                    # Keep only hits whose name after the last '/' does not
                    # start with a title-case character.
                    # NOTE(review): raises IndexError if result[1] ends with
                    # '/' -- confirm the index never returns such values.
                    if not (result[1][result[1].rfind('/')+1:][0].istitle()):
                        relations.append(result+[term])
            propertyResults=searchIndex.propertySearch(term)
            if len(propertyResults)!=0:
                    propertyResults=[result+[term] for result in propertyResults]
                    properties=properties+propertyResults 
            # Append a slot for this term and fill it with its candidates.
            mixedRelations.append("")
            mixedRelations[i]=relations+properties

            i=i+1

    questionRelationsNumber=len(mixedRelations)
    # Alias of the same list (not a copy); not read again in this function.
    oldEnities=entities
    # Fall back to the generic date relations for "when" questions without any
    # relation slot, and whenever a comparison was detected.
    if (len(mixedRelations)==0 and questionWord.lower()=="when") or compare_found:
        mixedRelations.append([["date","http://dbpedia.org/ontology/date",0,20],["date","http://dbpedia.org/property/date",0,20]])
        compare_found=False

    # De-duplicate and re-rank each slot; reRank_relations also hands back the
    # (possibly updated) entity lists, so the order of these calls matters.
    for i in range(len(mixedRelations)):
        #print(i)
        mixedRelations[i]=distinct_relations(mixedRelations[i])


        mixedRelations[i],entities=reRank_relations(entities,mixedRelations[i],questionWord,questionRelationsNumber,question,k)


            
            
    mixedRelations=mix_list_items(mixedRelations,k)

    entities=mix_list_items_entities(entities,k)
    mixedRelations.extend(relations_improvement_country(entities))
    
    if nationalityFlag:
        mixedRelations.append(["country","http://dbpedia.org/ontology/country",20])
    
    # oneQuestion is hard-coded to False above, so this early exit is unused.
    if oneQuestion:
        #print("Relations:")
        #print(mixedRelations)
        #print("Entities:")
        #print(entities)
        return
    if(evaluation):
        numberSystemRelations=len(raw[2])
        intersection= set(raw[2]).intersection([tup[1] for tup in mixedRelations])
        if numberSystemRelations!=0 and len(mixedRelations)!=0:
            p_relation=len(intersection)/len(mixedRelations)
            r_relation=len(intersection)/numberSystemRelations
        # Relations are matched on the text after the last '/' only, so the
        # prefix in front of it does not have to agree.
        for relation in raw[2]:
  
            if relation[relation.rfind('/')+1:] in [tup[1][tup[1].rfind('/')+1:] for tup in mixedRelations]:
                #p_relation=1/numberSystemRelations
                correctRelations=correctRelations+1
                #print(raw[0])
            else:
                
                wrongRelations=wrongRelations+1
                correct=False
                global questions_labels
 
                
  
        numberSystemEntities=len(raw[3])
        intersection= set(raw[3]).intersection([tup[1] for tup in entities])
        if numberSystemEntities!=0 and len(entities)!=0 :
            p_entity=len(intersection)/len(entities)
            r_entity=len(intersection)/numberSystemEntities
        # Entities, unlike relations, must match the full value at index 1.
        for entity in raw[3]:
            if entity in [tup[1] for tup in entities]:
                
                correctEntities=correctEntities+1
            else:
                wrongEntities=wrongEntities+1
                correct=False
                # Report the question once per expected entity that is missing.
                print(raw[0])
                
      
                        
 
                
        #print(count)
        #print(p_entity)
        count=count+1
    endTime=time.time()
    # The caller's list is mutated: elapsed seconds are appended to it.
    raw.append(endTime-startTime)
    
    
    #print(mixedRelations)
    #print(entities)
    ############        
    #raw.append([[tup[1],tup[4]] for tup in mixedRelations])
    #raw.append([[tup[1],tup[4]] for tup in entities])
    #############
    #raw.append(p_relation)
    #raw.append(r_relation)
    #return raw
    relations_falcon = [tup[1] for tup in mixedRelations]
    entities_falcon = [tup[1] for tup in entities]
    return entities_falcon, relations_falcon
Beispiel #3
0
def evaluate(raw):
    """Extract Wikidata entities and properties for one question.

    ``raw`` is indexed as a sequence and ``raw[0]`` is the question text.  The
    question is split into candidate phrases, each phrase is looked up as an
    entity or as a property, and the ranked results are appended to ``raw``.

    Side effects:
        * Sets the module-level ``count`` to 1.
        * Appends two lists to ``raw``: ``[tup[1], tup[4]]`` pairs for the
          final relation records, then the same for the final entity records.

    Returns:
        The same ``raw`` object, with the two lists appended.

    NOTE(review): ``evaluation`` is hard-coded to ``False``, so the scoring
    branch never runs.  It would not work as written if enabled -- see the
    notes inside that branch.
    """
    evaluation = False
    startTime = time.time()
    oneQuestion = False
    global correctRelations
    #correctRelations=0
    global wrongRelations
    #wrongRelations=0
    global correctEntities
    #correctEntities=0
    global wrongEntities
    #wrongEntities=0
    global count
    count = 1
    p_entity = 0
    r_entity = 0
    p_relation = 0
    r_relation = 0
    # Result-count parameter passed to the re-ranking and mixing helpers.
    k = 5
    correct = True
    questionRelationsNumber = 0
    entities = []
    # First whitespace-separated word of the question (e.g. "When").
    questionWord = raw[0].strip().split(' ')[0]
    mixedRelations = []
    #beforeMixRelations=[]
    question = raw[0]
    #print(question)
    originalQuestion = question
    # NOTE(review): an empty question raises IndexError on the next line.
    question = question[0].lower() + question[1:]
    question = question.replace("?", "")
    question = question.replace(".", "")
    question = question.replace("!", "")
    question = question.replace("\\", "")
    question = question.replace("#", "")

    questionStopWords = wiki_stopwords.extract_stop_words_question(
        question, stopWordsList)
    # print('questionStopWords: ', questionStopWords)
    combinations = get_question_combinatios(question, questionStopWords)
    # print('combinations: ',combinations)
    # Stop words are merged here and again after the capitalisation pass.
    combinations = merge_comb_stop_words(combinations, question,
                                         questionStopWords)
    #print(combinations)

    combinations = split_base_on_verb(combinations, originalQuestion)
    combinations = split_base_on_s(combinations)
    # Alias of the same list (not a copy); not read again in this function.
    oldCombinations = combinations

    # Capitalise every lower-case term without a property match, both in the
    # term list and in the question text.  A title-cased first character is
    # one of the entity criteria used below.
    for idx, term in enumerate(combinations):
        if len(term) == 0:
            continue
        if term[0].istitle():
            continue

        # NOTE(review): this lookup goes through `searchIndex`, while the
        # lookups further down use `wiki_search_elastic` -- confirm both are
        # meant to be used.
        propertyResults = searchIndex.propertySearch(term)

        if len(propertyResults) == 0:
            combinations[idx] = term.capitalize()
            question = question.replace(term, term.capitalize())

    combinations = merge_comb_stop_words(combinations, question,
                                         questionStopWords)
    combinations = sort_combinations(combinations, question)
    combinations = merge_entity_prefix(question, combinations,
                                       originalQuestion)
    combinations, compare_found = split_bas_on_comparison(combinations)
    combinations = extract_abbreviation(combinations)
    # i counts the relation slots appended to mixedRelations so far.
    i = 0
    # Never set to True in this function, so the country relation further
    # down is never added.
    nationalityFlag = False
    for term in combinations:
        properties = []
        entities_term = []
        if len(term) == 0:
            continue

        # Entity candidate: not a verb, and either title-cased, longer than
        # two words, or containing an upper-case character.  Everything else
        # is treated as a property phrase.
        if (not word_is_verb(term, originalQuestion)) and (
                term[0].istitle() or len(term.split(' ')) > 2 or
            (any(x.isupper() for x in term))):
            # print(term," ", i)
            entityResults = wiki_search_elastic.entitySearch(term)
            # Also look up each side of an "and"/"or" conjunction separately.
            if " and " in term:
                for word in term.split(' and '):
                    entityResults.extend(
                        wiki_search_elastic.entitySearch(word.strip()))
            if " or " in term:
                for word in term.split(' or '):
                    entityResults.extend(
                        wiki_search_elastic.entitySearch(word.strip()))
            if len(entityResults) != 0:
                # Keep the first hit per distinct value at index 1 and tag it
                # with the term that produced it.
                for result in entityResults:
                    if result[1] not in [e[1] for e in entities_term]:
                        entities_term.append(result + [term])
                #print(len(entities_term))
                entities.append(entities_term)
                #print(entities)
        else:
            propertyResults = wiki_search_elastic.propertySearch(term)
            if len(propertyResults) != 0:
                propertyResults = [
                    result + [term] for result in propertyResults
                ]
                properties = properties + propertyResults
            # Append a slot for this term and fill it with its candidates.
            mixedRelations.append("")
            mixedRelations[i] = properties
            i = i + 1

    questionRelationsNumber = len(mixedRelations)
    # Alias of the same list (not a copy); not read again in this function.
    oldEnities = entities
    # Fall back to a fixed time property for "when" questions without any
    # relation slot, and whenever a comparison was detected.
    # NOTE(review): this record has four fields, while the result lists built
    # at the end read index 4 -- confirm a later helper extends the record.
    if (len(mixedRelations) == 0
            and questionWord.lower() == "when") or compare_found:
        mixedRelations.append(
            [["time", "http://www.wikidata.org/wiki/Property:P569", 0, 20]])
        compare_found = False

    # De-duplicate and re-rank each slot; reRank_relations also hands back the
    # (possibly updated) entity lists, so the order of these calls matters.
    for i in range(len(mixedRelations)):
        #print(i)
        mixedRelations[i] = distinct_relations(mixedRelations[i])
        mixedRelations[i], entities = reRank_relations(
            entities, mixedRelations[i], questionWord, questionRelationsNumber,
            question, k)

    mixedRelations = mix_list_items(mixedRelations, k)
    entities = mix_list_items_entities(entities, k)

    if nationalityFlag:
        mixedRelations.append(
            ["country", "https://www.wikidata.org/wiki/Property:P17", 20])

    # Dead branch: `evaluation` is hard-coded to False above.
    if evaluation:
        # `prop` is built but not used afterwards.
        prop = "<http://www.wikidata.org/wiki/Property:" + raw[2][0] + ">"
        #prop =raw[2]
        #numberSystemRelations=len(raw[1])
        numberSystemRelations = 1
        intersection = set(raw[2]).intersection(
            [tup[1][tup[1].rfind('/') + 1:-1] for tup in mixedRelations])
        if numberSystemRelations != 0 and len(mixedRelations) != 0:
            p_relation = len(intersection) / len(mixedRelations)
            r_relation = len(intersection) / numberSystemRelations

        # NOTE(review): `relation` is not assigned anywhere in this function;
        # unless a module-level `relation` exists this raises NameError.
        if relation[relation.rfind('/') + 1:] in [
                tup[1][tup[1].rfind('/') + 1:] for tup in mixedRelations
        ]:
            correctRelations = correctRelations + 1

        else:
            wrongRelations = wrongRelations + 1
            correct = False
            global questions_labels

        true_entity = []
        for e in raw[1]:
            true_entity.append(e)
        #true_entity = raw[1]
        numberSystemEntities = len(raw[1])
        # print(true_entity, entities)
        intersection = set(true_entity).intersection(
            [tup[1][tup[1].rfind('/') + 1:-1] for tup in entities])

        # NOTE(review): `true_entity` is replaced by a string built from the
        # question text, and `numberSystemEntities` by the question's length,
        # so the loop below iterates over single characters -- looks
        # unintended; confirm before enabling evaluation.
        true_entity = "<http://www.wikidata.org/entity/" + raw[0] + ">"
        numberSystemEntities = len(raw[0])

        if numberSystemEntities != 0 and len(entities) != 0:
            p_entity = len(intersection) / len(entities)
            r_entity = len(intersection) / numberSystemEntities
        for e in true_entity:
            if e in [tup[1][tup[1].rfind('/') + 1:-1] for tup in entities]:
                correctEntities = correctEntities + 1
            else:
                wrongEntities = wrongEntities + 1
                correct = False

        count = count + 1
    #endTime=time.time()
    #raw.append(endTime-startTime)

    ############
    # The caller's list is mutated: relation pairs first, then entity pairs.
    raw.append([[tup[1], tup[4]] for tup in mixedRelations])
    raw.append([[tup[1], tup[4]] for tup in entities])
    #raw.append(p_entity)
    #raw.append(r_entity)
    #raw.append(p_relation)
    #raw.append(r_relation)
    return raw
Beispiel #4
0
    questionRelationsNumber=0
    entities=[]
    questionWord=raw[0].strip().split(' ')[0]
    mixedRelations=[]
    #beforeMixRelations=[]
    question=raw[0]
    #print(question)
    originalQuestion=question
    question=question[0].lower() + question[1:]
    question=question.replace("?","")
    question=question.replace(".","")
    question=question.replace("!","")
    question=question.replace("\\","")
    question=question.replace("#","")

    questionStopWords=wiki_stopwords.extract_stop_words_question(question,stopWordsList)
    # print('questionStopWords: ', questionStopWords)
    combinations=get_question_combinatios(question,questionStopWords)
    # print('combinations: ',combinations)
    combinations=merge_comb_stop_words(combinations,question,questionStopWords)
    #print(combinations)

    combinations=split_base_on_verb(combinations,originalQuestion)
    combinations=split_base_on_s(combinations)
    oldCombinations=combinations
    
    for idx,term in enumerate(combinations):
        if len(term)==0:
            continue
        if term[0].istitle():
            continue;