def rankDocument(doc, q):
    titles = doc.find_all("content", attrs={"name": ["title", "altTitle"]})
    hl = 0  # count of terms highlighted by the search engine
    sim = 0  # query-defined similarity with the titles
    for t in titles:  # keep the maximum score over all the title entries
        c = BeautifulSoup(t.get_text(), "html.parser")
        hl = max(hl, len(c.find_all("span")))
        sim = max(sim, q.similarity(Analysis(c.get_text(), superficial=True)))
    return hl + sim


def rankResult(result, q):
    title_similarity = q.similarity(Analysis(result['title'],
                                             superficial=True))
    snippet = BeautifulSoup(result['snippet'], 'html.parser')
    matches = snippet.find_all('span', attrs={"class": "searchmatch"})
    match_score = len(set(m.get_text() for m in matches))
    #print('{}: {}+{}'.format(result['title'], title_similarity, match_score))
    title_weight = 1
    match_weight = 0.1
    return title_weight * title_similarity + match_weight * match_score
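

# Illustration (not part of the original code): how the match_score term of
# rankResult counts unique highlighted terms in a MediaWiki search snippet.
# The sample snippet below is hypothetical; only BeautifulSoup is required.
def _demo_match_score():
    from bs4 import BeautifulSoup
    sample_snippet = ('The <span class="searchmatch">Internet</span> of '
                      '<span class="searchmatch">things</span> connects '
                      'devices to the <span class="searchmatch">Internet</span>.')
    matches = BeautifulSoup(sample_snippet, 'html.parser').find_all(
        'span', attrs={"class": "searchmatch"})
    # Three highlighted spans but only two distinct terms -> returns 2
    return len(set(m.get_text() for m in matches))
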
# Example #3
def response(data_file):
    with open(data_file, 'r') as file:
        data = json.load(file)
        #print(json.dumps(data, indent=4, sort_keys=True))
        ground = Ground()
        for i in data['questions']:
            question = Analysis(i['body'])
            question_type = question.graph.question_type
            doc = Analysis(retrieveDocument(question))
            ground.teardown()
            ground.add_text(doc)
            answers = ground.ask_question(question)
            print('------------------------------------')
            print('Question: ' + i['body'])
            print('Question type: ' + question_type)
            print('Ideal answer: ', i['ideal_answer'])
            if not answers:
                print("Answer: Sorry, I don't know the answer.")
            else:
                print('Answer: ',
                      '\n'.join(compose_answer(question, a) for a in answers))


def getMostRelevantDocument(response, q):
    # Pick the highest-ranked search hit and return the plain text of its page
    results = json.loads(response.text)['query']['search']
    best_result = max(results, key=lambda r: rankResult(r, q))
    page = wikiRetrieve(best_result['pageid'])
    content = BeautifulSoup(
        json.loads(page.text)['parse']['text']['*'], 'html.parser')
    return ' '.join(p.get_text() for p in content.find_all('p'))


def retrieveDocument(q):
    r = wikiSearch(q.content_words())
    doc = getMostRelevantDocument(r, q)
    return ' '.join(doc.split())


if __name__ == "__main__":
    q = Analysis("What is the internet of things?")
    print(q.content_words())

    r = wikiSearch(q.content_words())
    print("{} -> {}".format(r.url, r.status_code))

    results = json.loads(r.text)['query']['search']
    print([(r['title'], r['snippet']) for r in results])

    page = wikiRetrieve(results[0]['pageid'])
    print(json.loads(page.text)['parse']['text']['*'])

    doc = getMostRelevantDocument(r, q)
    print(doc)

    print(retrieveDocument(Analysis("What is a heart attack?")))
# Example #5
    # Copy the first column of every fetched row into the document list
    for result in cur.fetchall():
        data.append({'content': str(result[0])})
        index += 1
    disconnect(conn)
    client = MongoClient('')
    client.tfgchat.test.delete_many({})
    client.tfgchat.test.insert_many(data)


def getMostRelevantDocument(q):
    client = MongoClient('')
    query = ' '.join(q.content_words())
    client.tfgchat.test.create_index([('content', "text")])
    cursor = client.tfgchat.test.find_one({"$text": {
        "$search": query
    }}, {"score": {
        "$meta": "textScore"
    }})
    return BeautifulSoup(cursor.get('content'), 'html.parser').get_text()
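

# Sketch (not the original author's code): find_one with a $text filter returns
# an arbitrary matching document, so explicitly sorting on the projected
# textScore yields the best match. Collection and field names mirror the code
# above; the connection URI is an assumption.
def getBestScoringDocument(q, mongo_uri='mongodb://localhost:27017'):
    client = MongoClient(mongo_uri)
    query = ' '.join(q.content_words())
    client.tfgchat.test.create_index([('content', "text")])
    best = client.tfgchat.test.find(
        {"$text": {"$search": query}},
        {"score": {"$meta": "textScore"}}
    ).sort([("score", {"$meta": "textScore"})]).limit(1)
    doc = next(iter(best), None)
    if doc is None:
        return None
    return BeautifulSoup(doc['content'], 'html.parser').get_text()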


def retrieveDocument(q):
    #createDatabase(2000)
    return getMostRelevantDocument(q)


if __name__ == "__main__":
    q = Analysis("What is a heart attack?")
    print(retrieveDocument(q))


def getMostRelevantDocument(response, q):
    parsed = BeautifulSoup(response.text, "html.parser")  # parser assumed
    docs = parsed.find_all('document')
    doc = max(docs, key=lambda d: rankDocument(d, q))
    html = doc.find("content", attrs={"name": "FullSummary"}).get_text()
    return BeautifulSoup(html, "html.parser").get_text(" ")


'''
    Retrieves a document that may contain the answer to the question
    introduced by the user. To do so, it uses the result of the analysis
    of that question (parameter q).
'''


def retrieveDocument(q):
    r = medlineSearch(q.content_words())
    doc = getMostRelevantDocument(r, q)
    return ' '.join(doc.split())


if __name__ == "__main__":
    q = Analysis("What are the causes of blood infection?")
    print(q.content_words())

    r = medlineSearch(q.content_words())
    print("{} -> {}".format(r.url, r.status_code))

    doc = getMostRelevantDocument(r, q)
    print(doc)

    print(retrieveDocument(Analysis("What are the causes of a heart attack?")))
# Example #7

def test_init():
    global Analysis, nlg

    # nbimporter hooks the import system so notebook files can be imported
    import nbimporter
    from natural_language import Analysis

    from grafeno import linearizers
    nlg = linearizers.get_pipeline(['node_edges'])


# Test case showing the behavior of Ground class
if __name__ == "__main__":

    test_init()
    ground = Ground()
    ground.teardown()
    ground.add_text(
        Analysis(
            "John loves Mary. John loves very cute dogs. Peter hates Susan. Susan loves John. Paul loves Joana. Joana loves Paul."
        ))
    answers = ground.ask_question(Analysis("Who loves John"))
    for answer in answers:
        print(answer.linearize(linearizer=nlg))

    print('##########################################################')
    answers = ground.ask_question(Analysis("John loves who"))
    for answer in answers:
        print(answer.linearize(linearizer=nlg))
# Example #8
def do_tests(original_path, destination_path, type_filter='none'):
    with open(original_path) as f:
        json_file = json.load(f)
    all_tests = []
    errors = {}
    # We create a single Ground
    ground = Ground()

    for question in json_file['questions']:
        # Error handling: defaults so a failure in one step does not crash the loop
        snipped_errors = ""
        question_errors = ""
        compose_answer_errors = False
        answers = ""
        question_type = ""

        question_text = question['body']
        ideal_answer = question['ideal_answer']

        # Clears the Ground
        ground.teardown()

        # Adds all the snippets to the current knowledge base
        for snippet in question['snippets']:
            snippet_text = snippet['text']
            try:
                ground.add_text(Analysis(snippet_text))
            except KeyError:
                snipped_errors = snippet_text

        try:
            # Analyzes the query
            q = Analysis(question['body'])

            question_type = q.graph.question_type

            # Skip this question if it does not match the requested type filter
            if type_filter != 'none' and type_filter != question_type:
                continue

            print('------------------------------------')
            print('Question: ' + question_text)
            print('Question type: ' + question_type)
            print('Ideal answer: ' + ideal_answer[0])

            # We ask a question to the current knowledge base
            answers = ground.ask_question(q)
        except Exception:
            question_errors = question['body']
            print('     Question errors: ' + question_errors)

        try:
            # Translates the answer into a natural language sentence
            answers = response(q, answers, question_type)
        except Exception:
            print('     Compose answer errors')
            compose_answer_errors = True

        print('Answer: ' + str(answers))

        # We collect all the relevant information
        all_tests.append({
            'question': question_text,
            'answer': answers,
            'ideal_answer': ideal_answer,
            'snipped_errors': snipped_errors,
            'question_errors': question_errors,
            'compose_answer_errors': compose_answer_errors
        })

    # We collect all the questions
    full = {"questions": all_tests}

    # We save all the questions to the specified file
    with open(destination_path, "w") as f:
        f.write(json.dumps(full))
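

# Minimal usage sketch; the file paths below are hypothetical placeholders,
# not paths from the original project.
if __name__ == "__main__":
    do_tests('training_questions.json', 'test_results.json')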