Esempio n. 1
0
def test_extract_answer():
    """Smoke-test ``extract_answer_from_sentences`` on one scored sentence.

    Feeds a single ``(sentence, 1)`` pair together with a question to the
    extractor. Nothing is asserted about the result, so the test only
    fails if the call itself raises.
    """
    # Imported lazily so the project module is loaded only when the test runs.
    from Helpers.deployment_utils import extract_answer_from_sentences

    query = 'where is john'
    scored_sentences = [('John went to the bathroom', 1)]

    extract_answer_from_sentences(scored_sentences, query)
Esempio n. 2
0
def get_output():
    """Run the question-answering pipeline on a hard-coded demo context.

    Prints the context sentences and the question, scores the sentences
    with ``abcnn_model().ans_select``, passes the scored sentences to
    ``extract_answer_from_sentences`` (verbose mode) and finally prints
    every candidate answer with its score. Returns ``None``.
    """
    print('System: Initializing...')
    # Imports stay after the banner so it is printed before the model
    # modules are loaded.
    from Models import abcnn_model
    from Helpers.deployment_utils import extract_answer_from_sentences

    context = [
        'john went to the bathroom',
        'mary went to the kitchen',
        'john moved to the hallway',
        'kim journeyed to the garden',
        'sandra is in the bedroom',
    ]
    question = 'where is john'

    print('\nSystem: The context is:')
    for line in context:
        print(line)

    print('System: The question is:')
    print(question)

    # Sentence selection (ABCNN).
    print('\nSentence Selection Module: Initializing...')
    selector = abcnn_model()
    scored_sentences = selector.ans_select(question, context)

    print('\nSystem: Sentences scored by Sentence Selection Module')
    for text, sent_score in scored_sentences:
        # Only the first element of each score is displayed.
        print('{0:50}\t{1}'.format(text, sent_score[0]))
    print('')

    # Answer extraction; only the list of candidates is used below.
    _best, _best_score, candidates = extract_answer_from_sentences(
        scored_sentences,
        question,
        verbose=True,
    )

    ranked = [{'word': item[0], 'score': item[1]} for item in candidates]

    print('\nSystem: Candidate answers scored by Answer Extraction Module')
    for entry in ranked:
        print('{0:10}\t{1}'.format(entry['word'], entry['score']))
Esempio n. 3
0
    def get_query(self, query):
        """Answer ``query`` against ``self.context``.

        Stores the query on the instance and prints it, retrieves
        paragraphs with ``infoRX.retrieve_info``, splits them into
        sentences using the NLTK punkt English tokenizer, scores the
        sentences with ``abcnn.ans_select`` and extracts answers with
        ``deploy.extract_answer_from_sentences``.

        Returns:
            dict: ``{'answers': [{'word': ..., 'score': ...}, ...]}`` with
            one entry per extracted answer (items 0 and 1 of each answer).
        """
        self.query = query
        print(self.query)

        # Paragraph retrieval (information-retrieval module).
        paragraphs = infoRX.retrieve_info(self.context, self.query)
        splitter = nltk.data.load('tokenizers/punkt/english.pickle')

        # Flatten the retrieved paragraphs into one list of sentences.
        candidate_sents = [
            sent
            for paragraph in paragraphs
            for sent in splitter.tokenize(paragraph)
        ]

        # Sentence selection (ABCNN) followed by answer extraction.
        scored_sents = abcnn.ans_select(query, candidate_sents)
        _best, _best_score, extracted = deploy.extract_answer_from_sentences(
            scored_sents, query)

        return {
            'answers': [
                {'word': item[0], 'score': item[1]} for item in extracted
            ]
        }
Esempio n. 4
0
    def get_query(self, query):
        """Answer ``query`` against ``self.context`` and return the top answers.

        The context is first split into sentences with the NLTK punkt
        English tokenizer and handed to ``infoRX.retrieve_info``. The first
        element of every retrieved item is tokenized again into sentences,
        which are scored by ``self.abcnn.ans_select`` and passed to
        ``deploy.extract_answer_from_sentences`` (non-verbose).

        Returns:
            dict: ``{'answers': [...]}`` holding at most five entries of
            the form ``{'word': answer[0], 'score': float(answer[1][0])}``.
        """
        self.query = query

        splitter = nltk.data.load('tokenizers/punkt/english.pickle')

        # Retrieval works on the sentence-tokenized context.
        context_sents = splitter.tokenize(self.context)
        retrieved = infoRX.retrieve_info(context_sents, self.query)

        # Each retrieved item carries its text at index 0.
        candidate_sents = [
            sent
            for item in retrieved
            for sent in splitter.tokenize(item[0])
        ]

        # Sentence selection (ABCNN) followed by answer extraction.
        scored_sents = self.abcnn.ans_select(query, candidate_sents)
        _best, _best_score, extracted = deploy.extract_answer_from_sentences(
            scored_sents,
            query,
            verbose=False,
        )

        # Keep only the first five answers; the score is taken from the
        # first element of the score container and converted to float.
        top_answers = [
            {'word': answer[0], 'score': float(answer[1][0])}
            for answer in extracted[:5]
        ]

        return {'answers': top_answers}
Esempio n. 5
0
    def get_query(self, query):
        """Answer ``query`` against ``self.context``, reporting progress.

        Every pipeline stage is announced through ``self.update`` and
        mirrored on stdout: paragraph retrieval via
        ``infoRX.retrieve_info``, sentence tokenization with the NLTK punkt
        English tokenizer, sentence scoring with a freshly built
        ``abcnn_model`` and answer extraction with
        ``deploy.extract_answer_from_sentences`` (verbose).

        Returns:
            dict: on success ``{'val': ..., 'answers': [...]}`` with at
            most five ``{'word', 'score'}`` entries. If sentence scoring
            or answer extraction raises, ``{'answers': [{'word': 'ERROR',
            'score': str(exc)}]}`` is returned instead.
        """
        self.query = query
        print(self.query)

        # Paragraph retrieval (information-retrieval module).
        self.update({'val': 'Ranking Paragraphs using Information Retrieval.'})
        retrieved = infoRX.retrieve_info(self.context, self.query)
        splitter = nltk.data.load('tokenizers/punkt/english.pickle')

        # NOTE(review): this debug print sits outside the try block, so an
        # empty retrieval result would presumably raise here - confirm.
        first_hit = retrieved[0]
        print(type(first_hit), first_hit)

        self.update({'val': 'Tokenizing top ranked paragraphs'})
        # Each retrieved item carries its text at index 0.
        candidate_sents = [
            sent
            for item in retrieved
            for sent in splitter.tokenize(item[0])
        ]

        print('Sentences selected by IR Module:')
        print(candidate_sents)

        self.update({
            'val': 'Sentences selected by IR Module',
            'answers': [
                {'word': sent, 'score': '\b\b'} for sent in candidate_sents
            ],
        })

        try:
            # Sentence selection (ABCNN).
            self.update({'val': 'Ranking Candidate Answer Sentences.'})
            selector = abcnn_model()
            scored_sents = selector.ans_select(query, candidate_sents)

            # Publish the scored sentences before echoing them to stdout.
            self.update({
                'val': 'Sentences scored by Sentence Selection Module',
                'answers': [
                    {'word': text, 'score': sent_score[0]}
                    for text, sent_score in scored_sents
                ],
            })

            print('\nSystem: Sentences scored by Sentence Selection Module')
            for text, sent_score in scored_sents:
                print('{0:50}\t{1}'.format(text, sent_score[0]))
            print('')

            self.update({'val': 'Generating VDT and extracting Answer.'})
            _best, _best_score, extracted = (
                deploy.extract_answer_from_sentences(
                    scored_sents,
                    query,
                    verbose=True,
                )
            )
        except Exception as exc:
            # Any failure in scoring/extraction is reported to the caller
            # as a single pseudo-answer instead of propagating.
            return {'answers': [{'word': 'ERROR', 'score': str(exc)}]}

        top_answers = []
        print('\nSystem: Candidate answers scored by Answer Extraction Module')
        for answer in extracted[:5]:
            word = answer[0]
            value = float(answer[1][0])
            print('{0:10}\t{1}'.format(word, value))
            top_answers.append({'word': word, 'score': value})

        return {
            'val': 'Candidate answers scored by Answer Extraction Module',
            'answers': top_answers,
        }
Esempio n. 6
0
print('Sentences selected by IR Module:')
print(para_sents)
print('-' * 100)

# Select Ans Sents - ABCNN
abcnn = abcnn_model()
ans_sents = abcnn.ans_select(question, para_sents)

print('\nSentence Ranking:')
for sentence, score in ans_sents:
    print('{0:50}\t{1}'.format(sentence, score[0]))
print('-' * 100)

results = deploy.extract_answer_from_sentences(
    ans_sents,
    question,
    vis=True,
)
best_ans, score, answers, tree_list, hidden_states = results

pca_hidden = get_pca_hidden(hidden_states)

print('VDT Generation:')
for tree in tree_list:
    print('Statement:', tree['sentence'])
    print('Tree:')
    tree['tree'].print(pca_glove, pca_hidden)
print('-' * 100)

ans_list = []
for x in answers[:5]: