def inference(question, include_blacklisted=True):
    """Run inference for ``question`` and pick one answer by its rating.

    The question is passed to ``inference_helper``; the result is then run
    through ``detokenize``, ``replace_in_answers`` and ``score_answers``.
    The first answer rated ``1`` is selected. When no answer is rated ``1``
    and ``include_blacklisted`` is true, the first answer rated ``0`` is
    selected instead. If neither exists, the first answer (index ``0``) is
    reported with a score of ``-1``.

    Args:
        question: Input forwarded unchanged to ``inference_helper``.
        include_blacklisted: Whether an answer rated ``0`` may be selected
            when no answer is rated ``1`` (presumably ``0`` marks
            blacklisted answers -- confirm against ``score_answers``).

    Returns:
        A dict with the keys ``'answers'`` (all processed answers),
        ``'index'`` (position of the selected answer) and ``'score'``
        (``1``, ``0`` or ``-1``).
    """
    answers = inference_helper(question)
    answers = detokenize(answers)
    answers = replace_in_answers(answers, 'answers')
    answers_rate = score_answers(answers)

    # Ratings are tried in order of preference; 0 is only acceptable on request.
    accepted_scores = (1, 0) if include_blacklisted else (1,)
    for score in accepted_scores:
        try:
            index = answers_rate.index(score)
        except ValueError:
            # No answer carries this rating; fall through to the next one.
            continue
        return {'answers': answers, 'index': index, 'score': score}

    # Nothing acceptable was found: point at the first answer with score -1.
    return {'answers': answers, 'index': 0, 'score': -1}
Exemple #2
0
def process_questions(questions, include_blacklisted=True):
    """Run inference for one or more questions and score the answers.

    Args:
        questions: A single question or a list of questions. Anything that
            is not a list is wrapped into a one-element list.
        include_blacklisted: Forwarded to ``get_best_score``.

    Returns:
        A list with one entry per item returned by ``inference_helper``.
        The entry is ``None`` when the corresponding question was empty
        after stripping; otherwise it is a dict with the keys ``'answers'``,
        ``'scores'``, ``'best_index'`` and ``'best_score'``.
    """
    empty_marker = '##emptyquestion##'

    # A lone question is handled as a batch of one.
    if not isinstance(questions, list):
        questions = [questions]

    # Strip each question, then tokenize and BPE-encode it; blank questions
    # are replaced by a marker so positions stay aligned with the answers.
    prepared_questions = [
        apply_bpe(tokenize(text)) if text else empty_marker
        for text in (question.strip() for question in questions)
    ]

    results = []
    for position, raw_answers in enumerate(inference_helper(prepared_questions)):
        cleaned = replace_in_answers(detokenize(raw_answers))
        scores = score_answers(cleaned)
        best_index, best_score = get_best_score(scores, include_blacklisted)

        # Whatever was produced for a blank question is discarded.
        if prepared_questions[position] == empty_marker:
            results.append(None)
            continue

        results.append({
            'answers': cleaned,
            'scores': scores,
            'best_index': best_index,
            'best_score': best_score,
        })

    return results
Exemple #3
0
 def add_template(self, utterance, dialogue_state):
     """Append a row describing ``utterance`` and its context to ``self.templates``.

     Nothing is added when the collection is finalized (a message is
     printed instead), when the utterance is flagged as ambiguous, or when
     ``self.ambiguous_template`` reports its template as ambiguous.

     Args:
         utterance: Provides ``ambiguous_template``, ``template`` and the
             logical form ``lf`` (``intent`` and, for proposals,
             ``proposal_type``).
         dialogue_state: Provides ``partner_act`` and ``partner_template``,
             stored as the row's context.
     """
     if self.finalized:
         print('Cannot add templates.')
         return

     is_ambiguous = (utterance.ambiguous_template
                     or self.ambiguous_template(utterance.template))
     if is_ambiguous:
         return

     logical_form = utterance.lf
     # Only 'propose' intents carry a proposal type; the rest get 'none'.
     if logical_form.intent == 'propose':
         proposal_type = logical_form.proposal_type
     else:
         proposal_type = 'none'

     entry = dict(
         tag=logical_form.intent,
         template=detokenize(utterance.template),
         proposal_type=proposal_type,
         context_tag=dialogue_state.partner_act,
         context=detokenize(dialogue_state.partner_template),
         id=self.template_id,
     )
     # Each stored row gets the next consecutive id.
     self.template_id += 1
     self.templates.append(entry)
Exemple #4
0
 def add_template(self, utterance, dialogue_state):
     """Append a row describing ``utterance`` and its context to ``self.templates``.

     Nothing is added when the collection is finalized (a message is
     printed instead), when the utterance has no template, or when
     ``self.ambiguous_template`` reports the template as ambiguous.

     Args:
         utterance: Provides ``template`` and the logical form ``lf``
             (whose ``intent`` becomes the row's tag).
         dialogue_state: Provides ``kb.category``, ``kb.role``,
             ``partner_act`` and ``partner_template``.
     """
     if self.finalized:
         # Parenthesised form is valid on both Python 2 and Python 3.
         print('Cannot add templates.')
         return
     if not utterance.template or self.ambiguous_template(
             utterance.template):
         return
     row = {
         'category': dialogue_state.kb.category,
         'role': dialogue_state.kb.role,
         'tag': utterance.lf.intent,
         'template': detokenize(utterance.template),
         'context_tag': dialogue_state.partner_act,
         'context': detokenize(dialogue_state.partner_template),
         'id': self.template_id,
     }
     # Each stored row gets the next consecutive id.
     self.template_id += 1
     self.templates.append(row)
Exemple #5
0
    def retrieve(self, context, used_templates=None, topk=20, T=1., **kwargs):
        """Sample one of the stored templates that best match ``context``.

        Args:
            context: Context to match. A list is first converted with
                ``detokenize``; anything else is passed to the vectorizer
                as is.
            used_templates: Forwarded to ``self.get_filter``.
            topk: Maximum number of highest-scoring rows handed to
                ``self.sample``.
            T: Forwarded to ``self.sample`` (presumably a sampling
                temperature -- confirm against ``sample``).
            **kwargs: Extra filter arguments forwarded to ``self.get_filter``.

        Returns:
            ``None`` when ``self.get_filter`` returns ``None``; otherwise
            whatever ``self.sample`` returns for the selected rows.
        """
        loc = self.get_filter(used_templates=used_templates, **kwargs)
        if loc is None:
            return None

        if isinstance(context, list):
            context = detokenize(context)
        features = self.vectorizer.transform([context])
        # One score per stored row, then restricted to the rows kept by the filter.
        scores = self.tfidf_matrix * features.T
        scores = scores.todense()[loc]
        scores = np.squeeze(np.array(scores), axis=1)
        # Positions (within the filtered rows) of the topk best scores, best first.
        ids = np.argsort(scores)[::-1][:topk]

        # Filter and select once; the same frame supplies both the candidates
        # and their 'logp' values.
        candidates = self.templates[loc].iloc[ids]
        logp = candidates['logp'].values

        return self.sample(logp, candidates, T)
def inference_internal(question):
    """Run inference for ``question`` and rate the processed answers.

    The output of ``inference_helper`` is passed through ``detokenize`` and
    ``replace_in_answers`` before being rated by ``score_answers``.

    Returns:
        A tuple ``(answers, answers_rate)``: the processed answers and the
        result of ``score_answers`` for them.
    """
    raw_answers = inference_helper(question)
    cleaned = replace_in_answers(detokenize(raw_answers), 'answers')
    return cleaned, score_answers(cleaned, 'answers')
Exemple #7
0
 def detokenize_templates(self):
     """Detokenize the 'response' and 'context' of every row in ``self.templates``.

     Each row is updated in place; nothing is returned.
     """
     for entry in self.templates:
         for field in ('response', 'context'):
             entry[field] = detokenize(entry[field])
    ['word ...', 'word...'],
    ['https : / / www.youtube.com / watch ? v = r 8 b 0 PWR 1 qxI', 'https://www.youtube.com/watch?v=r8b0PWR1qxI'],
    ['test https : / / www.youtube.com / watch ? v = r 8 b 0 PWR 1 qxI test', 'test https://www.youtube.com/watch?v=r8b0PWR1qxI test'],
    ['http : / / i.imgur.com / vncZ 8 J 3.gif', 'http://i.imgur.com/vncZ8J3.gif'],
    ['test http : / / i.imgur.com / vncZ 8 J 3.gif test', 'test http://i.imgur.com/vncZ8J3.gif test'],
    ['Thanks . : )', 'Thanks. :)'],
    ['Thanks ! : )', 'Thanks! :)'],
    ['$ 3 9 0', '$390'],
    ['http : / / www.urbandictionary.com / define.php ? term = potato + meme', 'http://www.urbandictionary.com/define.php?term=potato+meme'],
    ['test http : / / www.urbandictionary.com / define.php ? term = potato + meme + test test', 'test http://www.urbandictionary.com/define.php?term=potato+meme+test test'],
    ['¿ Que?', '¿Que?'],
    ['o 7', 'o7'],
    ['No 7', 'No 7'],
    ['o 7.', 'o7.'],
    ['/ r / me_irl', '/r/me_irl'],
    ['Thank you ! : )', 'Thank you! :)'],
    ['Thank you : )', 'Thank you :)'],
    ['test Thanks : )', 'test Thanks :)'],
    ['m 8', 'm8'],
    ['com 8', 'com 8'],
    ['m 82', 'm 82'],
    ['/ r / subreddit', '/r/subreddit'],
    ['test / r / subreddit test', 'test /r/subreddit test'],
]

# NOTE(review): init() and Fore look like colorama's API -- confirm against the imports.
init()

# Run detokenize on every [input, expected] pair and print a PASS/FAIL line;
# failures also show the actual result.
for test in tests:
    detokenized_answers = detokenize([test[0]])
    if detokenized_answers[0] == test[1]:
        status = Fore.GREEN + 'PASS' + Fore.RESET
        details = ''
    else:
        status = Fore.RED + 'FAIL' + Fore.RESET
        details = '  Result: {}'.format(detokenized_answers[0])
    print('[{}]  {}  ->  {}{}'.format(status, test[0], test[1], details))
        'http://www.urbandictionary.com/define.php?term=potato+meme'
    ],
    [
        'test http : / / www.urbandictionary.com / define.php ? term = potato + meme + test test',
        'test http://www.urbandictionary.com/define.php?term=potato+meme+test test'
    ],
    ['¿ Que?', '¿Que?'],
    ['o 7', 'o7'],
    ['No 7', 'No 7'],
    ['o 7.', 'o7.'],
    ['/ r / me_irl', '/r/me_irl'],
    ['Thank you ! : )', 'Thank you! :)'],
    ['Thank you : )', 'Thank you :)'],
    ['test Thanks : )', 'test Thanks :)'],
    ['m 8', 'm8'],
    ['com 8', 'com 8'],
    ['m 82', 'm 82'],
    ['/ r / subreddit', '/r/subreddit'],
    ['test / r / subreddit test', 'test /r/subreddit test'],
]

# NOTE(review): init() and Fore look like colorama's API -- confirm against the imports.
init()

# Check detokenize against each [input, expected] pair and report the outcome.
for test in tests:
    detokenized_answers = detokenize([test[0]])
    matches = detokenized_answers[0] == test[1]

    label = (Fore.GREEN + 'PASS' + Fore.RESET) if matches else (Fore.RED + 'FAIL' + Fore.RESET)
    # The actual output is appended only when it differs from the expectation.
    suffix = '' if matches else '  Result: {}'.format(detokenized_answers[0])

    print('[{}]  {}  ->  {}{}'.format(label, test[0], test[1], suffix))
 def inference_internal(self, question):
     """Tokenize ``question``, run inference and rate the processed answers.

     The output of ``self.do_inference`` is passed through ``detokenize``
     and ``replace_in_answers`` before being rated by ``score_answers``.

     Returns:
         A tuple ``(answers, answers_rate)``: the processed answers and the
         result of ``score_answers`` for them.
     """
     raw_answers = self.do_inference(tokenize(question))
     cleaned = replace_in_answers(detokenize(raw_answers), 'answers')
     return cleaned, score_answers(cleaned, 'answers')