def inference(question, include_blacklisted=True):
    """Run inference for a single question and pick the best answer.

    Args:
        question: the raw question passed to ``inference_helper``.
        include_blacklisted: when True, fall back to answers scored 0
            (blacklisted) if no answer scored 1 exists.

    Returns:
        dict with keys:
            'answers': the detokenized, post-processed answer list,
            'index':   index of the chosen answer (0 when nothing matched),
            'score':   1 for a good answer, 0 for a blacklisted fallback,
                       -1 when no usable answer was found.
    """
    answers = inference_helper(question)
    answers = detokenize(answers)
    answers = replace_in_answers(answers, 'answers')
    answers_rate = score_answers(answers)

    # Prefer the first answer scored 1 (good). list.index raises ValueError
    # when the value is absent — catch only that, not a bare except.
    try:
        index = answers_rate.index(1)
        score = 1
    except ValueError:
        index = None
        score = -1

    # Optionally fall back to the first blacklisted answer (score 0).
    if index is None and include_blacklisted:
        try:
            index = answers_rate.index(0)
            score = 0
        except ValueError:
            index = 0
            score = -1

    # Nothing matched at all: default to the first answer, flagged -1.
    if index is None:
        index = 0
        score = -1

    return {'answers': answers, 'index': index, 'score': score}
def process_questions(questions, include_blacklisted=True):
    """Prepare questions, run batched inference, and score every answer set.

    Returns one entry per question: None for blank questions, otherwise a
    dict with the answers, their scores, and the best index/score pair.
    """
    # Accept a single question as well as a list of them.
    if not isinstance(questions, list):
        questions = [questions]

    # Strip, tokenize and BPE-encode; blanks get a sentinel marker.
    prepared_questions = [
        apply_bpe(tokenize(q.strip())) if q.strip() else '##emptyquestion##'
        for q in questions
    ]

    # One batched inference call for all prepared questions.
    answers_list = inference_helper(prepared_questions)

    # Post-process each answer set and attach its best-scoring answer.
    prepared_answers_list = []
    for idx, raw_answers in enumerate(answers_list):
        processed = detokenize(raw_answers)
        processed = replace_in_answers(processed)
        answers_score = score_answers(processed)
        best_index, best_score = get_best_score(answers_score,
                                                include_blacklisted)
        if prepared_questions[idx] == '##emptyquestion##':
            prepared_answers_list.append(None)
        else:
            prepared_answers_list.append({
                'answers': processed,
                'scores': answers_score,
                'best_index': best_index,
                'best_score': best_score,
            })
    return prepared_answers_list
def add_template(self, utterance, dialogue_state):
    """Record the utterance's template with its intent, proposal type and
    partner context; no-op once finalized or when the template is ambiguous."""
    if self.finalized:
        print('Cannot add templates.')
        return
    if utterance.ambiguous_template or self.ambiguous_template(
            utterance.template):
        return
    intent = utterance.lf.intent
    # Only 'propose' utterances carry a meaningful proposal type.
    proposal_type = utterance.lf.proposal_type if intent == 'propose' else 'none'
    self.templates.append({
        'tag': intent,
        'template': detokenize(utterance.template),
        'proposal_type': proposal_type,
        'context_tag': dialogue_state.partner_act,
        'context': detokenize(dialogue_state.partner_template),
        'id': self.template_id,
    })
    self.template_id += 1
def add_template(self, utterance, dialogue_state):
    """Record the utterance's template tagged with the KB category/role,
    intent, and partner context; no-op once finalized, when the utterance
    has no template, or when the template is ambiguous."""
    if self.finalized:
        # Was a Python 2 print statement ("print '...'"), which is a
        # syntax error under Python 3; use the print() call like the
        # sibling add_template implementation does.
        print('Cannot add templates.')
        return
    if not utterance.template or self.ambiguous_template(
            utterance.template):
        return
    row = {
        'category': dialogue_state.kb.category,
        'role': dialogue_state.kb.role,
        'tag': utterance.lf.intent,
        'template': detokenize(utterance.template),
        'context_tag': dialogue_state.partner_act,
        'context': detokenize(dialogue_state.partner_template),
        'id': self.template_id,
    }
    self.template_id += 1
    self.templates.append(row)
def retrieve(self, context, used_templates=None, topk=20, T=1., **kwargs):
    """Score stored templates against `context` by TF-IDF similarity and
    sample one of the top-k candidates.

    Args:
        context: token list or plain string describing the dialogue context.
        used_templates: passed to get_filter to exclude already-used rows.
        topk: number of highest-similarity candidates to sample from.
        T: sampling temperature passed to self.sample.

    Returns:
        The result of self.sample over the top-k candidates, or None when
        the filter leaves nothing to retrieve.
    """
    loc = self.get_filter(used_templates=used_templates, **kwargs)
    if loc is None:
        return None
    if isinstance(context, list):
        context = detokenize(context)
    features = self.vectorizer.transform([context])
    scores = self.tfidf_matrix * features.T
    scores = scores.todense()[loc]
    scores = np.squeeze(np.array(scores), axis=1)
    # Highest-similarity rows first, truncated to topk.
    ids = np.argsort(scores)[::-1][:topk]
    # The original computed this same selection twice (as `candidates`
    # and `rows`); one selection serves both purposes.
    candidates = self.templates[loc].iloc[ids]
    logp = candidates['logp'].values
    return self.sample(logp, candidates, T)
def inference_internal(question):
    """Run inference for `question`; return (answers, per-answer scores)."""
    raw = inference_helper(question)
    processed = replace_in_answers(detokenize(raw), 'answers')
    return (processed, score_answers(processed, 'answers'))
def detokenize_templates(self):
    """Detokenize the 'response' and 'context' fields of every stored
    template, in place."""
    for row in self.templates:
        for field in ('response', 'context'):
            row[field] = detokenize(row[field])
['word ...', 'word...'], ['https : / / www.youtube.com / watch ? v = r 8 b 0 PWR 1 qxI', 'https://www.youtube.com/watch?v=r8b0PWR1qxI'], ['test https : / / www.youtube.com / watch ? v = r 8 b 0 PWR 1 qxI test', 'test https://www.youtube.com/watch?v=r8b0PWR1qxI test'], ['http : / / i.imgur.com / vncZ 8 J 3.gif', 'http://i.imgur.com/vncZ8J3.gif'], ['test http : / / i.imgur.com / vncZ 8 J 3.gif test', 'test http://i.imgur.com/vncZ8J3.gif test'], ['Thanks . : )', 'Thanks. :)'], ['Thanks ! : )', 'Thanks! :)'], ['$ 3 9 0', '$390'], ['http : / / www.urbandictionary.com / define.php ? term = potato + meme', 'http://www.urbandictionary.com/define.php?term=potato+meme'], ['test http : / / www.urbandictionary.com / define.php ? term = potato + meme + test test', 'test http://www.urbandictionary.com/define.php?term=potato+meme+test test'], ['¿ Que?', '¿Que?'], ['o 7', 'o7'], ['No 7', 'No 7'], ['o 7.', 'o7.'], ['/ r / me_irl', '/r/me_irl'], ['Thank you ! : )', 'Thank you! :)'], ['Thank you : )', 'Thank you :)'], ['test Thanks : )', 'test Thanks :)'], ['m 8', 'm8'], ['com 8', 'com 8'], ['m 82', 'm 82'], ['/ r / subreddit', '/r/subreddit'], ['test / r / subreddit test', 'test /r/subreddit test'], ] init() for test in tests: detokenized_answers = detokenize([test[0]]) print('[{}] {} -> {}{}'.format(Fore.GREEN + 'PASS' + Fore.RESET if detokenized_answers[0] == test[1] else Fore.RED + 'FAIL' + Fore.RESET, test[0], test[1], '' if detokenized_answers[0] == test[1] else ' Result: {}'.format(detokenized_answers[0])))
'http://www.urbandictionary.com/define.php?term=potato+meme' ], [ 'test http : / / www.urbandictionary.com / define.php ? term = potato + meme + test test', 'test http://www.urbandictionary.com/define.php?term=potato+meme+test test' ], ['¿ Que?', '¿Que?'], ['o 7', 'o7'], ['No 7', 'No 7'], ['o 7.', 'o7.'], ['/ r / me_irl', '/r/me_irl'], ['Thank you ! : )', 'Thank you! :)'], ['Thank you : )', 'Thank you :)'], ['test Thanks : )', 'test Thanks :)'], ['m 8', 'm8'], ['com 8', 'com 8'], ['m 82', 'm 82'], ['/ r / subreddit', '/r/subreddit'], ['test / r / subreddit test', 'test /r/subreddit test'], ] init() for test in tests: detokenized_answers = detokenize([test[0]]) print('[{}] {} -> {}{}'.format( Fore.GREEN + 'PASS' + Fore.RESET if detokenized_answers[0] == test[1] else Fore.RED + 'FAIL' + Fore.RESET, test[0], test[1], '' if detokenized_answers[0] == test[1] else ' Result: {}'.format(detokenized_answers[0])))
def inference_internal(self, question):
    """Tokenize `question`, run model inference, and return the processed
    answers together with their scores as (answers, answers_rate)."""
    answers = self.do_inference(tokenize(question))
    answers = replace_in_answers(detokenize(answers), 'answers')
    return (answers, score_answers(answers, 'answers'))