def prepare_data(self, type=None):
    """Build (utterance, template id) pairs from a bAbI task 6 dialog file.

    Args:
        type: Which split to load: ``'Train'`` or ``'Test'``. The name
            shadows the builtin but is kept so keyword callers still work.

    Returns:
        A tuple ``(trainset, dialog_indices)``. ``trainset`` is a list of
        ``(utterance, template_id)`` tuples, where ``template_id`` is the
        result of ``self.get_template_id`` for the matching response.
        ``dialog_indices`` is the second value returned by
        ``util.read_dialogs(with_indices=True, ...)``.

    Raises:
        ValueError: If ``type`` is neither ``'Train'`` nor ``'Test'``.
    """
    # Pick the dialog file for the requested split. Any other value
    # (including the default None) used to fall through both branches and
    # crash later on an unbound ``dialogs``; fail early and clearly instead.
    if type == 'Train':
        file_name = (
            '/root/jude/data/dialog-bAbI-tasks/dialog-babi-task6-dstc2-trn.txt'
        )
    elif type == 'Test':
        file_name = (
            '/root/jude/data/dialog-bAbI-tasks/dialog-babi-task6-dstc2-tst.txt'
        )
    else:
        raise ValueError(
            "type must be 'Train' or 'Test', got %r" % (type,))

    # get dialogs from file
    dialogs, dialog_indices = util.read_dialogs(
        with_indices=True, file_name=file_name, babi_num=6)

    # get utterances
    utterances = util.get_utterances(dialogs)

    # get responses, mapped to their template ids
    responses = [
        self.get_template_id(response)
        for response in util.get_responses(dialogs)
    ]

    # zip() stops at the shorter sequence, exactly like the original loop.
    trainset = list(zip(utterances, responses))
    return trainset, dialog_indices
def get_action_templates(self):
    """Return the sorted, de-duplicated action templates.

    Every response from ``util.get_responses()`` is passed through
    ``self.et.extract_entities(..., update=False)``. In the results, each
    space-separated token containing ``'resto_'`` is replaced by a
    placeholder tag, and the distinct templates are returned in sorted
    order.

    Returns:
        A sorted list of unique template strings.
    """
    # Run the entity extractor over every raw response first, then dedupe.
    entity_texts = [
        self.et.extract_entities(raw, update=False)
        for raw in util.get_responses()
    ]
    unique_texts = set(entity_texts)

    def to_template(text):
        """Replace every 'resto_' token in *text* with its placeholder."""
        tokens = []
        for token in text.split(' '):
            if 'resto_' not in token:
                tokens.append(token)
            elif 'phone' in token:
                tokens.append('<info_phone>')
            elif 'address' in token:
                tokens.append('<info_address>')
            else:
                tokens.append('<restaurant>')
        return ' '.join(tokens)

    # extract restaurant entities
    return sorted({to_template(text) for text in unique_texts})
def get_action_templates(self):
    """Return the sorted, de-duplicated action templates without '<UNK>'.

    Every response from ``util.get_responses()`` is passed through
    ``self.filter_response``. Duplicates and the ``'<UNK>'`` entry are
    dropped before sorting.

    Returns:
        A sorted list of the distinct filtered responses, never containing
        ``'<UNK>'``.
    """
    # Filter every response first, then dedupe, as the original did.
    filtered = [
        self.filter_response(response) for response in util.get_responses()
    ]
    templates = set(filtered)
    # discard() instead of list.remove(): remove() raised ValueError when no
    # response filtered to '<UNK>' (for example, an empty response list).
    templates.discard('<UNK>')
    return sorted(templates)
def prepare_data(self):
    """Pair each utterance with its response id minus one.

    Returns:
        A tuple ``(trainset, dialog_indices)``. ``trainset`` is a list of
        ``(utterance, action_template_id)`` tuples, e.g.
        ``[(utterance_1, action_template_id_1), ...]``. ``dialog_indices``
        comes from ``util.read_dialogs(with_indices=True)``, e.g.
        ``[{'start': 0, 'end': 20}, ...]``.
    """
    # get dialogs from file
    dialogs, dialog_indices = util.read_dialogs(with_indices=True)

    # get utterances
    utterances = util.get_utterances(dialogs)

    # Each response is converted with int() and shifted down by one
    # (presumably 1-based ids turned into 0-based ones; confirm with caller).
    trainset = [
        (utterance, int(response_id) - 1)
        for utterance, response_id in zip(utterances, util.get_responses())
    ]
    return trainset, dialog_indices
def prepare_data(self):
    """Pair each utterance with the template id of its response.

    Returns:
        A tuple ``(trainset, dialog_indices)``. ``trainset`` is a list of
        ``(utterance, template_id)`` tuples, where ``template_id`` is the
        result of ``self.get_template_id`` for the matching response.
        ``dialog_indices`` is the second value returned by
        ``util.read_dialogs(with_indices=True)``.
    """
    # get dialogs from file
    dialogs, dialog_indices = util.read_dialogs(with_indices=True)

    # get utterances
    utterances = util.get_utterances(dialogs)

    # Map every response to its template id before pairing, so all
    # responses are converted even if there are fewer utterances.
    template_ids = []
    for response in util.get_responses(dialogs):
        template_ids.append(self.get_template_id(response))

    return list(zip(utterances, template_ids)), dialog_indices
def get_action_templates(self):
    """Return the sorted, de-duplicated action templates.

    Every response from ``util.get_responses()`` is passed through
    ``self.et.extract_entities(..., update=False)``. In the results, each
    space-separated token containing ``'resto_'`` is replaced by a
    placeholder tag. As a side effect, any entity-extracted response whose
    template is exactly ``'<info_phone> 입니다'`` is printed to stdout.

    Returns:
        A sorted list of unique template strings.
    """
    # Run the entity extractor over every raw response first, then dedupe.
    entity_texts = [
        self.et.extract_entities(raw, update=False)
        for raw in util.get_responses()
    ]
    unique_texts = list(set(entity_texts))

    def to_template(text):
        """Replace every 'resto_' token in *text* with its placeholder."""
        tokens = []
        for token in text.split(' '):
            if 'resto_' not in token:
                tokens.append(token)
            elif 'phone' in token:
                tokens.append('<info_phone>')
            elif 'address' in token:
                tokens.append('<info_address>')
            else:
                tokens.append('<restaurant>')
        return ' '.join(tokens)

    templates = []
    for text in unique_texts:
        template = to_template(text)
        # Diagnostic output: show which responses collapse to this template.
        if template == '<info_phone> 입니다':
            print(text)
        templates.append(template)

    # extract restaurant entities
    return sorted(set(templates))