Beispiel #1
0
    def _convert_batch(self, py_batch, prev_z_py=None):
        """Turn a batch of python lists into padded arrays and model tensors.

        Depending on ``cfg.prev_z_method`` the previous belief span is either
        prepended to every user utterance (``'concat'``) or padded on its own
        and handed back through ``kw_ret`` (``'separate'``).  In both modes the
        span is cut right after its first ``EOS_Z2`` id, and ids in
        ``prev_z_py`` that are ``>= cfg.vocab_size`` are overwritten with 2
        (unk) in place.

        Args:
            py_batch: mapping that provides the keys ``'user'``, ``'u_len'``,
                ``'degree'``, ``'bspan'``, ``'response'`` and ``'m_len'``.
                In ``'concat'`` mode ``py_batch['user']`` and
                ``py_batch['u_len']`` are modified in place.
            prev_z_py: optional list of id lists (one per batch entry) with
                the previous turn's belief span; modified in place.

        Returns:
            The tuple ``(u_input, u_input_np, z_input, m_input, m_input_np,
            u_len, m_len, degree_input, kw_ret)``.  ``kw_ret`` always holds
            ``'z_input_np'`` and, in ``'separate'`` mode, also
            ``'prev_z_len'``, ``'prev_z_input'`` and ``'prev_z_input_np'``.
        """
        user_seqs = py_batch['user']
        user_lens = py_batch['u_len']
        kw_ret = {}

        def as_long_tensor(array):
            # numpy array -> long tensor, moved by cuda_
            return cuda_(Variable(torch.from_numpy(array).long()))

        if cfg.prev_z_method == 'concat' and prev_z_py is not None:
            for i, utterance in enumerate(user_seqs):
                eos_z2 = self.reader.vocab.encode('EOS_Z2')
                prev_span = prev_z_py[i]
                # Keep the span up to and including its first EOS_Z2, or the
                # whole span when the marker is absent.
                keep = len(prev_span)
                if eos_z2 in prev_span:
                    keep = prev_span.index(eos_z2) + 1
                user_seqs[i] = prev_span[:keep] + utterance
                user_lens[i] = len(user_seqs[i])
                # The concatenated utterance was built above, so it keeps the
                # original ids; only prev_z_py itself is clamped to unk.
                prev_span[:] = [2 if tok >= cfg.vocab_size else tok for tok in prev_span]
        elif cfg.prev_z_method == 'separate' and prev_z_py is not None:
            for i, prev_span in enumerate(prev_z_py):
                eos_z2 = self.reader.vocab.encode('EOS_Z2')
                if eos_z2 in prev_span:
                    end = prev_span.index(eos_z2) + 1
                    if end != len(prev_span):
                        # Drop everything after the first EOS_Z2.
                        prev_span = prev_span[:end]
                        prev_z_py[i] = prev_span
                prev_span[:] = [2 if tok >= cfg.vocab_size else tok for tok in prev_span]
            prev_z_input_np = pad_sequences(
                prev_z_py, cfg.max_ts, padding='post', truncating='pre'
            ).transpose((1, 0))
            kw_ret['prev_z_len'] = np.array([len(span) for span in prev_z_py])
            kw_ret['prev_z_input'] = as_long_tensor(prev_z_input_np)
            kw_ret['prev_z_input_np'] = prev_z_input_np

        # Every padded array is transposed — presumably to (time, batch);
        # TODO confirm against pad_sequences' output layout.
        degree_input_np = np.array(py_batch['degree'])
        u_input_np = pad_sequences(
            user_seqs, cfg.max_ts, padding='post', truncating='pre'
        ).transpose((1, 0))
        z_input_np = pad_sequences(py_batch['bspan'], padding='post').transpose((1, 0))
        m_input_np = pad_sequences(
            py_batch['response'], cfg.max_ts, padding='post', truncating='post'
        ).transpose((1, 0))

        u_len = np.array(user_lens)
        m_len = np.array(py_batch['m_len'])

        degree_input = cuda_(Variable(torch.from_numpy(degree_input_np).float()))
        u_input = as_long_tensor(u_input_np)
        z_input = as_long_tensor(z_input_np)
        m_input = as_long_tensor(m_input_np)

        kw_ret['z_input_np'] = z_input_np

        return (u_input, u_input_np, z_input, m_input, m_input_np, u_len, m_len,
                degree_input, kw_ret)
Beispiel #2
0
 def z2degree(gen_z):
     """Look up database matches for a generated belief span.

     ``gen_z`` is decoded up to ``EOS_Z2``; the tokens before ``EOS_Z1`` (all
     tokens when that marker is missing) are used as search constraints, with
     underscores turned into spaces.

     Returns:
         ``(degree, degree_input)`` where ``degree`` is the result of
         ``self.reader.db_search`` and ``degree_input`` is the tensor built
         from ``self.reader._degree_vec_mapping(len(degree))`` with one extra
         leading dimension.
     """
     tokens = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2').split()
     if 'EOS_Z1' in tokens:
         tokens = tokens[:tokens.index('EOS_Z1')]
     constraints = [token.replace('_', ' ') for token in tokens]
     degree = self.reader.db_search(constraints)
     degree_vec = self.reader._degree_vec_mapping(len(degree))
     degree_input = cuda_(Variable(torch.Tensor(degree_vec).unsqueeze(0)))
     return degree, degree_input
Beispiel #3
0
    def response(self, usr):
        """
        Generate agent response given user input.

        The utterance is lower-cased, tokenised and terminated with ``EOS_U``
        before it is encoded and passed to the model in ``'test'`` mode
        together with the state kept in ``self.kw_ret``.  Slot placeholders in
        the decoded reply are filled from one randomly picked database match.
        When ``cfg.prev_z_method`` is ``'separate'`` the generated belief span
        is stored back into ``self.kw_ret`` for the next call.

        Args:
            usr (str):
                The input to the agent.
        Returns:
            response (str):
                The response generated by the agent.
        """
        # NOTE(review): tokenisation restored to match predict()/interact();
        # assumes word_tokenize (NLTK-style) is imported at file level — confirm.
        usr_words = word_tokenize(usr.lower()) + ['EOS_U']
        u_len = np.array([len(usr_words)])
        usr_indices = self.m.reader.vocab.sentence_encode(usr_words)
        # Single dialogue: add a second axis of size one to the id sequence.
        u_input_np = np.array(usr_indices)[:, np.newaxis]
        u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
        m_idx, z_idx, degree = self.m.m(mode='test',
                                        degree_input=None,
                                        z_input=None,
                                        u_input=u_input,
                                        u_input_np=u_input_np,
                                        u_len=u_len,
                                        m_input=None,
                                        m_input_np=None,
                                        m_len=None,
                                        turn_states=None,
                                        **self.kw_ret)
        # Pick one matching entry at random; no match means no slot is filled.
        venue = random.sample(degree, 1)[0] if degree else dict()
        words = [self.m.reader.vocab.decode(_) for _ in m_idx[0]]
        if 'EOS_M' in words:
            words = words[:words.index('EOS_M')]
        filled = []
        for word in words:
            if 'SLOT' in word:
                # Strip the trailing five characters (presumably '_SLOT') to
                # get the key looked up in the venue.
                word = word[:-5]
                if word in venue:
                    value = venue[word]
                    if value != '?':
                        filled.append(value)
            elif word.endswith('reference]'):
                if 'ref' in venue:
                    filled.append(venue['ref'])
            else:
                filled.append(word)
        reply = denormalize(' '.join(filled))
        if cfg.prev_z_method == 'separate':
            eob = self.m.reader.vocab.encode('EOS_Z2')
            # Cut the belief span right after its first EOS_Z2.
            if eob in z_idx[0] and z_idx[0].index(eob) != len(z_idx[0]) - 1:
                idx = z_idx[0].index(eob)
                z_idx[0] = z_idx[0][:idx + 1]
            for j, word in enumerate(z_idx[0]):
                if word >= cfg.vocab_size:
                    z_idx[0][j] = 2  # unk
            prev_z_input_np = pad_sequences(z_idx,
                                            cfg.max_ts,
                                            padding='post',
                                            truncating='pre').transpose((1, 0))
            prev_z_len = np.array([len(_) for _ in z_idx])
            prev_z_input = cuda_(
                Variable(torch.from_numpy(prev_z_input_np).long()))
            # Carry the belief span over to the next call.
            self.kw_ret['prev_z_len'] = prev_z_len
            self.kw_ret['prev_z_input'] = prev_z_input
            self.kw_ret['prev_z_input_np'] = prev_z_input_np
        return reply
Beispiel #4
0
    def predict(self, usr, kw_ret):
        """Run one dialogue turn and return the updated state dictionary.

        Args:
            usr (str): raw user utterance; it is lower-cased and tokenised
                here.
            kw_ret (dict): state from the previous turn.  It is modified in
                place and also returned.  If it contains
                ``'prev_z_input_np'`` it must contain ``'prev_z_len'`` too;
                both are converted to numpy arrays before the model call.

        Returns:
            dict: ``kw_ret`` with the generated reply under ``'sys'``.  When
            ``cfg.prev_z_method`` is ``'separate'`` it also carries
            ``'prev_z_len'`` and ``'prev_z_input_np'`` as plain lists and no
            ``'prev_z_input'`` entry (presumably to keep the dict
            serialisable — confirm with the caller).  The temporary
            ``'func'`` entry is removed before returning.
        """
        def z2degree(gen_z):
            # Decode the belief span, keep the tokens before EOS_Z1 as
            # database constraints and build the match-count tensor.
            tokens = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2').split()
            if 'EOS_Z1' in tokens:
                tokens = tokens[:tokens.index('EOS_Z1')]
            constraints = [token.replace('_', ' ') for token in tokens]
            degree = self.reader.db_search(constraints)
            degree_vec = self.reader._degree_vec_mapping(len(degree))
            degree_input = cuda_(Variable(torch.Tensor(degree_vec).unsqueeze(0)))
            return degree, degree_input

        self.m.eval()

        kw_ret['func'] = z2degree
        if 'prev_z_input_np' in kw_ret:
            # State arrives as plain lists; rebuild the arrays and the tensor.
            kw_ret['prev_z_len'] = np.array(kw_ret['prev_z_len'])
            restored_np = np.array(kw_ret['prev_z_input_np'])
            kw_ret['prev_z_input_np'] = restored_np
            kw_ret['prev_z_input'] = cuda_(Variable(torch.Tensor(restored_np).long()))

        usr_words = word_tokenize(usr.lower()) + ['EOS_U']
        u_len = np.array([len(usr_words)])
        usr_indices = self.reader.vocab.sentence_encode(usr_words)
        # Single dialogue: add a second axis of size one to the id sequence.
        u_input_np = np.array(usr_indices)[:, np.newaxis]
        u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
        m_idx, z_idx, degree = self.m(
            mode='test', degree_input=None, z_input=None,
            u_input=u_input, u_input_np=u_input_np, u_len=u_len,
            m_input=None, m_input_np=None, m_len=None,
            turn_states=None, **kw_ret)

        # One random database match supplies the slot values, if any exist.
        if degree:
            venue = random.sample(degree, 1)[0]
        else:
            venue = dict()

        tokens = [self.reader.vocab.decode(_) for _ in m_idx[0]]
        if 'EOS_M' in tokens:
            tokens = tokens[:tokens.index('EOS_M')]

        reply_words = []
        for token in tokens:
            if 'SLOT' not in token:
                reply_words.append(token)
                continue
            # Strip the trailing five characters (presumably '_SLOT').
            slot = token[:-5]
            if slot in venue.keys():
                value = venue[slot]
                if value != '?':
                    reply_words.append(value.replace(' ', '_'))
        kw_ret['sys'] = ' '.join(reply_words)

        if cfg.prev_z_method == 'separate':
            eos_z2 = self.reader.vocab.encode('EOS_Z2')
            span = z_idx[0]
            if eos_z2 in span:
                end = span.index(eos_z2) + 1
                if end != len(span):
                    # Drop everything after the first EOS_Z2.
                    span = span[:end]
                    z_idx[0] = span
            span[:] = [2 if tok >= cfg.vocab_size else tok for tok in span]  # 2 = unk
            prev_z_input_np = pad_sequences(
                z_idx, cfg.max_ts, padding='post', truncating='pre'
            ).transpose((1, 0))
            prev_z_len = np.array([len(_) for _ in z_idx])
            kw_ret['prev_z_len'] = prev_z_len.tolist()
            kw_ret['prev_z_input_np'] = prev_z_input_np.tolist()
            kw_ret.pop('prev_z_input', None)

        del kw_ret['func']

        return kw_ret
Beispiel #5
0
 def interact(self):
     """Chat with the model on the console until the user types ``END``.

     Each line read from standard input is lower-cased, tokenised, terminated
     with ``EOS_U`` and passed to the model in ``'test'`` mode.  The reply is
     printed after slot placeholders have been filled from one randomly
     picked database match.  Typing ``RESET`` discards the dialogue state
     kept between turns; typing ``END`` leaves the loop.  When
     ``cfg.prev_z_method`` is ``'separate'`` the generated belief span is
     carried over to the next turn through ``kw_ret``.
     """
     def z2degree(gen_z):
         # Decode the belief span, keep the tokens before EOS_Z1 as database
         # constraints and build the match-count tensor for the model.
         gen_bspan = self.reader.vocab.sentence_decode(gen_z, eos='EOS_Z2')
         constraint_request = gen_bspan.split()
         constraints = constraint_request[:constraint_request.index('EOS_Z1')] if 'EOS_Z1' \
             in constraint_request else constraint_request
         constraints = [ent.replace('_', ' ') for ent in constraints]
         degree = self.reader.db_search(constraints)
         degree_input_list = self.reader._degree_vec_mapping(len(degree))
         degree_input = cuda_(Variable(torch.Tensor(degree_input_list).unsqueeze(0)))
         return degree, degree_input

     def denormalize(uttr):
         # Re-attach suffixes that appear as separate ' -s' / ' -ly' / ' -er' tokens.
         uttr = uttr.replace(' -s', 's')
         uttr = uttr.replace(' -ly', 'ly')
         uttr = uttr.replace(' -er', 'er')
         return uttr

     self.m.eval()
     print('Start interaction.')
     kw_ret = {'func': z2degree}
     while True:
         usr = input('usr: ')
         if usr == 'END':
             break
         if usr == 'RESET':
             # Forget the previous belief span and start a fresh dialogue.
             kw_ret = {'func': z2degree}
             continue
         usr = word_tokenize(usr.lower())
         usr_words = usr + ['EOS_U']
         u_len = np.array([len(usr_words)])
         usr_indices = self.reader.vocab.sentence_encode(usr_words)
         # Single dialogue: add a second axis of size one to the id sequence.
         u_input_np = np.array(usr_indices)[:, np.newaxis]
         u_input = cuda_(Variable(torch.from_numpy(u_input_np).long()))
         m_idx, z_idx, degree = self.m(mode='test', degree_input=None, z_input=None,
                                       u_input=u_input, u_input_np=u_input_np, u_len=u_len,
                                       m_input=None, m_input_np=None, m_len=None,
                                       turn_states=None, **kw_ret)
         # Pick one matching entry at random; no match means no slot is filled.
         venue = random.sample(degree, 1)[0] if degree else dict()
         words = [self.reader.vocab.decode(_) for _ in m_idx[0]]
         if 'EOS_M' in words:
             words = words[:words.index('EOS_M')]
         filled = []
         for word in words:
             if 'SLOT' in word:
                 # Strip the trailing five characters (presumably '_SLOT').
                 word = word[:-5]
                 if word in venue:
                     value = venue[word]
                     if value != '?':
                         filled.append(value)
             else:
                 filled.append(word)
         reply = denormalize(' '.join(filled))
         print('sys:', reply)
         if cfg.prev_z_method == 'separate':
             eob = self.reader.vocab.encode('EOS_Z2')
             # Cut the belief span right after its first EOS_Z2.
             if eob in z_idx[0] and z_idx[0].index(eob) != len(z_idx[0]) - 1:
                 idx = z_idx[0].index(eob)
                 z_idx[0] = z_idx[0][:idx + 1]
             for j, word in enumerate(z_idx[0]):
                 if word >= cfg.vocab_size:
                     z_idx[0][j] = 2  # unk
             prev_z_input_np = pad_sequences(z_idx, cfg.max_ts, padding='post', truncating='pre').transpose((1, 0))
             prev_z_len = np.array([len(_) for _ in z_idx])
             prev_z_input = cuda_(Variable(torch.from_numpy(prev_z_input_np).long()))
             # Carry the belief span over to the next turn.
             kw_ret['prev_z_len'] = prev_z_len
             kw_ret['prev_z_input'] = prev_z_input
             kw_ret['prev_z_input_np'] = prev_z_input_np