Example #1
0
    def state_update(self, obs, action):
        """Parse a user observation, update the dialog state, and encode it.

        Args:
            obs: raw user utterance (or a dialog act when no NLU is configured).
            action: preceding system action; recorded as the start of a new
                history turn when a DST is present.

        Returns:
            Tuple ``(input_act, state, encoded_state)`` — the normalized user
            act, the (possibly tracked) dialog state, and its encoding (the
            state itself when no state encoder is configured).
        """
        # Record the system action as the start of a new history turn.
        if self.dst:
            self.dst.state['history'].append([str(action)])

        # NLU parsing: turn the utterance into a dialog act, using the
        # flattened history as context when a DST is available.
        if self.nlu:
            if self.dst:
                # Linear-time flatten; sum(history, []) would be quadratic.
                context = [u for turn in self.dst.state['history'] for u in turn]
            else:
                context = []
            input_act = self.nlu.parse(obs, context)
        else:
            # No NLU: the observation is already a dialog act.
            input_act = obs
        input_act = da_normalize(input_act, role='usr')

        # State tracking (pass the act through unchanged when no DST).
        state = self.dst.update(input_act) if self.dst else input_act

        # Append the raw user utterance to the current history turn.
        if self.dst:
            self.dst.state['history'][-1].append(str(obs))

        # Encode the state for the downstream policy, if an encoder exists.
        encoded_state = (self.state_encoder.encode(state)
                         if self.state_encoder else state)

        # Store the user action on the tracker; word-level trackers
        # (MDBT/TRADE) manage this field themselves.
        if self.nlu and self.dst:
            self.dst.state['user_action'] = input_act
        elif self.dst and not isinstance(
                self.dst, (word_dst.MDBTTracker,
                           word_dst.TRADETracker)):  # for act-in act-out agent
            self.dst.state['user_action'] = obs

        logger.nl(f'User utterance: {obs}')
        logger.act(f'Inferred user action: {input_act}')
        logger.state(f'Dialog state: {state}')

        return input_act, state, encoded_state
Example #2
0
 def step(self, action):
     """Execute one environment turn: send the system action to the user
     simulator, record the exchange, and compute the turn reward.

     Args:
         action: system dialog act for this turn (normalized in place).

     Returns:
         ``self.env_info`` — a single-element list holding the resulting
         ``State(user_response, reward, session_over)``.
     """
     action = da_normalize(action, role='sys')
     user_response, user_act, session_over, reward = self.simulator.response(
         action, self.history)
     self.last_act = user_act
     self.history.extend([f'{action}', f'{user_response}'])
     logger.act(f'Inferred system action: {self.get_sys_act()}')
     # When an evaluator is attached, it overrides the simulator's reward:
     # a scaled terminal bonus/penalty at session end, -1 per turn otherwise.
     if self.evaluator:
         self.evaluator.add_sys_da(self.get_sys_act())
         self.evaluator.add_usr_da(self.get_last_act())
         if not session_over:
             reward = -1.0
         elif self.evaluator.task_success():
             reward = 2.0 * self.simulator.policy.max_turn
         else:
             reward = -1.0 * self.simulator.policy.max_turn
     self.env_info = [State(user_response, reward, session_over)]
     return self.env_info