def state_update(self, obs, action):
    """Advance the tracked dialog state by one user turn.

    Args:
        obs: raw user observation — an utterance when an NLU module is
            configured, otherwise an already-structured user action.
        action: the most recent system action; recorded as the start of a
            new history turn before the user side is parsed.

    Returns:
        Tuple ``(input_act, state, encoded_state)`` — the normalized user
        action, the (possibly DST-updated) dialog state, and the encoded
        state (identical to ``state`` when no state encoder is configured).
    """
    # Open a new history turn with the system action.
    if self.dst:
        self.dst.state['history'].append([str(action)])
    # NLU parsing. The turn-structured history is flattened into a single
    # context list; a nested comprehension keeps this linear, where the
    # previous sum(..., []) idiom was quadratic in history length.
    if self.nlu:
        context = ([item for turn in self.dst.state['history'] for item in turn]
                   if self.dst else [])
        input_act = self.nlu.parse(obs, context)
    else:
        input_act = obs
    input_act = da_normalize(input_act, role='usr')
    # State tracking: without a DST, the parsed act itself is the state.
    state = self.dst.update(input_act) if self.dst else input_act
    # Close the current history turn with the user observation.
    if self.dst:
        self.dst.state['history'][-1].append(str(obs))
    # Encode the state for the policy, if an encoder is configured.
    encoded_state = self.state_encoder.encode(state) if self.state_encoder else state
    if self.nlu and self.dst:
        self.dst.state['user_action'] = input_act
    elif self.dst and not isinstance(
            self.dst, (word_dst.MDBTTracker, word_dst.TRADETracker)):
        # for act-in act-out agent
        self.dst.state['user_action'] = obs
    logger.nl(f'User utterance: {obs}')
    logger.act(f'Inferred user action: {input_act}')
    logger.state(f'Dialog state: {state}')
    return input_act, state, encoded_state
def step(self, action):
    """Execute one environment step: send a system action to the user
    simulator and package the response, reward, and termination flag.

    Args:
        action: the system dialog action for this turn.

    Returns:
        ``self.env_info`` — a single-element list holding a ``State`` with
        the user response, the computed reward, and the session-over flag.
    """
    action = da_normalize(action, role='sys')
    user_response, user_act, session_over, reward = self.simulator.response(
        action, self.history)
    self.last_act = user_act
    self.history.extend([f'{action}', f'{user_response}'])
    logger.act(f'Inferred system action: {self.get_sys_act()}')
    # Feed both sides of the turn to the evaluator and, when one is
    # configured, recompute the reward from task success instead of
    # using the simulator-provided value.
    if self.evaluator:
        self.evaluator.add_sys_da(self.get_sys_act())
        self.evaluator.add_usr_da(self.get_last_act())
        if not session_over:
            reward = -1.0
        else:
            max_turn = self.simulator.policy.max_turn
            if self.evaluator.task_success():
                reward = 2.0 * max_turn
            else:
                reward = -1.0 * max_turn
    self.env_info = [State(user_response, reward, session_over)]
    return self.env_info