def state_update(self, obs, action):
    """Run one turn of the understanding pipeline: NLU parse, dialog-state
    tracking, and state encoding.

    obs: the raw user observation (utterance or dialog act, depending on
        whether an NLU component is configured).
    action: the system action taken on the previous turn; recorded into the
        tracker history before parsing so the NLU sees full context.
    Returns (input_act, state, encoded_state).
    """
    # update history: log the system action as the start of a new turn
    # BEFORE parsing, so the flattened history passed to NLU includes it.
    if self.dst:
        self.dst.state['history'].append([str(action)])
    # NLU parsing — sum(..., []) flattens the per-turn history lists into
    # one flat list of strings; with no NLU the raw obs passes through.
    input_act = self.nlu.parse(
        obs,
        sum(self.dst.state['history'], []) if self.dst else []) if self.nlu else obs
    # state tracking — with no tracker, the parsed act itself is the state
    state = self.dst.update(input_act) if self.dst else input_act
    # update history: append the raw user obs to the turn opened above
    if self.dst:
        self.dst.state['history'][-1].append(str(obs))
    # encode state for the policy; identity when no encoder is configured
    encoded_state = self.state_encoder.encode(
        state) if self.state_encoder else state
    # Record the user action on the tracker state. With NLU, that is the
    # parsed act; without NLU (act-in act-out agents) the obs IS the act —
    # except for MDBTTracker, which manages user_action itself.
    if self.nlu and self.dst:
        self.dst.state['user_action'] = input_act
    elif self.dst and not isinstance(
            self.dst, word_dst.MDBTTracker):  # for act-in act-out agent
        self.dst.state['user_action'] = obs
    logger.nl(f'User utterance: {obs}')
    logger.act(f'Inferred user action: {input_act}')
    logger.state(f'Dialog state: {state}')
    return input_act, state, encoded_state
def act(self, obs):
    '''Standard act method from algorithm.'''
    # Let the learned policy pick a raw action from the encoded state,
    # and remember it on the body for downstream components.
    raw_action = self.algorithm.act(self.body.encoded_state)
    self.body.action = raw_action
    # Translate the raw action into a dialog act plus a natural-language
    # utterance; only the utterance is returned to the environment.
    output_act, utterance = self.action_decode(raw_action, self.body.state)
    logger.act(f'System action: {raw_action}')
    logger.nl(f'System utterance: {utterance}')
    return utterance
def reset(self, train_mode, config):
    """Start a fresh simulator session and return the initial env info.

    train_mode/config are accepted for interface compatibility; the reset
    itself only touches the simulator, history, and evaluator.
    """
    self.simulator.init_session()
    self.history = []
    # Open the dialog with a "null" system action to elicit the user's
    # first utterance; the initial reward is fixed at 0.
    first_response, first_act, session_over, reward = self.simulator.response(
        "null", self.history)
    self.last_act = first_act
    logger.act(f'User action: {first_act}')
    self.history.extend(["null", f'{first_response}'])
    self.env_info = [State(first_response, 0., session_over)]
    # update evaluator with the user goal for this session, if present
    if self.evaluator:
        self.evaluator.add_goal(self.get_goal())
        logger.act(f'Goal: {self.get_goal()}')
    return self.env_info
def step(self, action):
    """Advance the dialog one turn: send the system action to the user
    simulator, record the exchange, and compute the reward.

    Returns self.env_info, a single-element list holding the new State.
    """
    user_response, user_act, session_over, reward = self.simulator.response(action, self.history)
    self.last_act = user_act
    self.history.extend([f'{action}', f'{user_response}'])
    logger.act(f'Inferred system action: {self.get_sys_act()}')
    # update evaluator
    if self.evaluator:
        self.evaluator.add_sys_da(self.get_sys_act())
        self.evaluator.add_usr_da(self.get_last_act())
        # Override the simulator's reward with a task-success signal:
        # +/- max_turn-scaled terminal reward, -1 per intermediate turn.
        # NOTE(review): this override applies only when an evaluator is
        # configured; without one the simulator's own reward is kept —
        # confirm that is the intended fallback.
        if session_over:
            reward = 2.0 * self.simulator.policy.max_turn if self.evaluator.task_success() else -1.0 * self.simulator.policy.max_turn
        else:
            reward = -1.0
    self.env_info = [State(user_response, reward, session_over)]
    return self.env_info