class AGENT:
    """Thin facade over BRAIN exposing action selection and value queries."""

    def __init__(self, num_states, num_actions, args):
        # Keep the run configuration and build the underlying network.
        self.args = args
        self.brain = BRAIN(num_states, num_actions, args)

    def get_action(self, state, weight):
        """Delegate action selection for `state` (with `weight`) to BRAIN."""
        return self.brain.decide_action(state, weight)

    def get_Q_value(self, state, action):
        """Return the Q-vector BRAIN computes for a (state, action) pair."""
        return self.brain.compute_Q_value(state, action)

    def get_next_value(self, next_state, weight):
        """Return the value vector of the successor state under `weight`."""
        return self.brain.compute_next_value(next_state, weight)
class AGENT:
    """Wraps BRAIN and optionally perturbs chosen actions with exploration noise."""

    def __init__(self, num_states, num_actions, args):
        # Keep the run configuration and build the underlying network.
        self.args = args
        self.brain = BRAIN(num_states, num_actions, args)

    def get_action(self, state, exploration_noise):
        """Return BRAIN's action for `state`, plus a noise sample when
        an `exploration_noise` process is supplied (None disables it)."""
        action = self.brain.decide_action(state)
        if exploration_noise is not None:
            # In-place add keeps the original tensor object.
            action += torch.Tensor(exploration_noise.noise())
        return action
class AGENT:
    """DDPG-style agent facade over BRAIN: noisy action selection,
    online-network updates, and target-network synchronization."""

    def __init__(self, num_states, num_actions, args):
        # Keep the run configuration and build the underlying networks.
        self.args = args
        self.brain = BRAIN(num_states, num_actions, self.args)

    def update_DNNs(self, batch):
        """Run one optimization step of the online networks on `batch`."""
        self.brain.update_network(batch)

    def get_action(self, state, exploration_noise):
        """Return the policy action for `state`.

        If `exploration_noise` is not None, a sample from it is added for
        exploration; the (scalar) action element is then clipped back into
        the valid range [-1.0, 1.0].
        """
        action = self.brain.decide_action(state)
        # add noise ##########################################
        if exploration_noise is not None:
            action += torch.Tensor(exploration_noise.noise())
        # Clip into the valid action interval. Replaces the original
        # if/elif chain, which carried a dead no-op `else` branch.
        action[0, 0] = action[0, 0].clamp(-1.0, 1.0)
        ######################################################
        return action

    def update_target_DNNs(self):
        """Synchronize the target networks toward the online networks."""
        self.brain.update_target_network()
def __init__(self, num_states, num_actions, args):
    """Store the run configuration and construct the underlying BRAIN.

    num_states/num_actions are forwarded to BRAIN unchanged; `args` is
    kept on the instance and also passed through.
    """
    self.args = args
    self.brain = BRAIN(num_states, num_actions, args)