def __init__(self, env_space, cmdl):
    self.name = "Evaluation"
    self.actions = env_space[0]
    self.action_no = action_no = self.actions.n
    self.cmdl = cmdl
    # Fixed exploration rate used during evaluation.
    self.epsilon = 0.05

    # Build the policy network matching the agent type being evaluated.
    if cmdl.agent_type == "dqn":
        self.policy = policy = get_model(cmdl.estimator, 1, cmdl.hist_len,
                                         self.action_no, cmdl.hidden_size)
        if self.cmdl.cuda:
            self.policy.cuda()
        self.policy_evaluation = DeterministicPolicy(policy)
    elif cmdl.agent_type == "categorical":
        # The categorical head outputs `atoms_no` probabilities per action.
        self.policy = policy = get_model(cmdl.estimator, 1, cmdl.hist_len,
                                         (action_no, cmdl.atoms_no),
                                         hidden_size=cmdl.hidden_size)
        if self.cmdl.cuda:
            self.policy.cuda()
        self.policy_evaluation = CategoricalPolicyEvaluation(policy, cmdl)

    print("[%s] Evaluating %s agent." % (self.name, cmdl.agent_type))
    self.max_q = -1000
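
# Illustrative sketch, not part of the original class: how the fixed
# evaluation epsilon above is commonly applied when selecting actions.
# The helper name `eval_action` and the `policy_evaluation.get_action`
# call are assumptions for illustration, not the repo's confirmed API;
# `agent.actions.sample()` assumes a gym-style discrete action space.
import random


def eval_action(agent, state):
    """Epsilon-greedy action selection with the fixed eval epsilon."""
    if random.random() < agent.epsilon:
        # Explore: uniform random action from the discrete action space.
        return agent.actions.sample()
    # Exploit: greedy action from the trained policy.
    return agent.policy_evaluation.get_action(state)
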
def __init__(self, action_space, cmdl, is_training=True):
    DQNAgent.__init__(self, action_space, cmdl, is_training)

    self.name = "Categorical_agent"
    self.cmdl = cmdl
    hist_len, action_no = cmdl.hist_len, self.action_no

    # Online and target networks output a distribution over `atoms_no`
    # support atoms for each action instead of a scalar Q-value.
    self.policy = policy = get_model(cmdl.estimator, 1, hist_len,
                                     (action_no, cmdl.atoms_no),
                                     hidden_size=cmdl.hidden_size)
    self.target = target = get_model(cmdl.estimator, 1, hist_len,
                                     (action_no, cmdl.atoms_no),
                                     hidden_size=cmdl.hidden_size)
    if self.cmdl.cuda:
        self.policy.cuda()
        self.target.cuda()

    self.policy_evaluation = CategoricalPolicyEvaluation(policy, cmdl)
    self.policy_improvement = CategoricalPolicyImprovement(
        policy, target, cmdl)
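
# Illustrative sketch, an assumption about what CategoricalPolicyEvaluation
# computes rather than the repo's actual code: in C51, the network outputs a
# probability distribution over `atoms_no` fixed support atoms per action.
# Q-values are recovered as the expectation over that support, and the
# greedy action maximizes them. The `v_min`/`v_max` defaults follow the C51
# paper's Atari settings and are assumptions here.
import torch


def categorical_q_values(probs, v_min=-10.0, v_max=10.0):
    """probs: (batch, action_no, atoms_no) per-action distributions."""
    atoms_no = probs.size(-1)
    # Fixed support z_1..z_N, evenly spaced on [v_min, v_max].
    support = torch.linspace(v_min, v_max, atoms_no)
    # Q(s, a) = sum_i p_i(s, a) * z_i  (expectation over the support).
    return (probs * support).sum(dim=-1)


# Usage: pick the greedy action from a batch of predicted distributions.
probs = torch.softmax(torch.randn(1, 4, 51), dim=-1)  # 4 actions, 51 atoms
greedy_action = categorical_q_values(probs).argmax(dim=-1)
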
def __init__(self, action_space, cmdl, is_training=True):
    BaseAgent.__init__(self, action_space, is_training)

    self.name = "DQN_agent"
    self.cmdl = cmdl
    eps = self.cmdl.epsilon
    e_steps = self.cmdl.epsilon_steps

    # Online (policy) and target networks share the same architecture.
    self.policy = policy = get_model(cmdl.estimator, 1, cmdl.hist_len,
                                     self.action_no, cmdl.hidden_size)
    self.target = target = get_model(cmdl.estimator, 1, cmdl.hist_len,
                                     self.action_no, cmdl.hidden_size)
    if self.cmdl.cuda:
        self.policy.cuda()
        self.target.cuda()

    self.policy_evaluation = DQNEvaluation(policy)
    self.policy_improvement = DQNImprovement(policy, target, cmdl)

    # Anneal epsilon linearly from `eps` down to 0.05 over `e_steps` steps.
    self.exploration = get_epsilon_schedule("linear", eps, 0.05, e_steps)
    self.replay_memory = ReplayMemory(capacity=cmdl.experience_replay)

    self.dtype = TorchTypes(cmdl.cuda)
    self.max_q = -1000
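
# Illustrative sketch, an assumption about what
# get_epsilon_schedule("linear", start, end, steps) returns: a generator
# that anneals epsilon linearly from `start` down to `end` over `steps`
# calls, then stays at `end`.
def linear_epsilon_schedule(start, end, steps):
    step_size = (start - end) / steps
    eps = start
    while True:
        yield max(end, eps)
        eps -= step_size


# Usage: one `next()` per environment step.
schedule = linear_epsilon_schedule(1.0, 0.05, 1000)
first_eps = next(schedule)  # 1.0, then decays by (1.0 - 0.05) / 1000 per call
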