def __init__(self, learner, memory, simulator, settings,
             dqn_policy=None, rollout_policy=None):
    """
    Wire together the three modules the learning agent moves data
    between: Learner, Simulator and Memory.

    Inputs:
    - learner: contains the neural network and the optimizer to train it
    - memory: experience replay memory that can be minibatch sampled
    - simulator: simulates the environment
    - settings: hyper parameters for training, forwarded to set_params
    - dqn_policy: DQN policy, DQNPolicy(learner) by default
    - rollout_policy: rollout policy, RandomPolicy(simulator.n_actions)
      (random) by default
    """
    self.learner = learner
    self.memory = memory

    # Two independent environments: the original populates the experience
    # replay, while a deep copy is kept aside for evaluation.
    self.simulator = simulator
    self.evaluator = deepcopy(simulator)

    # Fall back to the default policies only when the caller supplied none.
    self.dqn_policy = DQNPolicy(learner) if dqn_policy is None else dqn_policy
    self.rollout_policy = (
        RandomPolicy(simulator.n_actions)
        if rollout_policy is None
        else rollout_policy
    )

    # NOTE(review): self.iterations is read right below, so set_params is
    # presumably what provides it -- confirm against set_params.
    self.set_params(settings)
    total_iterations = self.iterations
    self.n_epochs = total_iterations / float(memory.memory_size)

    # Every history attribute starts out as its own empty list.
    for history_name in (
        "iteration",
        "loss",
        "q_ave",
        "eval_iteration",
        "r_eval",
        "r_per_episode_eval",
    ):
        setattr(self, history_name, [])