def __init__(self, agent, policy_model):
    self.agent = agent
    self.policy_model = policy_model

    # Replay memory
    max_replay_memory_size = 2000
    self.replay_memory = collections.deque(maxlen=max_replay_memory_size)
    rho = 0.5
    self.ps = prioritized_sweeping.PrioritizedSweeping(0, rho)

    # Maximum-likelihood loss over the policy model's output, optimized with Adam
    optimizer = tf.train.AdamOptimizer(self.mle_learning_rate)
    loss = MaximumLikelihoodEstimation.calc_loss(
        self.policy_model.model_output, self.policy_model.model_output_indices)

    # Optionally clip gradients by norm before applying them
    using_grad_clip = True
    grad_clip_val = 5.0
    if not using_grad_clip:
        train_step = optimizer.minimize(loss)
    else:
        gvs = optimizer.compute_gradients(loss)
        capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                      if grad is not None else (grad, var)
                      for grad, var in gvs]
        train_step = optimizer.apply_gradients(capped_gvs)

    # Create summaries for training
    summary_loss = tf.scalar_summary("Loss", loss)
    update_summaries = [summary_loss]

    AbstractLearning.__init__(self, policy_model, loss, train_step, update_summaries)
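# A minimal sketch of what MaximumLikelihoodEstimation.calc_loss above might
# compute: the mean negative log-likelihood of the chosen action indices under
# the policy logits. The function name, argument shapes, and TF 1.x-style
# keyword arguments are assumptions for illustration, not the implementation
# used above.
import tensorflow as tf

def calc_mle_loss_sketch(model_output, model_output_indices):
    # model_output: [batch_size, num_actions] unnormalized action logits (assumed)
    # model_output_indices: [batch_size] int32 indices of the taken actions (assumed)
    neg_log_prob = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=model_output, labels=model_output_indices)
    # Average the per-example negative log-likelihood over the batch
    return tf.reduce_mean(neg_log_prob)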
def __init__(self, agent, q_network, target_q_network):
    """ Constructor for an abstract learning setup. """
    self.agent = agent
    self.loss = None
    self.q_network = q_network
    self.target_q_network = target_q_network

    # Define epsilon greedy behaviour policy
    epsilon = 1.0
    min_epsilon = 0.1
    self.behaviour_policy = egp.EpsilonGreedyPolicy(epsilon, min_epsilon)

    # Replay memory and prioritized sweeping for sampling from the replay memory
    max_replay_memory_size = 2000
    self.replay_memory = collections.deque(maxlen=max_replay_memory_size)
    rho = 0.5
    self.ps = prioritized_sweeping.PrioritizedSweeping(0, rho)

    # Q-learning loss between predicted and target Q-values, optimized with Adam
    optimizer = tf.train.AdamOptimizer(self.rl_learning_rate)
    loss = self.calc_loss(self.q_network.model_output,
                          self.q_network.model_output_indices,
                          self.q_network.target)

    # Optionally clip gradients by norm before applying them
    using_grad_clip = True
    grad_clip_val = 5.0
    if not using_grad_clip:
        train_step = optimizer.minimize(loss)
    else:
        gvs = optimizer.compute_gradients(loss)
        capped_gvs = [(tf.clip_by_norm(grad, grad_clip_val), var)
                      if grad is not None else (grad, var)
                      for grad, var in gvs]
        train_step = optimizer.apply_gradients(capped_gvs)

    # Create summaries for training
    summary_loss = tf.scalar_summary("Loss", loss)
    update_summaries = [summary_loss]

    AbstractLearning.__init__(self, q_network, loss, train_step, update_summaries)
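# A minimal sketch of the epsilon-greedy behaviour policy referenced above
# (egp.EpsilonGreedyPolicy). Only the starting epsilon (1.0) and the minimum
# epsilon (0.1) come from the constructor; the anneal_rate and the
# sample_action interface are assumptions for illustration.
import random

class EpsilonGreedyPolicySketch(object):

    def __init__(self, epsilon, min_epsilon, anneal_rate=0.9995):
        self.epsilon = epsilon          # current exploration probability
        self.min_epsilon = min_epsilon  # floor on exploration
        self.anneal_rate = anneal_rate  # multiplicative decay per step (assumed)

    def sample_action(self, q_values):
        # With probability epsilon take a random action, otherwise the greedy one
        if random.random() < self.epsilon:
            action = random.randrange(len(q_values))
        else:
            action = max(range(len(q_values)), key=lambda a: q_values[a])
        # Anneal epsilon towards its minimum after every decision
        self.epsilon = max(self.min_epsilon, self.epsilon * self.anneal_rate)
        return action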