def load(self, path):
    """Restore the replay memory and its per-transition priorities.

    Args:
        path: Directory containing the base ReplayMemory data plus a
            'priority.npy' file written by the matching save().
    """
    # Restore the base replay memory contents first.
    ReplayMemory.load(self, path)
    # Drop the old priority array before loading so the (potentially large)
    # previous buffer can be freed ahead of the new allocation.
    self.priority = None
    # np.load needs a binary-mode handle; 'with' guarantees the file is
    # closed even if loading fails (the original opened in text mode and
    # leaked the handle on error).
    with open(path + '/priority.npy', 'rb') as f:
        self.priority = np.load(f)
# --- Monitoring setup -------------------------------------------------------
tensorboard_monitor = TensorboardMonitor('./log', sess, counter)
tensorboard_monitor.add_scalar_summary('score', 'per_game_summary')
tensorboard_monitor.add_scalar_summary('training_loss', 'training_summary')
# One histogram per Q-head (4 heads).
for i in range(4):
    tensorboard_monitor.add_histogram_summary('Q%d_training' % i, 'training_summary')

checkpoint_monitor = CheckpointRecorder(dqn_agent.dqn, replay_memory, counter,
                                        './checkpoints', sess)
agi.add_listener(checkpoint_monitor)
agi.add_listener(tensorboard_monitor)
dqn_agent.add_listener(tensorboard_monitor)

sess.run(tf.global_variables_initializer())

# Frame count of the checkpoint being resumed; used both for the restore
# paths and as the starting frame count inside run().
RESTORE_STEP = 7000000

# Load the DQN and replay memory saved at RESTORE_STEP, then sync the
# target network to the freshly restored online network.
dqn_agent.dqn.restore('./checkpoints/dqn/%d' % RESTORE_STEP)
replay_memory.load('./checkpoints/replay_memory/%d' % RESTORE_STEP)
dqn_agent.update_target_network()


def run():
    """Main training loop: run episodes until 50M frames have been seen."""
    cur_episode = 0
    num_frames = RESTORE_STEP
    while counter.count < 50000000:
        score = agi.learn()
        tensorboard_monitor.record({'score': score})
        # Frames consumed by this episode alone.
        elapsed_frames = counter.count - num_frames
        num_frames = counter.count
        # NOTE(review): the original read `agent.epsilon`, but no `agent`
        # is defined in this chunk and everything else uses `dqn_agent` —
        # assuming this was the intent; confirm no global `agent` exists.
        print("Episode %d: Total Score = %d\t# Frames = %d\tTotal Frames = %d\tEpsilon: %f"
              % (cur_episode, score, elapsed_frames, num_frames, dqn_agent.epsilon))
        cur_episode += 1