def make_greedy_strategy(train_dir, verbose=False):
    """Build a greedy strategy backed by the newest checkpoint in train_dir.

    A fresh session and FeedModel are created, the model's variables are
    restored from the latest checkpoint found in ``train_dir``, and the
    resulting q-value function is handed to ``play.make_greedy_strategy``.

    Args:
        train_dir: Directory that is searched for the latest checkpoint.
        verbose: Forwarded unchanged to ``play.make_greedy_strategy``.

    Returns:
        The strategy object produced by ``play.make_greedy_strategy``.
    """
    # The session is intentionally left open: the q-value function built
    # below is bound to it and is used after this function returns.
    sess = tf.Session()
    network = FeedModel()

    # The Saver is created after the model so that the model's variables
    # already exist when it collects what to restore.
    checkpoint_path = tf.train.latest_checkpoint(train_dir)
    tf.train.Saver().restore(sess, checkpoint_path)

    q_value_fn = learning.make_get_q_values(sess, network)
    return play.make_greedy_strategy(q_value_fn, verbose)
def get_all_q_values(train_dir, num_games=100):
    """Play randomly, compute q-values for all states.

    Restores a FeedModel from the latest checkpoint in ``train_dir``,
    collects experiences with ``play.random_strategy`` and evaluates the
    model on the ``next_state`` of every collected experience.

    Args:
        train_dir: Directory that is searched for the latest checkpoint.
        num_games: Value passed as the second argument to
            ``ExperienceCollector.collect``; defaults to the previously
            hard-coded 100. NOTE(review): presumably the number of games
            to play -- confirm against ``ExperienceCollector.collect``.

    Returns:
        A flat list containing the q-values of all evaluated states,
        concatenated in the order the experiences were collected.
    """
    # Nothing that references the session escapes this function, so it is
    # closed on exit (including on error) instead of being leaked.
    with tf.Session() as session:
        model = FeedModel()
        saver = tf.train.Saver()
        saver.restore(session, tf.train.latest_checkpoint(train_dir))
        get_q_values = learning.make_get_q_values(session, model)

        experiences = ExperienceCollector().collect(
            play.random_strategy, num_games)

        all_q_values = []
        for experience in experiences:
            # extend() consumes the result while the session is still
            # open, flattening it into the output list.
            all_q_values.extend(get_q_values(experience.next_state))
    return all_q_values