Beispiel #1
0
def make_greedy_strategy(train_dir, verbose=False):
  """Build a greedy strategy from the newest checkpoint in train_dir.

  Args:
    train_dir: Directory handed to tf.train.latest_checkpoint to locate the
      checkpoint that gets restored.
    verbose: Passed through unchanged to play.make_greedy_strategy.

  Returns:
    The object returned by play.make_greedy_strategy for the q-value
    function of the restored model.
  """
  sess = tf.Session()
  feed_model = FeedModel()

  # The Saver is constructed after FeedModel(); presumably so that the
  # model's variables already exist when it is created -- confirm.
  restorer = tf.train.Saver()
  checkpoint_path = tf.train.latest_checkpoint(train_dir)
  restorer.restore(sess, checkpoint_path)

  # The session is not closed here: the q-value function is built from it
  # and presumably still needs it whenever the strategy is used.
  q_value_fn = learning.make_get_q_values(sess, feed_model)
  return play.make_greedy_strategy(q_value_fn, verbose)
Beispiel #2
0
def make_greedy_strategy(train_dir, verbose=False):
    """Restore the most recent checkpoint and wrap it in a greedy strategy.

    Args:
        train_dir: Directory searched by tf.train.latest_checkpoint for the
            checkpoint to load.
        verbose: Forwarded as-is to play.make_greedy_strategy.

    Returns:
        The result of play.make_greedy_strategy, built on top of the
        q-value function of the restored model.
    """
    tf_session = tf.Session()
    q_model = FeedModel()

    # Restore the saved state into the freshly created session.
    checkpoint_saver = tf.train.Saver()
    checkpoint_saver.restore(
        tf_session, tf.train.latest_checkpoint(train_dir))

    # NOTE(review): the session stays open after returning; the q-value
    # function is created from it, so closing it here looks unsafe.
    q_values_of = learning.make_get_q_values(tf_session, q_model)
    strategy = play.make_greedy_strategy(q_values_of, verbose)
    return strategy
Beispiel #3
0
def get_all_q_values(train_dir):
  """Play randomly, compute q-values for all states.

  Restores the latest checkpoint found in train_dir, collects experiences
  with play.random_strategy, and evaluates the restored model on the
  next_state of every collected experience.

  Args:
    train_dir: Directory handed to tf.train.latest_checkpoint to locate the
      checkpoint that gets restored.

  Returns:
    A flat list holding the q-values of each experience's next_state,
    concatenated in the order the experiences were returned.
  """

  session = tf.Session()
  try:
    model = FeedModel()
    saver = tf.train.Saver()
    saver.restore(session, tf.train.latest_checkpoint(train_dir))

    get_q_values = learning.make_get_q_values(session, model)
    # The 100 is presumably the number of games to play -- confirm against
    # ExperienceCollector.collect.
    experiences = ExperienceCollector().collect(play.random_strategy, 100)

    all_q_values = []
    for experience in experiences:
      all_q_values.extend(get_q_values(experience.next_state))
  finally:
    # The session is only needed inside this function, so release its
    # resources on every exit path instead of leaking it.
    session.close()

  return all_q_values