예제 #1
0
def _define_loop(graph, logdir, train_steps, eval_steps):
    """ Build a loop with an offense training and an offense evaluation phase.

    Both phases read ``graph.done``, ``graph.score`` and ``graph.summary``
    and feed ``graph.is_optimizing_offense`` as True. They differ in the
    value fed for ``graph.is_training``, in their step counts, and in the
    ``checkpoint_every`` argument (``None`` for training,
    ``150 * eval_steps`` for evaluation).

    Args
    -----
    graph : Object providing graph elements via attributes.
    logdir : Log directory for storing checkpoints and summaries.
    train_steps : Number of training steps per epoch; also passed as the
        ``report_every`` and ``log_every`` values of the training phase.
    eval_steps : Number of evaluation steps per epoch; also passed as the
        ``report_every`` and ``log_every`` values of the evaluation phase.

    Returns
    -------
    Loop object.
    """
    loop = tools.Loop(
        logdir,
        graph.step,
        graph.should_log,
        graph.do_report,
        graph.force_reset)

    # Training phase: no checkpoint interval is requested here.
    train_feed = {
        graph.is_training: True,
        graph.is_optimizing_offense: True,
    }
    loop.add_phase(
        'train_offense',
        graph.done,
        graph.score,
        graph.summary,
        train_steps,
        report_every=train_steps,
        log_every=train_steps,
        checkpoint_every=None,
        feed=train_feed)

    # Evaluation phase: same graph elements, but with training switched off
    # and a checkpoint interval of 150 evaluation epochs' worth of steps.
    eval_feed = {
        graph.is_training: False,
        graph.is_optimizing_offense: True,
    }
    checkpoint_interval = 150 * eval_steps
    loop.add_phase(
        'eval_offense',
        graph.done,
        graph.score,
        graph.summary,
        eval_steps,
        report_every=eval_steps,
        log_every=eval_steps,
        checkpoint_every=checkpoint_interval,
        feed=eval_feed)

    return loop
예제 #2
0
 def test_phases_feed(self):
     # All phases share one score placeholder; each phase supplies its own
     # value for it through `feed`, and every step is reported
     # (report_every=1).
     score = tf.placeholder(tf.float32, [])
     loop = tools.Loop(None)

     # (phase name, steps in the phase, value fed for the score placeholder)
     phase_table = (
         ('phase_1', 1, 1),
         ('phase_2', 3, 2),
         ('phase_3', 2, 3),
     )
     for phase_name, phase_steps, fed_value in phase_table:
         loop.add_phase(phase_name,
                        done=True,
                        score=score,
                        summary='',
                        steps=phase_steps,
                        report_every=1,
                        log_every=None,
                        checkpoint_every=None,
                        feed={score: fed_value})

     with self.test_session() as sess:
         sess.run(tf.global_variables_initializer())
         reported = list(loop.run(sess, saver=None, max_step=15))

     # The pattern 1, 2, 2, 2, 3, 3 (one value per step of each phase)
     # repeats; the expected list holds 15 entries, matching max_step=15.
     expected = [1, 2, 2, 2, 3, 3, 1, 2, 2, 2, 3, 3, 1, 2, 2]
     self.assertAllEqual(expected, reported)
예제 #3
0
def _define_loop(graph, eval_steps):
    """Build a loop that contains a single evaluation phase.

    The loop is constructed with ``None`` in place of a log directory. Its
    only phase, named 'eval', is added with ``log_every=None`` and
    ``checkpoint_every=None`` and feeds both ``graph.is_training`` and
    ``graph.is_optimizing_offense`` as False.

    Args:
      graph: Object providing graph elements via attributes.
      eval_steps: Number of evaluation steps per epoch; also passed as the
        phase's ``report_every`` value.

    Returns:
      Loop object.
    """
    loop = tools.Loop(
        None,
        graph.step,
        graph.should_log,
        graph.do_report,
        graph.force_reset)

    eval_feed = {
        graph.is_training: False,
        graph.is_optimizing_offense: False,
    }
    loop.add_phase(
        'eval',
        graph.done,
        graph.score,
        graph.summary,
        eval_steps,
        report_every=eval_steps,
        log_every=None,
        checkpoint_every=None,
        feed=eval_feed)

    return loop
예제 #4
0
def _define_loop(graph, logdir, train_steps, eval_steps, batch_env):
    """Create and configure a training loop with training and evaluation phases.

    This defines the computation for each iteration of the loop. Each
    iteration consists of two phases, a train phase and an evaluation phase:

    1. The train phase simulates ``train_steps`` steps in ALL environments.
       All environments are moved by a single step synchronously, increasing
       the number of steps moved by the number of environments. Afterwards,
       a number of episodes will have been generated; those episodes are
       used to train the model.
    2. The evaluation phase generates episodes the same way as the train
       phase, and those episodes are used to evaluate the model.

    In both phases ``graph.should_step`` is fed True,
    ``graph.use_external_action`` is fed False, and ``graph.external_action``
    is fed an all-zeros array.

    Args:
      graph: Object providing graph elements via attributes.
      logdir: Log directory for storing checkpoints and summaries.
      train_steps: Number of training steps per epoch.
      eval_steps: Number of evaluation steps per epoch.
      batch_env: Object whose ``action_info[1]`` is passed to ``np.zeros``
        as the shape of the all-zeros external action.

    Returns:
      Loop object.
    """
    placeholder_action = np.zeros(batch_env.action_info[1])

    def phase_feed(training):
        # The two phases feed the same values except for `is_training`.
        return {
            graph.is_training: training,
            graph.should_step: True,
            graph.use_external_action: False,
            graph.external_action: placeholder_action,
        }

    loop = tools.Loop(
        logdir,
        graph.step,
        graph.should_log,
        graph.do_report,
        graph.force_reset)

    # Train phase: log interval is half the phase length; report_every and
    # checkpoint_every are passed as None.
    loop.add_phase(
        'train',
        graph.done,
        graph.score,
        graph.summary,
        train_steps,
        report_every=None,
        log_every=train_steps // 2,
        checkpoint_every=None,
        feed=phase_feed(True))

    # Eval phase: reports once per phase length, with a checkpoint interval
    # of ten phase lengths.
    loop.add_phase(
        'eval',
        graph.done,
        graph.score,
        graph.summary,
        eval_steps,
        report_every=eval_steps,
        log_every=eval_steps // 2,
        checkpoint_every=10 * eval_steps,
        feed=phase_feed(False))

    return loop
예제 #5
0
 def test_not_done(self):
     # The score equals the step counter and `done` is true on every odd
     # step, i.e. whenever (step + 1) is divisible by 2.
     step = tf.Variable(0, False, dtype=tf.int32, name='step')
     episode_done = tf.equal((step + 1) % 2, 0)
     step_score = tf.cast(step, tf.float32)
     loop = tools.Loop(None, step)
     loop.add_phase(
         'phase_1',
         episode_done,
         step_score,
         summary='',
         steps=1,
         report_every=3)
     # Timeline over the nine steps of the run:
     #   Score:  0 1 2 3 4 5 6 7 8
     #   Done:     x   x   x   x
     #   Report:     x     x     x
     # Averaging only the scores of done steps inside each window of three
     # gives 1, (3 + 5) / 2 = 4 and 7.
     with self.test_session() as sess:
         sess.run(tf.global_variables_initializer())
         reported = list(loop.run(sess, saver=None, max_step=9))
     expected = [1, 4, 7]
     self.assertAllEqual(expected, reported)
예제 #6
0
 def test_report_every_step(self):
     # With report_every=3, each value pulled from the run generator should
     # leave the step counter at the next multiple of three.
     step = tf.Variable(0, False, dtype=tf.int32, name='step')
     loop = tools.Loop(None, step)
     loop.add_phase(
         'phase_1',
         done=True,
         score=0,
         summary='',
         steps=1,
         report_every=3)
     # Step:   0 1 2 3 4 5 6 7 8
     # Report:     x     x     x
     with self.test_session() as sess:
         sess.run(tf.global_variables_initializer())
         reports = loop.run(sess, saver=None, max_step=9)
         for expected_step in (3, 6, 9):
             # Advance the generator to its next report, then read the
             # counter.
             next(reports)
             self.assertEqual(expected_step, sess.run(step))
예제 #7
0
 def test_average_score_over_phases(self):
     # Two phases with constant scores (1 and 2) and different reporting
     # intervals; every reported value must be the constant score of the
     # phase that produced the report.
     loop = tools.Loop(None)
     loop.add_phase(
         'phase_1',
         done=True,
         score=1,
         summary='',
         steps=1,
         report_every=2)
     loop.add_phase(
         'phase_2',
         done=True,
         score=2,
         summary='',
         steps=2,
         report_every=5)
     # Timeline over the 17 steps of the run:
     #   Score:    1 2 2 1 2 2 1 2 2 1 2 2 1 2 2 1 2
     #   Report 1:       x           x           x
     #   Report 2:               x             x
     with self.test_session() as sess:
         sess.run(tf.global_variables_initializer())
         reported = list(loop.run(sess, saver=None, max_step=17))
     # Reports alternate between the two phases, starting with phase_1.
     expected = [1, 2, 1, 2, 1]
     self.assertAllEqual(expected, reported)
예제 #8
0
 def test_not_done_batch(self):
     # Batched variant: `done` and `score` each hold two entries. Entry 1 is
     # done when step is divisible by 3 and scores `step`; entry 2 is done
     # when step is divisible by 4 and scores `step ** 2`.
     step = tf.Variable(0, False, dtype=tf.int32, name='step')
     batch_done = tf.equal([step % 3, step % 4], 0)
     batch_score = tf.cast([step, step**2], tf.float32)
     loop = tools.Loop(None, step)
     loop.add_phase(
         'phase_1',
         batch_done,
         batch_score,
         summary='',
         steps=1,
         report_every=8)
     # Step:    0  2  4  6
     # Score 1: 0  2  4  6
     # Done 1:  x        x
     # Score 2: 0  4 16 36
     # Done 2:  x     x
     with self.test_session() as sess:
         sess.run(tf.global_variables_initializer())
         reported = list(loop.run(sess, saver=None, max_step=8))
         self.assertEqual(8, sess.run(step))
     # Only scores at done positions count: 0 and 6 from entry 1, 0 and 16
     # from entry 2; the single report is their mean.
     expected = [(0 + 0 + 16 + 6) / 4]
     self.assertAllEqual(expected, reported)