Beispiel #1
0
    def testBatchSizeProvided(self, metric_class, expected_result):
        """Feeds five batch-of-two steps to a metric and checks its result.

        Args:
          metric_class: Metric type under test; built with `batch_size=2`.
          expected_result: Value `metric.result()` must equal afterwards.
        """
        metric = metric_class(batch_size=2)

        # Order of args for trajectory methods:
        # observation, action, policy_info, reward, discount
        batched_steps = [
            (trajectory.boundary((), (), (), 0., 1.),
             trajectory.boundary((), (), (), 0., 1.)),
            (trajectory.first((), (), (), 1., 1.),
             trajectory.first((), (), (), 1., 1.)),
            (trajectory.mid((), (), (), 2., 1.),
             trajectory.last((), (), (), 3., 0.)),
            (trajectory.last((), (), (), 3., 0.),
             trajectory.boundary((), (), (), 0., 1.)),
            (trajectory.boundary((), (), (), 0., 1.),
             trajectory.first((), (), (), 1., 1.)),
        ]
        for entry_a, entry_b in batched_steps:
            # Each call receives one stacked step covering both batch entries.
            metric(nest_utils.stack_nested_arrays([entry_a, entry_b]))

        self.assertEqual(metric.result(), expected_result)
Beispiel #2
0
  def _create_misaligned_trajectories(self):
    """Builds three batch-of-two steps whose two entries differ in step kind.

    Every returned element is the leaf-wise concatenation (axis 0) of two
    single-entry steps created by different trajectory constructors, e.g. a
    `first` step next to a `boundary` step.

    Returns:
      A list `[ts1, ts2, ts3]` of concatenated trajectory nests.
    """

    def _join(left, right):
      # Concatenate matching leaves of both nests along the leading axis.
      return tf.nest.map_structure(
          lambda a, b: tf.concat([a, b], axis=0), left, right)

    def _step(make_step, action, reward):
      # Order of args for trajectory methods:
      # observation, action, policy_info, reward, discount
      return make_step((), tf.constant([action]), (),
                       tf.constant([reward], dtype=tf.float32), [1.])

    # One row per returned element: (constructor, action, reward) per entry.
    layout = [
        ((trajectory.first, 2, 1.), (trajectory.boundary, 1, 0.)),
        ((trajectory.last, 1, 3.), (trajectory.first, 1, 2.)),
        ((trajectory.boundary, 2, 0.), (trajectory.last, 1, 4.)),
    ]
    return [_join(_step(*left), _step(*right)) for left, right in layout]
Beispiel #3
0
    def setUp(self):
        """Stores six batch-of-two steps in `self._ts`.

        The sequence is `boundary`, `first`, `last` repeated twice; the two
        batch entries share the step kind but carry different rewards.
        """
        super(BatchedPyMetricTest, self).setUp()

        # (constructor, reward of entry 0, reward of entry 1) per stacked step.
        schedule = [
            (trajectory.boundary, 0., 0.),
            (trajectory.first, 1., 2.),
            (trajectory.last, 3., 4.),
            (trajectory.boundary, 0., 0.),
            (trajectory.first, 5., 6.),
            (trajectory.last, 7., 8.),
        ]
        # Order of args for trajectory methods:
        # observation, action, policy_info, reward, discount
        self._ts = [
            nest_utils.stack_nested_tensors([
                make_step((), (), (), reward_a, 1.),
                make_step((), (), (), reward_b, 1.),
            ])
            for make_step, reward_a, reward_b in schedule
        ]
Beispiel #4
0
  def _create_trajectories(self):
    """Returns six batch-of-two steps: `boundary`, `first`, `last`, twice over.

    Both batch entries of a step use the same constructor and discount 1.0 but
    carry different rewards.

    Returns:
      A list `[ts0, ts1, ts2, ts3, ts4, ts5]` of stacked trajectory nests.
    """

    def _batch_of_two(make_step, reward_a, reward_b):
      # Order of args for trajectory methods:
      # observation, action, policy_info, reward, discount
      return nest_utils.stack_nested_tensors([
          make_step((), (), (), reward_a, 1.),
          make_step((), (), (), reward_b, 1.),
      ])

    return [
        _batch_of_two(trajectory.boundary, 0., 0.),
        _batch_of_two(trajectory.first, 1., 2.),
        _batch_of_two(trajectory.last, 3., 4.),
        _batch_of_two(trajectory.boundary, 0., 0.),
        _batch_of_two(trajectory.first, 5., 6.),
        _batch_of_two(trajectory.last, 7., 8.),
    ]
Beispiel #5
0
 def setUp(self):
     """Prepares the fixed trajectory sequence stored in `self._trajectories`.

     The list holds two `first`, `last`, `boundary` runs followed by one more
     `first` step.
     """
     super(PyDriverTest, self).setUp()
     zero = np.array(0., dtype=np.float32)
     one = np.array(1., dtype=np.float32)

     def _episode_steps():
         # Builds fresh step objects on every call.
         # Order of args for trajectory methods:
         # (observation, action, policy_info, reward, discount)
         return [
             trajectory.first(0, 1, 2, one, one),
             trajectory.last(1, 2, 4, one, zero),
             trajectory.boundary(3, 1, 2, zero, one),
         ]

     self._trajectories = (
         _episode_steps() + _episode_steps()
         + [trajectory.first(0, 1, 2, one, one)])
Beispiel #6
0
 def testZeroEpisodes(self, metric_class, expected_result):
     """Feeds only a `boundary` and a `first` step, then checks the result.

     Args:
       metric_class: Metric type under test; built with no arguments.
       expected_result: Value `metric.result()` must equal afterwards.
     """
     metric = metric_class()
     # Order of args for trajectory methods:
     # observation, action, policy_info, reward, discount
     steps = (
         trajectory.boundary((), (), (), 0., 1.),
         trajectory.first((), (), (), 1., 1.),
     )
     for step in steps:
         metric(step)
     self.assertEqual(expected_result, metric.result())
Beispiel #7
0
    def testAverageTwoEpisode(self, metric_class, expected_result):
        """Feeds a multi-step episode plus a single hand-built step.

        Args:
          metric_class: Metric type under test; built with no arguments.
          expected_result: Value `metric.result()` must equal afterwards.
        """
        metric = metric_class()

        # Order of args for trajectory methods:
        # observation, action, policy_info, reward, discount
        leading_steps = [
            trajectory.boundary((), (), (), 0., 1.),
            trajectory.first((), (), (), 1., 1.),
            trajectory.mid((), (), (), 2., 1.),
            trajectory.last((), (), (), 3., 0.),
            trajectory.boundary((), (), (), 0., 1.),
        ]

        # TODO(kbanoop): Add optional next_step_type arg to trajectory.first. Or
        # implement trajectory.first_last().
        # Built by hand because it carries both StepType.FIRST and
        # StepType.LAST in a single Trajectory.
        first_and_last_step = trajectory.Trajectory(
            ts.StepType.FIRST, (), (), (), ts.StepType.LAST, -6., 1.)

        for step in leading_steps + [first_and_last_step]:
            metric(step)

        self.assertEqual(expected_result, metric.result())
Beispiel #8
0
    def testAverageOneEpisode(self, metric_class, expected_result):
        """Feeds `boundary`, `mid`, `mid`, `last` steps and checks the result.

        Args:
          metric_class: Metric type under test; built with no arguments.
          expected_result: Value `metric.result()` must equal afterwards.
        """
        metric = metric_class()

        # Order of args for trajectory methods:
        # observation, action, policy_info, reward, discount
        episode = (
            trajectory.boundary((), (), (), 0., 1.),
            trajectory.mid((), (), (), 1., 1.),
            trajectory.mid((), (), (), 2., 1.),
            trajectory.last((), (), (), 3., 0.),
        )
        for step in episode:
            metric(step)
        self.assertEqual(expected_result, metric.result())
Beispiel #9
0
 def _get_padding_step(
     self, example_trajectory: trajectory_lib.Trajectory
 ) -> trajectory_lib.Trajectory:
   """Builds an all-zero boundary step mirroring `example_trajectory`.

   Args:
     example_trajectory: Trajectory whose observation, action, policy_info,
       reward and discount nests serve as templates for the zero tensors.

   Returns:
     The result of `trajectory_lib.boundary` applied to `tf.zeros_like`
     copies of those five nests.
   """

   def _zeros_like_nest(nest):
     # Replace every leaf of the nest with a zero tensor like that leaf.
     return tf.nest.map_structure(tf.zeros_like, nest)

   return trajectory_lib.boundary(
       _zeros_like_nest(example_trajectory.observation),
       _zeros_like_nest(example_trajectory.action),
       _zeros_like_nest(example_trajectory.policy_info),
       _zeros_like_nest(example_trajectory.reward),
       _zeros_like_nest(example_trajectory.discount),
   )
Beispiel #10
0
    def testSaveRestore(self):
        """Checks that metric values come back after save, reset and restore."""
        metrics = [
            py_metrics.AverageReturnMetric(),
            py_metrics.AverageEpisodeLengthMetric(),
            py_metrics.EnvironmentSteps(),
            py_metrics.NumberOfEpisodes()
        ]

        # Drive every metric through the same four steps.
        for metric in metrics:
            # Order of args for trajectory methods:
            # observation, action, policy_info, reward, discount
            for step in (
                    trajectory.boundary((), (), (), 0., 1.),
                    trajectory.mid((), (), (), 1., 1.),
                    trajectory.mid((), (), (), 2., 1.),
                    trajectory.last((), (), (), 3., 0.),
            ):
                metric(step)

        # Checkpoint the metrics, keyed by their names.
        named_metrics = {metric.name: metric for metric in metrics}
        checkpoint = tf.train.Checkpoint(**named_metrics)
        save_path = checkpoint.save(self.get_temp_dir() + '/ckpt')

        # After a reset every metric must report zero ...
        for metric in metrics:
            metric.reset()
            self.assertEqual(0, metric.result())

        # ... and after restoring, a positive value again.
        checkpoint.restore(save_path).assert_consumed()
        for metric in metrics:
            self.assertGreater(metric.result(), 0)
    # TRAINING LOOP
    # Each iteration reads the stored trajectory whose row key ends in `i`,
    # rebuilds it as tensors with a length-2 axis 1 ([value, shifted value]),
    # trains the agent on it and then saves the Q-network weights.
    for i in tqdm(range(5000), "Training"):
        # Query the table for the row(s) whose key matches this iteration.
        regex_filter = '^cartpole_trajectory_{}$'.format(i)
        row_filter = row_filters.RowKeyRegexFilter(regex_filter)
        filtered_rows = table.read_rows(filter_=row_filter)

        for row in filtered_rows:
            # Decode the serialized protobuf payloads stored in the row.
            cells = row.cells['trajectory']
            bytes_traj = cells[b'traj'][0].value
            bytes_info = cells[b'info'][0].value
            traj_proto, info = Trajectory(), Info()
            traj_proto.ParseFromString(bytes_traj)
            info.ParseFromString(bytes_info)

            # Observations are stored flat; restore (num_steps, *obs_spec).
            traj_shape = np.append(np.array(info.num_steps),
                                   np.array(info.vector_obs_spec))
            observations = np.array(traj_proto.vector_obs).reshape(traj_shape)
            actions = np.array(traj_proto.actions)
            rewards = np.array(traj_proto.rewards)

            # Pair every step with its shifted neighbour along a new axis 1.
            # The shift must run along axis 0 (steps): without `axis`, np.roll
            # flattens the array first, which would shift the observations by
            # one scalar and mix feature columns instead of moving whole steps.
            # NOTE(review): a shift of +1 pairs step t with step t-1; confirm
            # this is the intended direction (a "next step" pairing would use -1).
            traj_obs = np.stack(
                [observations, np.roll(observations, 1, axis=0)], axis=1)
            traj_actions = np.stack(
                [actions, np.roll(actions, 1, axis=0)], axis=1)
            traj_rewards = np.stack(
                [rewards, np.roll(rewards, 1, axis=0)], axis=1)
            traj_discounts = np.ones((info.num_steps, 2))

            traj_obs = tf.constant(traj_obs, dtype=tf.float32)
            traj_actions = tf.constant(traj_actions, dtype=tf.int32)
            policy_info = ()
            traj_rewards = tf.constant(traj_rewards, dtype=tf.float32)
            traj_discounts = tf.constant(traj_discounts, dtype=tf.float32)

            # NOTE(review): every step is built with `trajectory.boundary`;
            # confirm the agent's loss does not ignore boundary steps.
            traj = trajectory.boundary(traj_obs, traj_actions, policy_info,
                                       traj_rewards, traj_discounts)
            train_loss = tf_agent.train(traj)

        # Save the Q-network weights after every outer iteration.
        tf_agent._q_network.save_weights("cartpole_model.h5")