def test_no_crash_variable_duration(self):
   """Smoke test: training with a mock environment of variable duration.

   The mock environment is built with min_duration=5 and max_duration=25
   (presumably the bounds on episode length, as the test name suggests) and
   networks.RecurrentGaussianPolicy is used as the network. There are no
   assertions; the test passes as long as nothing raises while the training
   scores are consumed.
   """
   cfg = self._define_config()
   # Building the partial has no side effects, so it is prepared up front
   # and only assigned while the config is unlocked.
   make_env = functools.partial(
       tools.MockEnvironment,
       observ_shape=(2, 3),
       action_shape=(3,),
       min_duration=5,
       max_duration=25)
   with cfg.unlocked:
     cfg.env = make_env
     cfg.max_length = 25
     cfg.steps = 200
     cfg.network = networks.RecurrentGaussianPolicy
   scores = train.train(cfg, env_processes=False)
   for result in scores:
     # float() raises if a produced score cannot be converted to a number.
     float(result)
 def test_no_crash_ant(self):
   """Smoke test: run training on 'Ant-v1' once per policy network class.

   A fresh config is created for each of ForwardGaussianPolicy and
   RecurrentGaussianPolicy. There are no assertions; the test passes as
   long as nothing raises while the training scores are consumed.
   """
   for policy in (networks.ForwardGaussianPolicy,
                  networks.RecurrentGaussianPolicy):
     cfg = self._define_config()
     with cfg.unlocked:
       cfg.env = 'Ant-v1'
       cfg.max_length = 200
       cfg.steps = 1000
       cfg.network = policy
     for result in train.train(cfg, env_processes=True):
       # float() raises if a produced score cannot be converted to a number.
       float(result)
Exemple #3
0
 def test_no_crash_ant(self):
     """Check that training on 'Ant-v1' completes for both policy networks.

     Each network class gets its own freshly defined config. Nothing is
     asserted about the scores beyond being convertible with float(); any
     exception raised during training fails the test.
     """
     policy_classes = (
         networks.ForwardGaussianPolicy,
         networks.RecurrentGaussianPolicy,
     )
     for policy_cls in policy_classes:
         cfg = self._define_config()
         with cfg.unlocked:
             cfg.env = 'Ant-v1'
             cfg.max_length = 200
             cfg.steps = 1000
             cfg.network = policy_cls
         score_stream = train.train(cfg, env_processes=True)
         for value in score_stream:
             # Conversion only; the numeric result itself is discarded.
             float(value)
Exemple #4
0
 def test_no_crash_variable_duration(self):
     """Check that training completes with a variable-duration mock env.

     The mock environment receives min_duration=5 and max_duration=25
     (presumably bounds on episode length -- confirm against
     tools.MockEnvironment). Nothing is asserted about the scores beyond
     being convertible with float(); any exception fails the test.
     """
     cfg = self._define_config()
     # Keyword arguments forwarded to tools.MockEnvironment on construction.
     env_kwargs = {
         'observ_shape': (2, 3),
         'action_shape': (3,),
         'min_duration': 5,
         'max_duration': 25,
     }
     with cfg.unlocked:
         cfg.env = functools.partial(tools.MockEnvironment, **env_kwargs)
         cfg.max_length = 25
         cfg.steps = 200
         cfg.network = networks.RecurrentGaussianPolicy
     score_stream = train.train(cfg, env_processes=False)
     for value in score_stream:
         # Conversion only; the numeric result itself is discarded.
         float(value)
 def test_no_crash_observation_shape(self):
   """Smoke test: training over several observation shapes and networks.

   Every pairing of ForwardGaussianPolicy / RecurrentGaussianPolicy with the
   observation shapes (1,), (2, 3) and (2, 3, 4) is run with a fresh config.
   There are no assertions; the test passes as long as nothing raises while
   the training scores are consumed.
   """
   policies = networks.ForwardGaussianPolicy, networks.RecurrentGaussianPolicy
   shapes = (1,), (2, 3), (2, 3, 4)
   # Nested loops visit the pairs in the same order as a cartesian product:
   # all shapes for the first policy, then all shapes for the second.
   for policy in policies:
     for shape in shapes:
       cfg = self._define_config()
       make_env = functools.partial(
           tools.MockEnvironment, shape, action_shape=(3,),
           min_duration=15, max_duration=15)
       with cfg.unlocked:
         cfg.env = make_env
         cfg.max_length = 20
         cfg.steps = 100
         cfg.network = policy
       for result in train.train(cfg, env_processes=False):
         # float() raises if a produced score cannot be converted to a number.
         float(result)
Exemple #6
0
 def test_no_crash_observation_shape(self):
     """Check that training completes for each network/observation shape.

     The two policy network classes are combined with the observation shapes
     (1,), (2, 3) and (2, 3, 4); each combination gets a freshly defined
     config. Nothing is asserted about the scores beyond being convertible
     with float(); any exception raised during training fails the test.
     """
     policy_classes = (
         networks.ForwardGaussianPolicy,
         networks.RecurrentGaussianPolicy,
     )
     shapes = ((1,), (2, 3), (2, 3, 4))
     combinations = itertools.product(policy_classes, shapes)
     for policy_cls, shape in combinations:
         cfg = self._define_config()
         # The observation shape is passed positionally, the rest by keyword.
         env_factory = functools.partial(
             tools.MockEnvironment,
             shape,
             action_shape=(3,),
             min_duration=15,
             max_duration=15,
         )
         with cfg.unlocked:
             cfg.env = env_factory
             cfg.max_length = 20
             cfg.steps = 100
             cfg.network = policy_cls
         score_stream = train.train(cfg, env_processes=False)
         for value in score_stream:
             # Conversion only; the numeric result itself is discarded.
             float(value)