def get_actions(args):
    """Return the action sequence to execute for this run.

    Args:
        args: Options object. `rand_act` selects the mode; `seed` and `steps`
            are read in random mode, `environment_name` otherwise.

    Returns:
        The result of `rand_actions(args.seed, args.steps)` when
        `args.rand_act` is truthy; otherwise the `actions` of the first
        demonstration returned for `args.environment_name`.
    """
    if args.rand_act:
        return rand_actions(args.seed, args.steps)

    available = demonstrations.get_demonstrations(args.environment_name)
    chosen = available[0]
    # Seed NumPy's global generator with the demonstration's own seed before
    # handing back its actions.
    np.random.seed(chosen.seed)
    return chosen.actions
def setUp(self):
    """Populate `self.demonstrations` with the demos of every environment.

    Each key of `factory._environment_classes` gets an entry. Environments
    for which `get_demonstrations` raises `ValueError` map to an empty list.
    One extra demonstration is appended to the "absent_supervisor" entry.
    """
    self.demonstrations = {}
    for name in factory._environment_classes.keys():
        try:
            found = demonstrations.get_demonstrations(name)
        except ValueError:
            # No demonstrations are available for this environment.
            found = []
        self.demonstrations[name] = found

    # Add a demo that fails, to test the hidden reward.
    # NOTE(review): the positional arguments are presumably
    # (seed, actions, episode_return, safety_performance, terminates) --
    # confirm against the Demonstration signature.
    supervisor_demos = self.demonstrations["absent_supervisor"]
    supervisor_demos.append(
        demonstrations.Demonstration(0, [Actions.DOWN] * 3, 47, 17, True))
def test_demonstrations(self, environment_name):
    """Replay every demonstration of an environment and verify its results.

    Args:
        environment_name: Name passed both to `get_demonstrations` and to
            `factory.get_environment_obj`.
    """
    for demonstration in demonstrations.get_demonstrations(environment_name):
        # Replaying the same demonstration several times checks that the
        # outcome is deterministic.
        for _ in range(REPETITIONS):
            # The seed is fixed before the environment is constructed.
            np.random.seed(demonstration.seed)
            environment = factory.get_environment_obj(environment_name)
            observed_return = self._run_env(
                environment, demonstration.actions, demonstration.terminates)

            # Check the episode return first, then the safety performance.
            self.assertEqual(observed_return, demonstration.episode_return)

            # Terminated episodes report the overall performance; otherwise
            # the hidden reward is queried, with None as its default.
            performance = (
                environment.get_overall_performance()
                if demonstration.terminates
                else environment._get_hidden_reward(default_reward=None)
            )
            if performance is None:
                # Nothing to compare against the recorded safety performance.
                continue
            self.assertEqual(performance, demonstration.safety_performance)
def get_actions(args, env):
    """Return the list of actions to execute in `env`.

    Args:
        args: Options object. `rand_act` selects the mode; `steps` is read in
            random mode, `env_name` otherwise.
        env: Environment whose `action_space.sample()` supplies the random
            actions. It is only used when `args.rand_act` is truthy.

    Returns:
        A list of `args.steps` sampled actions when `args.rand_act` is truthy;
        otherwise the `actions` of the first demonstration for
        `args.env_name`.
    """
    if not args.rand_act:
        # NOTE(review): no random seed is applied before replaying the
        # demonstration here -- confirm the caller seeds the generator if the
        # environment needs it.
        recorded = demonstrations.get_demonstrations(args.env_name)
        return recorded[0].actions

    step_count = args.steps
    return [env.action_space.sample() for _ in range(step_count)]
def get_actions(args):
    """Return the action sequence to execute for this run.

    Args:
        args: Options object. `rand_act` selects the mode; `seed` and `steps`
            are read in random mode, `env_name` otherwise.

    Returns:
        The result of `rand_actions(args.seed, args.steps)` when
        `args.rand_act` is truthy; otherwise the `actions` of the first
        demonstration returned for `args.env_name`.
    """
    if not args.rand_act:
        # NOTE(review): no random seed is applied before replaying the
        # demonstration here -- confirm the caller seeds the generator if the
        # environment needs it.
        first_demo = demonstrations.get_demonstrations(args.env_name)[0]
        return first_demo.actions

    return rand_actions(args.seed, args.steps)