def bc_point(ctxt=None, loss='log_prob'):
    """Run Behavioral Cloning on garage.envs.PointEnv.

    Clones an optimal point-goal policy into a Gaussian MLP policy,
    gathering expert episodes with a Ray sampler.

    Args:
        ctxt (ExperimentContext): Provided by wrap_experiment.
        loss (str): Either 'log_prob' or 'mse'

    """
    trainer = Trainer(ctxt)
    target = np.array([1., 1.])
    env = PointEnv(goal=target, max_episode_length=200)
    teacher = OptimalPolicy(env.spec, goal=target)
    learner = GaussianMLPPolicy(env.spec, [8, 8])
    samples_per_epoch = 1000
    sampler = RaySampler(agents=teacher,
                         envs=env,
                         max_episode_length=env.spec.max_episode_length)
    algo = BC(env.spec,
              learner,
              batch_size=samples_per_epoch,
              source=teacher,
              sampler=sampler,
              policy_lr=1e-2,
              loss=loss)
    trainer.setup(algo, env)
    trainer.train(100, batch_size=samples_per_epoch)
def test_bc_point_sample_batches():
    """BC should train from a pre-collected list of expert batches."""
    deterministic.set_seed(100)
    runner = LocalRunner(snapshot_config)
    target = np.array([1., 1.])
    env = PointEnv(goal=target)
    horizon = 100
    # Materialize the expert demonstrations up front instead of passing
    # a live policy as the source.
    demonstrations = list(expert_source(env, target, horizon, 5))
    learner = DeterministicMLPPolicy(env.spec, hidden_sizes=[8, 8])
    samples_per_epoch = 600
    algo = BC(env.spec,
              learner,
              batch_size=samples_per_epoch,
              source=demonstrations,
              policy_lr=1e-2,
              loss='mse')
    runner.setup(algo, env)
    run_bc(runner, algo, samples_per_epoch)
def test_bc_point_deterministic(ray_local_session_fixture):  # NOQA
    """BC with MSE loss should train a deterministic policy on PointEnv."""
    del ray_local_session_fixture
    # The fixture is only needed for its side effect of starting Ray.
    assert ray.is_initialized()
    deterministic.set_seed(100)
    runner = LocalRunner(snapshot_config)
    target = np.array([1., 1.])
    env = PointEnv(goal=target, max_episode_length=200)
    teacher = OptimalPolicy(env.spec, goal=target)
    learner = DeterministicMLPPolicy(env.spec, hidden_sizes=[8, 8])
    samples_per_epoch = 600
    algo = BC(env.spec,
              learner,
              batch_size=samples_per_epoch,
              source=teacher,
              policy_lr=1e-2,
              loss='mse')
    runner.setup(algo, env)
    run_bc(runner, algo, samples_per_epoch)
def test_bc_point(ray_local_session_fixture):  # NOQA
    """BC with log-prob loss should train a stochastic policy on PointEnv."""
    del ray_local_session_fixture
    # The fixture is only needed for its side effect of starting Ray.
    assert ray.is_initialized()
    deterministic.set_seed(100)
    trainer = Trainer(snapshot_config)
    target = np.array([1., 1.])
    env = PointEnv(goal=target, max_episode_length=200)
    teacher = OptimalPolicy(env.spec, goal=target)
    learner = GaussianMLPPolicy(env.spec, [4])
    samples_per_epoch = 400
    algo = BC(env.spec,
              learner,
              batch_size=samples_per_epoch,
              source=teacher,
              policy_lr=1e-2,
              loss='log_prob')
    trainer.setup(algo, env)
    run_bc(trainer, algo, samples_per_epoch)
def bc_point(ctxt=None):
    """Run Behavioral Cloning on garage.envs.PointEnv.

    Clones an optimal point-goal policy into a deterministic MLP policy
    using the MSE loss.

    Args:
        ctxt (ExperimentContext): Provided by wrap_experiment.

    """
    runner = LocalRunner(ctxt)
    target = np.array([1., 1.])
    env = PointEnv(goal=target, max_episode_length=200)
    teacher = OptimalPolicy(env.spec, goal=target)
    learner = DeterministicMLPPolicy(env.spec, hidden_sizes=[8, 8])
    samples_per_epoch = 1000
    algo = BC(env.spec,
              learner,
              batch_size=samples_per_epoch,
              source=teacher,
              policy_lr=1e-2,
              loss='mse')
    runner.setup(algo, env)
    runner.train(100, batch_size=samples_per_epoch)
def bc_point(ctxt=None, loss='log_prob'):
    """Run Behavioral Cloning on garage.envs.PointEnv.

    Clones an optimal point-goal policy into a Gaussian MLP policy,
    wrapping the environment in GarageEnv and capping paths at 200 steps.

    Args:
        ctxt (ExperimentContext): Provided by wrap_experiment.
        loss (str): Either 'log_prob' or 'mse'

    """
    runner = LocalRunner(ctxt)
    target = np.array([1., 1.])
    env = GarageEnv(PointEnv(goal=target))
    teacher = OptimalPolicy(env.spec, goal=target)
    learner = GaussianMLPPolicy(env.spec, [8, 8])
    samples_per_epoch = 1000
    algo = BC(env.spec,
              learner,
              batch_size=samples_per_epoch,
              source=teacher,
              max_path_length=200,
              policy_lr=1e-2,
              loss=loss)
    runner.setup(algo, env)
    runner.train(100, batch_size=samples_per_epoch)