Example #1
def bc_point(ctxt=None, loss='log_prob'):
    """Run Behavioral Cloning on garage.envs.PointEnv.

    Args:
        ctxt (ExperimentContext): Provided by wrap_experiment.
        loss (str): Either 'log_prob' or 'mse'

    """
    trainer = Trainer(ctxt)
    goal = np.array([1., 1.])
    env = PointEnv(goal=goal, max_episode_length=200)
    expert = OptimalPolicy(env.spec, goal=goal)
    policy = GaussianMLPPolicy(env.spec, [8, 8])
    batch_size = 1000
    sampler = RaySampler(agents=expert,
                         envs=env,
                         max_episode_length=env.spec.max_episode_length)
    algo = BC(env.spec,
              policy,
              batch_size=batch_size,
              source=expert,
              sampler=sampler,
              policy_lr=1e-2,
              loss=loss)
    trainer.setup(algo, env)
    trainer.train(100, batch_size=batch_size)
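
These snippets come from garage's example scripts and test suite, so their imports are omitted. As a rough sketch, Example #1 (the newer, Trainer-based API) relies on imports along these lines; the LocalRunner-based examples further down come from older garage releases, where LocalRunner lives in garage.experiment and GarageEnv in garage.envs. Names such as OptimalPolicy, expert_source, run_bc, snapshot_config, and ray_local_session_fixture are helpers defined in the surrounding example and test modules and are not shown here.

import numpy as np

from garage import wrap_experiment
from garage.envs import PointEnv
from garage.sampler import RaySampler
from garage.torch.algos import BC
from garage.torch.policies import GaussianMLPPolicy
from garage.trainer import Trainer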
Example #2
def test_bc_point_sample_batches():
    deterministic.set_seed(100)
    runner = LocalRunner(snapshot_config)
    goal = np.array([1., 1.])
    env = PointEnv(goal=goal)
    max_episode_length = 100
    source = list(expert_source(env, goal, max_episode_length, 5))
    policy = DeterministicMLPPolicy(env.spec, hidden_sizes=[8, 8])
    batch_size = 600
    algo = BC(env.spec,
              policy,
              batch_size=batch_size,
              source=source,
              policy_lr=1e-2,
              loss='mse')
    runner.setup(algo, env)
    run_bc(runner, algo, batch_size)
Example #3
def test_bc_point_deterministic(ray_local_session_fixture):  # NOQA
    del ray_local_session_fixture
    assert ray.is_initialized()
    deterministic.set_seed(100)
    runner = LocalRunner(snapshot_config)
    goal = np.array([1., 1.])
    env = PointEnv(goal=goal, max_episode_length=200)
    expert = OptimalPolicy(env.spec, goal=goal)
    policy = DeterministicMLPPolicy(env.spec, hidden_sizes=[8, 8])
    batch_size = 600
    algo = BC(env.spec,
              policy,
              batch_size=batch_size,
              source=expert,
              policy_lr=1e-2,
              loss='mse')
    runner.setup(algo, env)
    run_bc(runner, algo, batch_size)
Example #4
def test_bc_point(ray_local_session_fixture):  # NOQA
    del ray_local_session_fixture
    assert ray.is_initialized()
    deterministic.set_seed(100)
    trainer = Trainer(snapshot_config)
    goal = np.array([1., 1.])
    env = PointEnv(goal=goal, max_episode_length=200)
    expert = OptimalPolicy(env.spec, goal=goal)
    policy = GaussianMLPPolicy(env.spec, [4])
    batch_size = 400
    algo = BC(env.spec,
              policy,
              batch_size=batch_size,
              source=expert,
              policy_lr=1e-2,
              loss='log_prob')
    trainer.setup(algo, env)
    run_bc(trainer, algo, batch_size)
Example #5
def bc_point(ctxt=None):
    """Run Behavioral Cloning on garage.envs.PointEnv.

    Args:
        ctxt (ExperimentContext): Provided by wrap_experiment.

    """
    runner = LocalRunner(ctxt)
    goal = np.array([1., 1.])
    env = PointEnv(goal=goal, max_episode_length=200)
    expert = OptimalPolicy(env.spec, goal=goal)
    policy = DeterministicMLPPolicy(env.spec, hidden_sizes=[8, 8])
    batch_size = 1000
    algo = BC(env.spec,
              policy,
              batch_size=batch_size,
              source=expert,
              policy_lr=1e-2,
              loss='mse')
    runner.setup(algo, env)
    runner.train(100, batch_size=batch_size)
Example #6
def bc_point(ctxt=None, loss='log_prob'):
    """Run Behavioral Cloning on garage.envs.PointEnv.

    Args:
        ctxt (ExperimentContext): Provided by wrap_experiment.
        loss (str): Either 'log_prob' or 'mse'

    """
    runner = LocalRunner(ctxt)
    goal = np.array([1., 1.])
    env = GarageEnv(PointEnv(goal=goal))
    expert = OptimalPolicy(env.spec, goal=goal)
    policy = GaussianMLPPolicy(env.spec, [8, 8])
    batch_size = 1000
    algo = BC(env.spec,
              policy,
              batch_size=batch_size,
              source=expert,
              max_path_length=200,
              policy_lr=1e-2,
              loss=loss)
    runner.setup(algo, env)
    runner.train(100, batch_size=batch_size)
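
For completeness, a minimal sketch of how these bc_point functions are typically launched, assuming the standard wrap_experiment pattern from the garage documentation: the decorator creates the ExperimentContext and passes it in as ctxt when the wrapped function is called.

from garage import wrap_experiment

@wrap_experiment
def bc_point(ctxt=None, loss='log_prob'):
    """Set up and train BC as in the examples above."""
    # ... body identical to Example #1 ...

# wrap_experiment supplies ctxt; other arguments can be passed
# as keywords, e.g. bc_point(loss='mse').
bc_point()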