Ejemplo n.º 1
0
 def classic_control_env():
     """Return a ``(DiscreteEnv, States)`` pair for the default ClassicControl env.

     The ``ClassicControl`` instance is reset before being wrapped in
     ``DiscreteEnv``. The ``States`` object holds ``N_WALKERS`` entries with an
     ``actions`` field (declared ``np.int64``) and a ``dt`` field (declared
     ``np.float32``), both updated with arrays of ones.
     """
     base_env = ClassicControl()
     base_env.reset()
     discrete_env = DiscreteEnv(base_env)
     walker_states = States(
         state_dict={
             "actions": {"dtype": np.int64},
             "dt": {"dtype": np.float32},
         },
         batch_size=N_WALKERS,
     )
     walker_states.update(actions=np.ones(N_WALKERS), dt=np.ones(N_WALKERS))
     return discrete_env, walker_states
Ejemplo n.º 2
0
def create_majority_step_swarm():
    """Build a ``StepSwarm`` configured for ``CartPole-v0``.

    The env factory returns a ``ParallelEnv`` whose inner factory wraps
    ``ClassicControl(name="CartPole-v0")`` in a ``DiscreteEnv``; the model
    factory builds a ``DiscreteUniform`` from the env it receives.
    """

    def build_model(x):
        return DiscreteUniform(env=x)

    def build_env():
        def build_discrete_env():
            return DiscreteEnv(ClassicControl(name="CartPole-v0"))

        return ParallelEnv(build_discrete_env)

    return StepSwarm(
        model=build_model,
        env=build_env,
        reward_limit=10,
        n_walkers=100,
        max_epochs=20,
        step_epochs=25,
    )
Ejemplo n.º 3
0
def create_step_to_best():
    """Build a ``StepToBest`` swarm configured for ``CartPole-v0``.

    Uses a ``DiscreteUniform`` model factory and a ``ParallelEnv`` factory
    around ``DiscreteEnv(ClassicControl("CartPole-v0"))``.
    """

    def build_discrete_env():
        return DiscreteEnv(ClassicControl("CartPole-v0"))

    def build_parallel_env():
        return ParallelEnv(build_discrete_env)

    def build_model(x):
        return DiscreteUniform(env=x)

    settings = {
        "model": build_model,
        "env": build_parallel_env,
        "reward_limit": 16,
        "n_walkers": 100,
        "max_epochs": 5,
        "step_epochs": 25,
    }
    return StepToBest(**settings)
Ejemplo n.º 4
0
def create_cartpole_swarm():
    """Build a ``Swarm`` of 150 walkers for ``CartPole-v0``.

    The ``Walkers`` class is passed as the ``walkers`` argument; the env
    factory returns ``DiscreteEnv(ClassicControl("CartPole-v0"))`` and the
    model factory builds a ``DiscreteUniform`` from the env it is given.
    """

    def build_env():
        return DiscreteEnv(ClassicControl("CartPole-v0"))

    def build_model(x):
        return DiscreteUniform(env=x)

    return Swarm(
        model=build_model,
        walkers=Walkers,
        env=build_env,
        reward_limit=121,
        n_walkers=150,
        max_epochs=300,
        reward_scale=2,
    )
Ejemplo n.º 5
0
def create_follow_best_step_swarm():
    """Build a ``StepSwarm`` for ``CartPole-v0`` with ``FollowBestModel`` as root model.

    The env factory returns a ``ParallelEnv`` around
    ``DiscreteEnv(ClassicControl("CartPole-v0"))``; the model factory builds
    a ``DiscreteUniform`` from the env it receives.
    """

    def build_model(x):
        return DiscreteUniform(env=x)

    def build_env():
        def build_discrete_env():
            return DiscreteEnv(ClassicControl("CartPole-v0"))

        return ParallelEnv(build_discrete_env)

    settings = {
        "root_model": FollowBestModel,
        "model": build_model,
        "env": build_env,
        "reward_limit": 15,
        "n_walkers": 100,
        "max_epochs": 15,
        "step_epochs": 25,
    }
    return StepSwarm(**settings)
def create_cartpole_swarm():
    """Build a ``Swarm`` of 50 walkers for ``CartPole-v0``.

    ``fragile.core`` and ``plangym`` are imported inside the function, so
    those packages are only required when it is called.
    """
    from fragile.core import DiscreteEnv, DiscreteUniform, Swarm
    from plangym import ClassicControl

    def build_env():
        return DiscreteEnv(ClassicControl("CartPole-v0"))

    def build_model(x):
        return DiscreteUniform(env=x)

    settings = {
        "model": build_model,
        "env": build_env,
        "reward_limit": 51,
        "n_walkers": 50,
        "max_epochs": 100,
        "reward_scale": 2,
    }
    return Swarm(**settings)
Ejemplo n.º 7
0
def cartpole_env():
    """Return a multi-worker ``CartPole-v0`` environment.

    When ``judo.Backend.can_use_cuda()`` is true a ``RayEnv`` with two
    workers is returned; otherwise a ``ParallelEnv`` is returned. Both wrap
    the same ``DiscreteEnv(ClassicControl(name="CartPole-v0"))`` factory.
    """

    def build_discrete_env():
        return DiscreteEnv(ClassicControl(name="CartPole-v0"))

    if not judo.Backend.can_use_cuda():
        return ParallelEnv(build_discrete_env)
    return RayEnv(build_discrete_env, n_workers=2)
Ejemplo n.º 8
0
def classic_control_env():
    """Return a ``DiscreteEnv`` wrapping a default ``ClassicControl`` env.

    The underlying ``ClassicControl`` instance is reset before it is wrapped.
    """
    base_env = ClassicControl()
    base_env.reset()
    return DiscreteEnv(base_env)