Ejemplo n.º 1
0
def create_step_to_best():
    """Build a ``StepToBest`` swarm using the module-level ``cartpole_env`` factory."""
    settings = dict(
        model=lambda walker_env: DiscreteUniform(env=walker_env),
        env=cartpole_env,
        reward_limit=51,
        n_walkers=100,
        max_epochs=160,
        step_epochs=3,
    )
    return StepToBest(**settings)
Ejemplo n.º 2
0
def create_majority_step_swarm():
    """Build a ``StepSwarm`` over a parallelized classic-control environment."""

    def make_env():
        # Wrap the discrete classic-control env in a parallel runner.
        return ParallelEnvironment(lambda: DiscreteEnv(ClassicControl()))

    return StepSwarm(
        model=lambda walker_env: DiscreteUniform(env=walker_env),
        env=make_env,
        reward_limit=10,
        n_walkers=100,
        max_epochs=20,
        step_epochs=25,
    )
Ejemplo n.º 3
0
def create_majority_step_swarm():
    """Build a ``StepSwarm`` using the module-level ``cartpole_env`` factory."""
    config = {
        "model": lambda walker_env: DiscreteUniform(env=walker_env),
        "env": cartpole_env,
        "reward_limit": 10,
        "n_walkers": 100,
        "max_epochs": 20,
        "step_epochs": 25,
    }
    return StepSwarm(**config)
Ejemplo n.º 4
0
def create_step_to_best():
    """Build a ``StepToBest`` swarm over a parallelized classic-control environment."""

    def make_env():
        return ParallelEnvironment(lambda: DiscreteEnv(ClassicControl()))

    return StepToBest(
        model=lambda walker_env: DiscreteUniform(env=walker_env),
        env=make_env,
        reward_limit=16,
        n_walkers=100,
        max_epochs=5,
        step_epochs=25,
    )
Ejemplo n.º 5
0
def create_follow_best_step_swarm():
    """Build a ``StepSwarm`` whose root walker follows the best walker (``FollowBestModel``)."""
    settings = dict(
        root_model=FollowBestModel,
        model=lambda walker_env: DiscreteUniform(env=walker_env),
        env=cartpole_env,
        reward_limit=101,
        n_walkers=100,
        max_epochs=200,
        step_epochs=10,
    )
    return StepSwarm(**settings)
Ejemplo n.º 6
0
def create_step_to_best():
    """Build a ``StepToBest`` swarm over a parallelized CartPole-v0 environment."""

    def make_env():
        return ParallelEnv(lambda: DiscreteEnv(ClassicControl("CartPole-v0")))

    return StepToBest(
        model=lambda walker_env: DiscreteUniform(env=walker_env),
        env=make_env,
        reward_limit=51,
        n_walkers=100,
        max_epochs=200,
        step_epochs=50,
    )
Ejemplo n.º 7
0
def create_follow_best_step_swarm():
    """Build a ``StepSwarm`` with a ``FollowBestModel`` root over a parallel classic-control env."""

    def make_env():
        return ParallelEnvironment(lambda: DiscreteEnv(ClassicControl()))

    return StepSwarm(
        root_model=FollowBestModel,
        model=lambda walker_env: DiscreteUniform(env=walker_env),
        env=make_env,
        reward_limit=15,
        n_walkers=100,
        max_epochs=15,
        step_epochs=25,
    )
Ejemplo n.º 8
0
def create_follow_best_step_swarm_after_impr():
    """Build a ``StepSwarm`` that only advances the root after an improvement.

    Uses a ``FollowBestModel`` root over the module-level ``cartpole_env``.
    """
    # NOTE(review): walker/epoch counts look debug-reduced — the original
    # source comments suggest 100 walkers, 200 max epochs, 25 step epochs.
    settings = dict(
        root_model=FollowBestModel,
        model=lambda walker_env: DiscreteUniform(env=walker_env),
        env=cartpole_env,
        reward_limit=101,
        n_walkers=10,
        max_epochs=2,
        step_epochs=2,
        step_after_improvement=True,
    )
    return StepSwarm(**settings)
Ejemplo n.º 9
0
def swarm_with_tree():
    """Build a ``Swarm`` that records its search history in a pruned ``HistoryTree``."""
    config = {
        "model": lambda walker_env: DiscreteUniform(env=walker_env),
        "env": lambda: DiscreteEnv(ClassicControl()),
        "reward_limit": 200,
        "n_walkers": 150,
        "max_epochs": 300,
        "reward_scale": 2,
        "tree": HistoryTree,
        "prune_tree": True,
    }
    return Swarm(**config)
def create_cartpole_swarm():
    """Build a basic ``Swarm`` on CartPole-v0.

    Imports are function-local so the example stays self-contained.
    """
    from fragile.core import DiscreteEnv, DiscreteUniform, Swarm
    from plangym import ClassicControl

    def make_env():
        return DiscreteEnv(ClassicControl("CartPole-v0"))

    return Swarm(
        model=lambda walker_env: DiscreteUniform(env=walker_env),
        env=make_env,
        reward_limit=51,
        n_walkers=50,
        max_epochs=100,
        reward_scale=2,
    )
Ejemplo n.º 11
0
def create_follow_best_step_swarm_after_impr():
    """Build a ``StepSwarm`` over a parallelized CartPole-v0 env that steps only after improvement."""

    def make_env():
        return ParallelEnv(lambda: DiscreteEnv(ClassicControl("CartPole-v0")))

    settings = dict(
        root_model=FollowBestModel,
        model=lambda walker_env: DiscreteUniform(env=walker_env),
        env=make_env,
        reward_limit=101,
        n_walkers=100,
        max_epochs=200,
        step_epochs=25,
        step_after_improvement=True,
    )
    return StepSwarm(**settings)
Ejemplo n.º 12
0
def create_step_to_best_after_impr():
    """Build a ``StepToBest`` swarm on MsPacman-ram-v0 with a ``GaussianDt`` critic.

    Steps only after the best reward improves (``step_after_improvement=True``).
    """
    from plangym import AtariEnvironment
    from fragile.core import GaussianDt

    atari = AtariEnvironment(name="MsPacman-ram-v0", clone_seeds=True, autoreset=True)
    critic = GaussianDt(min_dt=3, max_dt=100, loc_dt=5, scale_dt=2)
    return StepToBest(
        model=lambda walker_env: DiscreteUniform(env=walker_env, critic=critic),
        env=lambda: DiscreteEnv(atari),
        reward_limit=-100,
        n_walkers=67,
        max_epochs=60,
        step_epochs=5,
        step_after_improvement=True,
    )