def create_step_to_best():
    """Build a ``StepToBest`` swarm targeting the CartPole task.

    Uses a discrete-uniform action model, 100 walkers, and stops once the
    reward limit of 51 is reached or 160 epochs elapse.
    """
    return StepToBest(
        model=lambda env: DiscreteUniform(env=env),
        env=cartpole_env,
        reward_limit=51,
        n_walkers=100,
        max_epochs=160,
        step_epochs=3,
    )
def create_majority_step_swarm():
    """Build a ``StepSwarm`` over a parallelized classic-control environment.

    The environment factory wraps ``ClassicControl`` in ``DiscreteEnv`` and
    runs it under ``ParallelEnvironment``.
    """

    def build_env():
        # Factory deferred so each worker constructs its own environment.
        return ParallelEnvironment(lambda: DiscreteEnv(ClassicControl()))

    return StepSwarm(
        model=lambda env: DiscreteUniform(env=env),
        env=build_env,
        reward_limit=10,
        n_walkers=100,
        max_epochs=20,
        step_epochs=25,
    )
def create_majority_step_swarm():
    """Build a ``StepSwarm`` on the CartPole environment.

    Uses a discrete-uniform action model with 100 walkers, a reward limit
    of 10, and at most 20 epochs of 25 internal steps each.
    """
    return StepSwarm(
        model=lambda env: DiscreteUniform(env=env),
        env=cartpole_env,
        reward_limit=10,
        n_walkers=100,
        max_epochs=20,
        step_epochs=25,
    )
def create_step_to_best():
    """Build a ``StepToBest`` swarm over a parallelized classic-control env.

    Configured with a reward limit of 16, 100 walkers, and a short run of
    at most 5 epochs with 25 internal steps each.
    """

    def build_env():
        # Factory deferred so each worker constructs its own environment.
        return ParallelEnvironment(lambda: DiscreteEnv(ClassicControl()))

    return StepToBest(
        model=lambda env: DiscreteUniform(env=env),
        env=build_env,
        reward_limit=16,
        n_walkers=100,
        max_epochs=5,
        step_epochs=25,
    )
def create_follow_best_step_swarm():
    """Build a ``StepSwarm`` whose root walker follows the best walker.

    ``FollowBestModel`` drives the root model; actions for the internal
    swarm are sampled from a discrete uniform distribution on CartPole.
    """
    return StepSwarm(
        root_model=FollowBestModel,
        model=lambda env: DiscreteUniform(env=env),
        env=cartpole_env,
        reward_limit=101,
        n_walkers=100,
        max_epochs=200,
        step_epochs=10,
    )
def create_step_to_best():
    """Build a ``StepToBest`` swarm on a parallelized CartPole-v0 env.

    Runs 100 walkers for up to 200 epochs of 50 internal steps each, with
    a reward limit of 51.
    """

    def build_env():
        # Factory deferred so each worker constructs its own environment.
        return ParallelEnv(lambda: DiscreteEnv(ClassicControl("CartPole-v0")))

    return StepToBest(
        model=lambda env: DiscreteUniform(env=env),
        env=build_env,
        reward_limit=51,
        n_walkers=100,
        max_epochs=200,
        step_epochs=50,
    )
def create_follow_best_step_swarm():
    """Build a follow-best ``StepSwarm`` on a parallel classic-control env.

    ``FollowBestModel`` drives the root model; the internal swarm samples
    discrete-uniform actions with 100 walkers, a reward limit of 15, and
    at most 15 epochs of 25 internal steps each.
    """

    def build_env():
        # Factory deferred so each worker constructs its own environment.
        return ParallelEnvironment(lambda: DiscreteEnv(ClassicControl()))

    return StepSwarm(
        root_model=FollowBestModel,
        model=lambda env: DiscreteUniform(env=env),
        env=build_env,
        reward_limit=15,
        n_walkers=100,
        max_epochs=15,
        step_epochs=25,
    )
def create_follow_best_step_swarm_after_impr():
    """Build a follow-best ``StepSwarm`` that steps only after improvements.

    NOTE(review): the original code carried commented-out values suggesting
    a full-scale configuration of n_walkers=100, max_epochs=200,
    step_epochs=25; the reduced values below look like a deliberate
    speed-up for testing — confirm before scaling back up.
    """
    return StepSwarm(
        root_model=FollowBestModel,
        model=lambda env: DiscreteUniform(env=env),
        env=cartpole_env,
        reward_limit=101,
        n_walkers=10,
        max_epochs=2,
        step_epochs=2,
        step_after_improvement=True,
    )
def swarm_with_tree():
    """Build a ``Swarm`` that records its history in a pruned ``HistoryTree``.

    Runs 150 walkers on a discrete classic-control environment with a
    reward limit of 200, up to 300 epochs, and a reward scale of 2.
    """
    return Swarm(
        model=lambda env: DiscreteUniform(env=env),
        env=lambda: DiscreteEnv(ClassicControl()),
        reward_limit=200,
        n_walkers=150,
        max_epochs=300,
        reward_scale=2,
        tree=HistoryTree,
        prune_tree=True,
    )
def create_cartpole_swarm():
    """Build a plain ``Swarm`` on CartPole-v0 with 50 walkers.

    Imports are local so the factory only pulls in ``fragile`` and
    ``plangym`` when it is actually called.
    """
    from plangym import ClassicControl

    from fragile.core import DiscreteEnv, DiscreteUniform, Swarm

    return Swarm(
        model=lambda env: DiscreteUniform(env=env),
        env=lambda: DiscreteEnv(ClassicControl("CartPole-v0")),
        reward_limit=51,
        n_walkers=50,
        max_epochs=100,
        reward_scale=2,
    )
def create_follow_best_step_swarm_after_impr():
    """Build a follow-best ``StepSwarm`` on parallel CartPole-v0.

    ``step_after_improvement=True`` makes the root step only after the
    internal swarm improves; 100 walkers run for up to 200 epochs of 25
    internal steps each, with a reward limit of 101.
    """

    def build_env():
        # Factory deferred so each worker constructs its own environment.
        return ParallelEnv(lambda: DiscreteEnv(ClassicControl("CartPole-v0")))

    return StepSwarm(
        root_model=FollowBestModel,
        model=lambda env: DiscreteUniform(env=env),
        env=build_env,
        reward_limit=101,
        n_walkers=100,
        max_epochs=200,
        step_epochs=25,
        step_after_improvement=True,
    )
def create_step_to_best_after_impr():
    """Build a ``StepToBest`` swarm on MsPacman (RAM observations).

    A ``GaussianDt`` critic samples the number of frames each action is
    repeated. ``step_after_improvement=True`` makes the root step only
    after the internal swarm improves.

    NOTE(review): the Atari environment is created once here and shared by
    the ``env`` lambda — presumably intentional (clone_seeds=True), but
    verify it is safe to reuse across ``DiscreteEnv`` wrappers.
    """
    from plangym import AtariEnvironment

    from fragile.core import GaussianDt

    atari_env = AtariEnvironment(
        name="MsPacman-ram-v0",
        clone_seeds=True,
        autoreset=True,
    )
    # Time-step critic: action repeats drawn from a clipped Gaussian.
    dt = GaussianDt(min_dt=3, max_dt=100, loc_dt=5, scale_dt=2)
    return StepToBest(
        model=lambda env: DiscreteUniform(env=env, critic=dt),
        env=lambda: DiscreteEnv(atari_env),
        reward_limit=-100,
        n_walkers=67,
        max_epochs=60,
        step_epochs=5,
        step_after_improvement=True,
    )