def create_cartpole_swarm():
    """Build a ``Swarm`` that runs on the ``CartPole-v0`` classic-control task.

    The swarm is configured with 150 walkers, ``max_epochs=300``,
    ``reward_limit=121`` and ``reward_scale=2``. The model and environment
    are handed to ``Swarm`` as factories, so nothing is instantiated here
    except the ``Swarm`` itself.

    Returns:
        The configured ``Swarm`` instance.
    """

    def _build_model(x):
        # The model factory is given the environment and wraps it in a
        # DiscreteUniform model.
        return DiscreteUniform(env=x)

    def _build_env():
        return DiscreteEnv(ClassicControl("CartPole-v0"))

    return Swarm(
        model=_build_model,
        walkers=Walkers,
        env=_build_env,
        reward_limit=121,
        n_walkers=150,
        max_epochs=300,
        reward_scale=2,
    )
def create_cartpole_swarm():
    """Build a small tree-pruning ``Swarm`` on the default ``ClassicControl`` env.

    ``ClassicControl()`` is constructed without arguments, so whichever
    environment it defaults to is used (presumably CartPole, going by this
    function's name -- confirm against ``ClassicControl``).

    The swarm is configured with 20 walkers, ``max_iters=200``,
    ``prune_tree=True`` and ``reward_scale=2``.

    Returns:
        The configured ``Swarm`` instance.
    """

    def _build_model(x):
        # The model factory is given the environment and wraps it in a
        # DiscreteUniform model.
        return DiscreteUniform(env=x)

    def _build_env():
        return DiscreteEnv(ClassicControl())

    return Swarm(
        model=_build_model,
        walkers=Walkers,
        env=_build_env,
        n_walkers=20,
        max_iters=200,
        prune_tree=True,
        reward_scale=2,
    )
def create_atari_swarm():
    """Build a tiny ``Swarm`` on ``MsPacman-ram-v0`` with a Gaussian dt critic.

    A single ``AtariEnvironment`` and a single ``GaussianDt`` critic are
    created up front and captured by the factories passed to ``Swarm``.
    The swarm uses 6 walkers, ``max_epochs=10``, ``reward_scale=2`` and
    ``reward_limit=1``.

    Returns:
        The configured ``Swarm`` instance.
    """
    atari_env = AtariEnvironment(name="MsPacman-ram-v0")
    # NOTE(review): loc_dt (5) is below min_dt (10) -- confirm this is intended.
    time_critic = GaussianDt(min_dt=10, max_dt=100, loc_dt=5, scale_dt=2)

    def _build_model(x):
        # Every model built by this factory shares the same critic object.
        return DiscreteUniform(env=x, critic=time_critic)

    def _build_env():
        # Every call wraps the same AtariEnvironment instance created above.
        return DiscreteEnv(atari_env)

    return Swarm(
        model=_build_model,
        env=_build_env,
        n_walkers=6,
        max_epochs=10,
        reward_scale=2,
        reward_limit=1,
    )
def create_atari_swarm():
    """Build a ``Swarm`` on ``MsPacman-ram-v0`` with seed cloning and autoreset.

    The ``AtariEnvironment`` is created with ``clone_seeds=True`` and
    ``autoreset=True``; a ``GaussianDt`` critic is attached to the model.
    The swarm uses 67 walkers, ``max_epochs=500``, ``reward_scale=2`` and
    ``reward_limit=751``.

    Returns:
        The configured ``Swarm`` instance.
    """
    atari_env = AtariEnvironment(
        name="MsPacman-ram-v0",
        clone_seeds=True,
        autoreset=True,
    )
    time_critic = GaussianDt(min_dt=3, max_dt=100, loc_dt=5, scale_dt=2)

    def _build_model(x):
        # Every model built by this factory shares the same critic object.
        return DiscreteUniform(env=x, critic=time_critic)

    def _build_env():
        # Every call wraps the same AtariEnvironment instance created above.
        return DiscreteEnv(atari_env)

    return Swarm(
        model=_build_model,
        walkers=Walkers,
        env=_build_env,
        n_walkers=67,
        max_epochs=500,
        reward_scale=2,
        reward_limit=751,
    )
def create_atari_swarm():
    """Build a tree-pruning ``Swarm`` on a parallel ``MsPacman-ram-v0`` env.

    The environment is a ``ParallelEnvironment`` wrapping
    ``AtariEnvironment`` (``clone_seeds=True``, ``autoreset=True``,
    ``blocking=False``); a ``GaussianDt`` critic is attached to the model.
    The swarm uses 67 walkers, ``max_iters=20``, ``prune_tree=True`` and
    ``reward_scale=2``.

    Returns:
        The configured ``Swarm`` instance.
    """
    parallel_env = ParallelEnvironment(
        env_class=AtariEnvironment,
        name="MsPacman-ram-v0",
        clone_seeds=True,
        autoreset=True,
        blocking=False,
    )
    time_critic = GaussianDt(min_dt=3, max_dt=100, loc_dt=5, scale_dt=2)

    def _build_model(x):
        # Every model built by this factory shares the same critic object.
        return DiscreteUniform(env=x, critic=time_critic)

    def _build_env():
        # Every call wraps the same ParallelEnvironment instance created above.
        return DiscreteEnv(parallel_env)

    return Swarm(
        model=_build_model,
        walkers=Walkers,
        env=_build_env,
        n_walkers=67,
        max_iters=20,
        prune_tree=True,
        reward_scale=2,
    )
def mathy_swarm(config: SwarmConfig, env_callable=None) -> Swarm:
    """Build a ``Swarm`` for the mathy environment from a ``SwarmConfig``.

    Args:
        config: Settings read by this function: ``single_problem``,
            ``use_mp``, ``history``, ``history_names``, ``n_walkers`` and
            ``max_iters``.
        env_callable: Optional zero-argument factory for the environment.
            When ``None``, a factory creating
            ``FragileMathyEnv(name="mathy_v0", ...)`` is used, with
            ``repeat_problem`` taken from ``config.single_problem``.

    Returns:
        A ``Swarm`` using a ``DiscreteMasked`` model, ``mathy_dist`` as the
        distance function, ``EnvRewards.WIN`` as the reward limit and the
        progress bar disabled.
    """

    def _default_env():
        return FragileMathyEnv(
            name="mathy_v0", repeat_problem=config.single_problem)

    def _history_tree():
        return HistoryTree(prune=True, names=config.history_names)

    def _build_model(env):
        return DiscreteMasked(env=env)

    make_env = _default_env if env_callable is None else env_callable
    if config.use_mp:
        # Wrap whichever factory was selected in a ParallelEnv.
        make_env = ParallelEnv(env_callable=make_env)

    # A history tree is only attached when the config asks for one.
    make_tree = _history_tree if config.history else None

    return Swarm(
        model=_build_model,
        env=make_env,
        tree=make_tree,
        reward_limit=EnvRewards.WIN,
        n_walkers=config.n_walkers,
        # The config's ``max_iters`` is forwarded as the swarm's ``max_epochs``.
        max_epochs=config.max_iters,
        reward_scale=1,
        distance_scale=3,
        distance_function=mathy_dist,
        show_pbar=False,
    )