def __init__(
    self,
    model: Callable = lambda x: NormalContinuous(bounds=x.bounds),
    accumulate_rewards: bool = False,
    minimize: bool = True,
    start_same_pos: bool = False,
    *args,
    **kwargs,
):
    """
    Set up a :class:`FunctionMapper`.

    Args:
        model: Callable that produces a :class:`Model` instance. The default
            takes one argument and builds a :class:`NormalContinuous` from
            that argument's ``bounds`` attribute.
        accumulate_rewards: When ``True``, rewards obtained after moving to a
            new state are accumulated. When ``False``, only the last reward
            is taken into account.
        minimize: When ``True`` the algorithm runs a minimization process;
            when ``False`` it runs a maximization process.
        start_same_pos: When ``True`` every walker shares the same starting
            position.
        *args: Forwarded to the :class:`Swarm` ``__init__``.
        **kwargs: Forwarded to the :class:`Swarm` ``__init__``.

    """
    # Extra positional arguments are forwarded first, followed by the options
    # this class names explicitly and any remaining keyword arguments.
    super(FunctionMapper, self).__init__(
        *args,
        model=model,
        accumulate_rewards=accumulate_rewards,
        minimize=minimize,
        **kwargs,
    )
    # Stored only after the parent initializer has run, as in the original.
    self.start_same_pos = start_same_pos
def create_model(name="discrete"):
    """
    Return a zero-argument factory for the model identified by ``name``.

    Args:
        name: One of ``"discrete"``, ``"continuous"`` or ``"random_normal"``.

    Returns:
        A callable taking no arguments that builds the requested model each
        time it is called.

    Raises:
        ValueError: If ``name`` is not one of the supported identifiers.

    """

    def unit_box():
        # Bounds spanning [-1, 1] with shape (3,), used by the continuous models.
        return Bounds(low=-1, high=1, shape=(3, ))

    if name == "discrete":
        return lambda: DiscreteUniform(n_actions=10)

    if name == "continuous":
        limits = unit_box()
        return lambda: ContinuousUniform(bounds=limits)

    if name == "random_normal":
        limits = unit_box()
        return lambda: NormalContinuous(loc=0, scale=1, bounds=limits)

    raise ValueError("Invalid param `name`.")
def create_function_swarm():
    """
    Build a small :class:`FunctionMapper` over a 2-dimensional Rastrigin function.

    The swarm is configured with 5 walkers, 5 epochs, tree pruning enabled,
    a reward scale of 2 and ``minimize=False``.

    Returns:
        The configured :class:`FunctionMapper` instance.

    """
    rastrigin = Rastrigin(dims=2)

    def build_model(x):
        # The argument is ignored; the model always uses the bounds of the
        # environment created above.
        return NormalContinuous(bounds=rastrigin.bounds)

    def build_env():
        # Every call hands back the same environment instance.
        return rastrigin

    return FunctionMapper(
        model=build_model,
        env=build_env,
        n_walkers=5,
        max_epochs=5,
        prune_tree=True,
        reward_scale=2,
        minimize=False,
    )
def test_sample(self):
    """Check that sampled actions stay inside the bounds and match the expected moments."""
    n_draws = 10000

    # Model built without loc/scale: the sample is expected to have a mean
    # close to 0 and a standard deviation close to 1.
    narrow_box = Bounds(low=-5, high=5, shape=(3, ))
    sampler = NormalContinuous(bounds=narrow_box)
    drawn = sampler.predict(batch_size=n_draws).actions
    assert drawn.min() >= -5
    assert drawn.max() <= 5
    assert numpy.allclose(drawn.mean(), 0, atol=0.05)
    assert numpy.allclose(drawn.std(), 1, atol=0.05)

    # Model with explicit loc=5 and scale=2 over a wider, two-dimensional shape.
    wide_box = Bounds(low=-10, high=30, shape=(3, 10))
    sampler = NormalContinuous(bounds=wide_box, loc=5, scale=2)
    drawn = sampler.predict(batch_size=n_draws).actions
    assert drawn.min() >= -10
    assert drawn.max() <= 30
    sample_mean = drawn.mean()
    assert numpy.allclose(sample_mean, 5, atol=0.05), sample_mean
    sample_std = drawn.std()
    assert numpy.allclose(sample_std, 2, atol=0.05), sample_std
def gaussian_model(env):
    """
    Build a Gaussian model of mean 0 and std of 10 for ``env``.

    Args:
        env: Object exposing a ``bounds`` attribute, which is passed on to
            the model.

    Returns:
        A :class:`NormalContinuous` created with ``loc=0.0``, ``scale=10``
        and the bounds of ``env``.

    """
    env_bounds = env.bounds
    return NormalContinuous(loc=0.0, scale=10, bounds=env_bounds)