class RateMap(Input): """ Uniform spike train generator with rates based on environment state. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python processing_time = 10 config = { "n_inputs": 10, "magnitude": 2, "input_firing_steps": -1, "input_pct_inhibitory": 0.2, "state_rate_map": [.0, .8], } input = RateMap(**config) input.reset() env = Logic(preset='XOR') state = env.reset() for step in range(10): input.update(state) for _ in range(processing_time): in_fires = input() state, _, done, __ = env.update(0) if done: break .. code-block:: python class network_template(Network): keys = { "n_inputs": 10, "magnitude": 2, "input_firing_steps": -1, "input_pct_inhibitory": 0.2, "state_rate_map": [.0, .8], } parts = { "inputs": RateMap } """ NECESSARY_KEYS = Input.extend_keys( [ Key( "state_rate_map", "dict[float or list[floats] if groups>1] Elementwise State->Rate map.", type=(dict, np.ndarray), ), ] ) def __init__(self, **kwargs): super().__init__(**kwargs) def __call__(self) -> np.bool: """ Spikes output from each input neuron, called once per network step. Returns ------- ndarray[n_inputs, dtype=bool] Spike output for each neuron. """ if not self.values.size: return [] if ( self._input_firing_steps != -1 and self.network_time > self._input_firing_steps ): return np.zeros(self.values.shape) spikes = np.where( np.random.uniform(0, 1, size=self.values.size) <= self.values, self._magnitude, 0.0, ) self.network_time += 1 return spikes * self.polarities def update(self, state: object): """ Update input generator, called once per game step. Parameters ---------- state: object Environment state in a format the generator can understand. """ self.network_time = 0 if not isinstance(self._state_rate_map, dict): if isinstance(state, (int, float)): state = np.int_([state]) elif isinstance(state, (np.ndarray, list, tuple)): try: state = np.int_(state) except TypeError: pass rate = self._state_rate_map[state] if not rate.size or self._n_inputs % rate.size: raise ValueError( f"N_INPUTS must be evenly divisible by the number of values in rate, {self._n_inputs} / {rate.size}" ) self.values = np.ravel( np.array([rate for _ in range(self._n_inputs // rate.size)]) )
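# A minimal standalone sketch (not part of the library) of how RateMap.update tiles a rate
# vector across n_inputs; the numbers mirror the docstring example above.
import numpy as np

def _demo_ratemap_tiling(n_inputs: int = 10) -> np.ndarray:
    rate = np.array([0.0, 0.8])  # per-group firing probabilities
    # Same tiling RateMap.update performs: repeat the rate vector until it spans n_inputs.
    values = np.ravel(np.array([rate for _ in range(n_inputs // rate.size)]))
    # values -> [0., .8, 0., .8, ...]; each entry is a per-neuron spike probability per step.
    return values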
class Neuron(Module): """ A group of spiking neurons. Each spiking neuron has an internal membrane potential that increases with each incoming spike. The potential persists but slowly decreases over time. Each neuron fires when its potential surpasses some firing threshold and does not fire again for the duration of its refractory period. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python config = { "magnitude": 2, "n_neurons": 100, "firing_threshold": 16, "neuron_pct_inhibitory": .2, "potential_decay": .2, "prob_rand_fire": .08, "refractory_period": 1, } neurons = Neuron(**config) neurons.reset() weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons'])) for i in range(100): spikes = neurons() neurons += np.sum( weights * spikes.reshape((-1, 1)), axis=0 ) .. code-block:: python class network_template(Network): keys = { "magnitude": 2, "n_neurons": 100, "firing_threshold": 16, "neuron_pct_inhibitory": .2, "potential_decay": .2, "prob_rand_fire": .08, "refractory_period": 1, } parts = { "neurons": Neuron } """ NECESSARY_KEYS = [ Key("magnitude", "Magnitude of spike.", float), Key("n_neurons", "Number of neurons in the network.", int), Key("firing_threshold", "Neuron voltage threshold to fire.", float), Key( "neuron_pct_inhibitory", "[0, 1] Percentage of inhibitory neurons.", float, default=0, ), Key("potential_decay", "[0, 1] Percentage voltage loss on each tick.", float), Key( "prob_rand_fire", "[0, 1] Probability each neuron will randomly fire.", float, default=0, ), Key("refractory_period", "Amount of time after a spike during which the neuron cannot fire.", int), Key("resting_mv", "Neuron resting voltage.", float, default=0.0), Key( "spike_delay", "[0, 10] Units of time after hitting threshold to fire.", int, default=0, ), ] def __init__(self, **kwargs): super().__init__(**kwargs) if "polarities" in kwargs: self.polarities = np.array(kwargs["polarities"]) else: polarities = np.random.uniform(size=self._n_neurons) self.polarities = np.where( polarities < self._neuron_pct_inhibitory, -1.0, 1.0) ## Initialized in self.reset() self.potentials = self.refractory_timers = None self.spike_shape = self.schedule = None def reset(self): """ Reset all neuron members. Called at the start of each episode. """ self.potentials = self._resting_mv * np.ones(self._n_neurons, dtype="float16") self.refractory_timers = np.zeros(self._n_neurons) self.spike_shape = self._generate_spike_shape() self.schedule = np.zeros(shape=(self.spike_shape.size, self._n_neurons)) def _generate_spike_shape(self) -> np.bool: """ Generate the neuron output schedule for the time after its potential passes the firing threshold. Returns ------- ndarray[SCHEDULE_LENGTH, dtype=bool] Neuron output schedule. """ SCHEDULE_LENGTH = max(10, self._spike_delay) spike_shape = np.zeros(shape=(SCHEDULE_LENGTH, 1)) spike_shape[self._spike_delay] = 1 return spike_shape def __call__(self) -> np.bool: """ Determine whether each neuron will fire or not: neurons schedule spikes when their potentials >= firing_threshold. Called once per network step. Returns ------- ndarray[n_neurons, dtype=bool] Spike output from each neuron at the current timestep. Examples -------- .. 
code-block:: python config = { "magnitude": 2, "n_neurons": 100, "neuron_pct_inhibitory": .2, "potential_decay": .2, "prob_rand_fire": .08, "refractory_period": 1, } neurons = Neuron(**config) neurons.reset() weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons'])) for i in range(100): spikes = neurons() neurons += np.sum( weights * spikes.reshape((-1, 1)), axis=0 ) """ spike_occurences = self.potentials >= self._firing_threshold spike_occurences += (np.random.uniform(0, 1, size=self._n_neurons) < self._prob_rand_fire) spike_occurences &= self.refractory_timers <= 0 self.refractory_timers[spike_occurences] = self._refractory_period + 1 self.schedule += self.spike_shape * np.int_(spike_occurences) output = self.schedule[0] * self.polarities * self._magnitude return output def __iadd__(self, incoming_v: np.float): """ Cool alias for neuron.update. Simulate the neurons for one time step and add incoming voltage to the neurons' membrane potentials. Called once per network step. Parameters ---------- incoming_v: np.ndarray[neurons, dtype=float] Amount to increase each neuron's potential by. Examples -------- .. code-block:: python config = { "magnitude": 2, "n_neurons": 100, "neuron_pct_inhibitory": .2, "potential_decay": .2, "prob_rand_fire": .08, "refractory_period": 1, } neurons = Neuron(**config) neurons.reset() weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons'])) for i in range(100): spikes = neurons() neurons += np.sum( weights * spikes.reshape((-1, 1)), axis=0 ) """ self.update(incoming_v) return self def update(self, incoming_v: np.float): """ Simulate the neurons for one time step and add incoming voltage to the neurons' membrane potentials. Called once per network step. Parameters ---------- incoming_v: np.ndarray[neurons, dtype=float] Amount to increase each neuron's potential by. Examples -------- .. code-block:: python config = { "magnitude": 2, "n_neurons": 100, "neuron_pct_inhibitory": .2, "potential_decay": .2, "prob_rand_fire": .08, "refractory_period": 1, } neurons = Neuron(**config) neurons.reset() weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons'])) for i in range(100): spikes = neurons() neurons.update(np.sum( weights * spikes.reshape((-1, 1)), axis=0 )) """ self.refractory_timers -= 1 self.schedule = np.vstack( (self.schedule[1:], np.zeros(shape=self._n_neurons))) self.potentials[np.where( self.refractory_timers > 0)] = -65499.0 # finfo('float16').min self.potentials[np.where( self.refractory_timers == 0)] = self._resting_mv decay = 1 - self._potential_decay self.potentials = (self.potentials - self._resting_mv) * decay + self._resting_mv self.potentials += incoming_v
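# A minimal standalone sketch (not part of the library) of the membrane decay rule used in
# Neuron.update: the potential relaxes toward resting_mv by a factor of (1 - potential_decay)
# per step; the default values below match the docstring examples.
def _demo_potential_decay(potential: float = 10.0, resting_mv: float = 0.0,
                          potential_decay: float = 0.2, steps: int = 5) -> float:
    for _ in range(steps):
        potential = (potential - resting_mv) * (1 - potential_decay) + resting_mv
    # With the defaults above: 10 * 0.8**5 ~= 3.28
    return potential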
class CartPole(RL): """ Inverted pendulum / pole-cart / cart-pole reinforcement learning :: g=9.8 / | / pole: Length = 1 m | / V / / θ (angle), theta_dot is angular velocity ______/_____ | | Cart: M = 1 kg |____________| ----> x_dot is velocity O O L1--------x-------------------L2 x is position, with x limits of L1, L2 Actions: jerk left, jerk right (AKA bang-bang control) Goal: control x position of cart to keep pole close to upright, which is when θ = 0 (vertical). Florian. "Correct equations for the dynamics of the cart-pole system." Center for Cognitive and Neural Studies (Coneural), 10 Feb 2007, https://coneural.org/florian/papers/05_cart_pole.pdf Parameters ---------- preset: str, one of PRESETS.keys(), default="DEFAULT" Configuration preset key, default values for game parameters. callback: ExperimentCallback, default=None Callback to send relevant function call information to. kwargs: dict, default=None Game parameters for NECESSARY_KEYS. Overrides preset settings. Examples -------- .. code-block:: python game = CartPole(preset="DEFAULT") game.seed(0) state = game.reset() for _ in range(100): action = model.get_action(state) state, reward, done, info = game.step(action) if done: break game.close() .. code-block:: python class game_template(CartPole): config = CartPole.PRESETS["DEFAULT"] config.update({ # Overrides preset values "param1": 1, "param2": 2, }) kwargs = { "param1": 0, # Overrides game_template.config["param1"] } game = game_template(**kwargs) game.seed(0) state = game.reset() for _ in range(100): action = model.get_action(state) state, reward, done, info = game.step(action) if done: break game.close() """ action_space = np.arange(-1, 1, 0.1) observation_space = None # Defined in init metadata = {"render.modes": ["human"]} NECESSARY_KEYS = [ Key("x_max", "If abs(x) > x_max: game over", float), Key("theta_max", "If abs(theta) > theta_max: game over", float), Key("x_init_range", "list[float] Range of initial x values.", list), Key("theta_init_range", "list[float] Range of initial theta values.", list), Key("xdot_init_range", "list[float] Range of initial x_dot values.", list), Key( "thetadot_init_range", "list[float] Range of initial theta_dot values.", list, ), Key("g", "Acceleration due to gravity", float, default=9.8), Key("Mass_Cart", "Mass of cart", float, default=1.0), Key("Mass_Pole", "Mass of the pole", float, default=0.1), Key("pole_half_length", "Half of the length of the pole", float, default=0.5), Key("Force_Mag", "Force of push", float, default=10.0), Key("Tau", "Time interval for updating the values", float, default=0.02), ] PRESETS = { "DEFAULT": { "xdot_init_range": [-0.1, 0.1], "thetadot_init_range": [-0.1, 0.1], "x_init_range": [0.0, 0.0], "theta_init_range": [0.0, 0.0], "Tau": 0.02, "x_max": 4.5, "theta_max": 0.5 * np.pi, }, "FREMAUX": { "xdot_init_range": [-0.1, 0.1], "thetadot_init_range": [-0.1, 0.1], "x_init_range": [0.0, 0.0], "theta_init_range": [0.0, 0.0], "Tau": 0.02, "x_max": 2.5, "theta_max": 0.5 * np.pi, }, } def __init__(self, preset: str = "DEFAULT", callback: object = None, **kwargs): super().__init__(preset=preset, callback=callback, **kwargs) high = np.array( [ self.params["x_max"], np.finfo(np.float32).max, self.params["theta_max"], np.finfo(np.float32).max, ], dtype=np.float32, ) self.observation_space = NotImplemented def step(self, action: np.ndarray) -> (np.ndarray, 0, bool, {}): """ Act within the environment. Parameters ---------- action: np.ndarray Force pushing in each direction, eg [.5, .5] = 0N of force, [1., 0.]
= 1N of force directed left, [0., 1.] = 1N of force directed right. Returns ------- state: ndarray[4, float]=(x, x', theta, theta') State updated according to action taken. reward: float, = 0 Reward given by environment. done: bool Whether the game is done or not. info: dict, = {} Information of environment. Examples -------- .. code-block:: python game = Cartpole(preset="DEFAULT") game.seed(0) state = game.reset() for _ in range(100): action = model.get_action(state) state, reward, done, info = game.step(action) if done: break game.close() """ PoleMass_Length = self.params["Mass_Pole"] * self.params["pole_half_length"] Total_Mass = self.params["Mass_Cart"] + self.params["Mass_Pole"] Fourthirds = 4.0 / 3.0 # if hasattr(action, "__len__") and len(action) > 1: force = np.dot(action, [-1, 1]) * self.params["Force_Mag"] else: force = action # force = [-1, 1][np.argmax(action)] * self.params['Force_Mag'] assert force < self.params["Force_Mag"] * 1.2, "Action force too high." x, x_dot, theta, theta_dot = self._state temp = ( force + PoleMass_Length * theta_dot * theta_dot * np.sin(theta) ) / Total_Mass thetaacc = (self.params["g"] * np.sin(theta) - np.cos(theta) * temp) / ( self.params["pole_half_length"] * ( Fourthirds - self.params["Mass_Pole"] * np.cos(theta) * np.cos(theta) / Total_Mass ) ) xacc = temp - PoleMass_Length * thetaacc * np.cos(theta) / Total_Mass # Update the four state variables, using Euler's method: # https://en.wikipedia.org/wiki/Euler_method x = x + self.params["Tau"] * x_dot x_dot = x_dot + self.params["Tau"] * xacc theta = theta + self.params["Tau"] * theta_dot theta_dot = theta_dot + self.params["Tau"] * thetaacc state_new = np.array([x, x_dot, theta, theta_dot]) ## x, x_dot, theta, theta_dot = state_new f = abs(x) > self.params["x_max"] or abs(theta) > self.params["theta_max"] rwd = 0 info = {} self.callback.game_step(action, self._state, state_new, rwd, f, info) self._state = state_new return state_new, rwd, f, info def reset(self) -> np.ndarray: """ Reset environment. Returns ------- ndarray[4, float]=(x, x', theta, theta') Initial game state randomly generated in bounds, (*x_init_range * [-1 or 1], *x_dot_init_range * [-1 or 1], *theta_init_range * [-1 or 1], *thetadot_init_range * [-1 or 1]). Examples -------- .. code-block:: python game = Cartpole(preset="DEFAULT") game.seed(0) state = game.reset() """ x = np.random.uniform(*self.params["x_init_range"]) * np.random.choice([-1, 1]) x_dot = np.random.uniform(*self.params["xdot_init_range"]) * np.random.choice( [-1, 1] ) theta = np.random.uniform(*self.params["theta_init_range"]) * np.random.choice( [-1, 1] ) theta_dot = np.random.uniform( *self.params["thetadot_init_range"] ) * np.random.choice([-1, 1]) s = np.array([x, x_dot, theta, theta_dot]) self.callback.game_reset(s) self._state = s return s def render(self, states: np.ndarray, mode: str = "human"): """Renders the environment. The set of supported modes varies per environment. (And some environments do not support rendering at all.) By convention, .. note:: Make sure that your class's metadata 'render.modes' key includes the list of supported modes. It's recommended to call super() in implementations to use the functionality of this method. .. code-block:: python class MyEnv(Env): metadata = {'render.modes': ['human', 'rgb_array']} def render(self, mode='human'): if mode == 'rgb_array': return np.array(...) # return RGB frame suitable for video elif mode == 'human': ... 
# pop up a window and render else: super(MyEnv, self).render(mode=mode) # just raise an exception Parameters ---------- mode (str, in ['human']): the mode to render with Examples -------- .. code-block:: python game = CartPole(preset="DEFAULT") game.seed(0) state = game.reset() states = [state] for _ in range(100): action = model.get_action(state) state, reward, done, info = game.step(action) states.append(state) if done: break game.render(states) game.close() """ def initGraph(): """ Init for animated graph below """ line.set_data([], []) return (line,) def animate(i): """ Each step/refresh of the animated graph, called once per frame. """ thisx = [x1[i], x2[i]] thisy = [y1, y2[i]] line.set_data(thisx, thisy) return (line,) import matplotlib.pyplot as plt import matplotlib.animation as animation toPlot = states xList = [state[0] for state in toPlot] thetaList = [state[2] for state in toPlot] x1 = xList y1 = 0 x2 = 1 * np.sin(thetaList) + x1 y2 = 1 * np.cos(thetaList) + y1 fig = plt.figure() ax = plt.axes(xlim=(-4, 4), ylim=(-0.25, 1.25)) ax.grid() (line,) = ax.plot([], [], "o-", lw=2) anim = animation.FuncAnimation( fig, animate, np.arange(1, len(xList)), interval=30, blit=True, init_func=initGraph, ) # keep a reference so the animation is not garbage collected plt.show()
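# A minimal sketch (not part of the library) of how CartPole.step maps a two-element action to
# a force when the action has a length: force = dot(action, [-1, 1]) * Force_Mag.
import numpy as np

def _demo_cartpole_force(action=(0.3, 0.7), force_mag: float = 10.0) -> float:
    # (0.3, 0.7) -> (-0.3 + 0.7) * 10 = 4.0 N directed right; (.5, .5) -> 0 N.
    return float(np.dot(action, [-1, 1]) * force_mag)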
class EvolveNetwork(MetaRL): """ An environment to tune spiking neural network parameters on an RL game. GENOTYPE_CONSTRAINTS are parameterized by the user with the genotype_constraints init parameter. Networks are parameterized with a combination of their genotype and original config, with the genotype taking priority. See constraint docs in spikey/meta/series. Parameters ---------- kwargs: dict, default=None Game parameters for NECESSARY_KEYS. Overrides preset settings. Examples -------- .. code-block:: python metagame = EvolveNetwork() metagame.seed(0) for _ in range(100): genotype = {} # one value per key in GENOTYPE_CONSTRAINTS fitness, done = metagame.get_fitness(genotype) if done: break metagame.close() .. code-block:: python metagame = EvolveNetwork(**metagame_config) metagame.seed(0) population = Population(... metagame, ...) # population main loop """ NECESSARY_KEYS = MetaRL.extend_keys([ Key( "training_loop", "Pre-configured training loop to run and gauge fitness of.", ), Key( "genotype_constraints", "A constraint for every training loop parameter that should be trained. " + "See constraint docs in spikey/meta/series.", dict, ), Key( "static_updates", "Updates to a specific network or game parameter. " + "Used in meta.Series, see series configuration for details.", default=None, ), Key("n_reruns", "Number of times to rerun each experiment.", int, default=2), Key("win_fitness", "Fitness threshold necessary to terminate MetaRL.", float), Key( "fitness_getter", "f(net, game, results, info)->float Function to determine experiment fitness.", ), Key( "fitness_aggregator", "f([fitness, ..])->float Aggregate fitnesses of each experiment rerun.", default=np.mean, ), ]) GENOTYPE_CONSTRAINTS = {} def __init__(self, **kwargs): super().__init__(**kwargs) self.GENOTYPE_CONSTRAINTS = self._genotype_constraints def get_fitness(self, genotype: dict) -> (float, bool): """ Train a neural network on an RL environment to gauge its fitness. Parameters ---------- genotype: dict Dictionary with values for each key in GENOTYPE_CONSTRAINTS. Returns ------- fitness: float Fitness of genotype given. done: bool Whether termination condition has been reached or not. Examples -------- .. code-block:: python metagame = EvolveNetwork() metagame.seed(0) for _ in range(100): genotype = {} # one value per key in GENOTYPE_CONSTRAINTS fitness, done = metagame.get_fitness(genotype) if done: break metagame.close() """ training_loop = self._training_loop.copy() training_loop.reset(**genotype, **self.params) series = Series( training_loop, self._static_updates, backend=SingleProcessBackend(), ) tracking = [] for experiment in series: for _ in range(self._n_reruns): network, game, results, info = experiment() tracking.append( self._fitness_getter(network, game, results, info)) fitness = self._fitness_aggregator(tracking) terminate = fitness >= self._win_fitness return fitness, terminate
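# A hedged example (illustrative only) of the fitness_getter / fitness_aggregator signatures
# described in NECESSARY_KEYS above; the "total_reward" results key is an assumption, not a
# library guarantee -- replace it with whatever your training loop actually logs.
import numpy as np

def example_fitness_getter(network, game, results, info) -> float:
    return results["total_reward"]  # hypothetical results key

example_fitness_aggregator = np.median  # any f([fitness, ..]) -> float works; the default is np.mean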
class Input(Module): """ Spike based stimulus encoding. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python processing_time = 10 config = { "n_inputs": 10, "magnitude": 2, "input_firing_steps": -1, "input_pct_inhibitory": 0.2, } input = Input(**config) input.reset() env = Logic(preset='XOR') state = env.reset() for step in range(10): input.update(state) for _ in range(processing_time): in_fires = input() state, _, done, __ = env.update(0) if done: break .. code-block:: python class network_template(Network): keys = { "n_inputs": 10, "magnitude": 2, "input_firing_steps": -1, "input_pct_inhibitory": 0.2, } parts = { "inputs": Input } """ NECESSARY_KEYS = [ Key("n_inputs", "Number input neurons, separate from body.", int), Key("magnitude", "Multiplier to each 0, 1 spike value.", float), Key( "input_firing_steps", "Number of network steps to fire for, -1 if all.", int, default=-1, ), Key( "input_pct_inhibitory", "Pct of inputs that are inhibitory.", float, default=0, ), ] def __init__(self, **kwargs): super().__init__(**kwargs) self.polarities = np.where( np.random.uniform(0, 1, self._n_inputs) > self._input_pct_inhibitory, 1, -1 ) self.values = self.network_time = None def __len__(self) -> int: """ Size of input generator == number of inputs. """ return self._n_inputs def __call__(self) -> np.bool: """ Spikes output from each input neuron, called once per network step. Returns ------- ndarray[n_inputs, bool] Spike output for each neuron. """ self.network_time += 1 raise NotImplementedError("Input gen __call__ function not implemented!") def reset(self): """ Reset Input. Called at the start of each episode. """ def update(self, state: object): """ Update input generator, called once per game step. Parameters ---------- state: object Environment state in a format the generator can understand. """ self.network_time = 0 try: self.values = tuple(state) except TypeError: self.values = state
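# A minimal hedged sketch (not a library class) of a concrete Input subclass: every input
# neuron fires with a fixed probability each network step, ignoring the environment state.
import numpy as np

class ConstantRateInput(Input):
    """Hypothetical example: fire each input with probability 0.5 per network step."""

    def __call__(self) -> np.ndarray:
        self.network_time += 1
        spikes = np.where(np.random.uniform(0, 1, self._n_inputs) <= 0.5, self._magnitude, 0.0)
        return spikes * self.polarities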
class ActiveRLNetwork(RLNetwork): """ The foundation for building and handling spiking neural networks. Network serves as the container and manager of all SNN parts like the neurons, synapses, reward function, ... It is designed to interact with an RL environment. .. note:: There are a few types of Networks for different uses, this one is the base for reinforcement learning with SNNs giving reward at every network step(see RLNetwork for reward per game step). Parameters ---------- callback: ExperimentCallback, default=None Callback to send relevant function call information to for logging. game: RL, default=None The environment the network will be interacting with, parameter is to allow network to pull relevant parameters in init. kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "rewarder": snn.reward.Reward, "modifiers": None, # [snn.modifier.Modifier,] } params = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } config = {**parts, **params} game = Logic(preset="XOR", **config) network = RLNetwork(game=game, **config) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) # Calculated reward per env step, does not affect network # Actual rewarding handled in ActiveRLNetwork.tick(). reward = network.reward(state, action, state_next) state = state_next if done: break .. code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } class network_template(ActiveRLNetwork): parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "rewarder": snn.reward.Reward, "modifiers": None, # [snn.modifier.Modifier,] } keys = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } kwargs = { "n_neurons": 100, # Overrides n_neurons in network_template.keys } game = Logic(preset="XOR", **kwargs) network = network_template(game=game, **kwargs) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) # Calculated reward per env step, does not affect network # Actual rewarding handled in ActiveRLNetwork.tick(). reward = network.reward(state, action, state_next) state = state_next if done: break """ NECESSARY_KEYS = RLNetwork.extend_keys([ Key( "continuous_rwd_action", "f(network, state)->any Function to get action parameter for rewarder when using continuous_reward.", ) ]) def reward(self, state: object, action: object, state_next: object, reward: float = None) -> float: """ If reward given as parameter and DON'T apply reward to synapses. Otherwise rewarder calculates based on state and action. Called once per game step. Parameters ---------- state: any State of environment where action was taken. action: any Action taken by network in response to state. 
state_next: any State of environment after action was taken. reward: float, default=None Reward already calculated, if None it will be determined by the rewarder. Returns ------- float Reward calculated for taking action in state. Examples -------- .. code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } class network_template(ActiveRLNetwork): parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "modifiers": None, # [snn.modifier.Modifier,] } keys = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } kwargs = { "n_neurons": 100, # Overrides n_neurons in network_template.keys } game = Logic(preset="XOR", **kwargs) network = network_template(game=game, **kwargs) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) # Calculated reward per env step, does not affect network # Actual rewarding handled in ActiveRLNetwork.tick(). reward = network.reward(state, action, state_next) state = state_next if done: break """ self.callback.network_reward(state, action, state_next, reward) return reward def continuous_reward(self, state: object, reward: float = None) -> float: """ If reward given as parameter, apply reward to synapses. Otherwise rewarder calculates based on state and action, then applies to synapses. Continuous reward meant to be applied per network step. Parameters ---------- state: any State of environment where action was taken. reward: float, default=None Reward to give network, if None it will be determined by the rewarder. Returns ------- float Reward given to network. Examples -------- .. code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } class network_template(ActiveRLNetwork): parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "modifiers": None, # [snn.modifier.Modifier,] } keys = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } kwargs = { "n_neurons": 100, # Overrides n_neurons in network_template.keys } game = Logic(preset="XOR", **kwargs) network = network_template(game=game, **kwargs) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) # Calculated reward per env step, does not affect network # Actual rewarding handled in ActiveRLNetwork.tick(). reward = network.reward(state, action, state_next) state = state_next if done: break """ action = self._continuous_rwd_action(self, state) reward = reward if reward is not None else self.rewarder( state, action, None) self.synapses.reward(reward) self.callback.network_continuous_reward(state, action, reward) return reward def tick(self, state: object) -> object: """ Determine network response to given stimulus. Parameters ---------- state: any Current environment state. Returns ------- any Network response to stimulus. Examples -------- .. 
code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } class network_template(ActiveRLNetwork): parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "modifiers": None, # [snn.modifier.Modifier,] } keys = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } kwargs = { "n_neurons": 100, # Overrides n_neurons in network_template.keys } game = Logic(preset="XOR", **kwargs) network = network_template(game=game, **kwargs) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) # Calculated reward per env step, does not affect network # Actual rewarding handled in ActiveRLNetwork.tick(). reward = network.reward(state, action, state_next) state = state_next if done: break """ self._polarities = np.append(self.inputs.polarities, self.neurons.polarities) self._spike_log[:self.synapses._stdp_window] = self._spike_log[ -self.synapses._stdp_window:] self._normalized_spike_log = self._spike_log.astype(bool) self.inputs.update(state) if self.modifiers is not None: for modifier in self.modifiers: modifier.update(self) for i in range(self._processing_time): self._process_step(i, state) self.continuous_reward(state, None) outputs = self._spike_log[-self._processing_time:, -self._n_outputs:] output = self.readout(outputs) self.callback.network_tick(state, output) return output
class Synapse(Module): """ Hedonistic synapses updating weights based on STDP suggestions. The weight matrix defines how much charge from pre-synaptic neurons goes to which post-synaptic neurons. The weight matrix is stored in and managed by the Weight class, stored in Synapse as self.weights. Synapse defines the learning behavior of the synapses (weights) of the network based on neuron spike times. The spike-timing-dependent synaptic plasticity (STDP) learning algorithm is a variant of the fire together, wire together rule. Similar to Hebbian learning, for any synapse, if the pre-synaptic neuron tends to fire soon before the post-synaptic neuron, the synapse's weight will increase. If the opposite tends to happen, post before pre firings, the weight will decrease. Oftentimes the eligibility trace of some sparse variable (eg dopamine reward) is tracked and used as a factor in the update rule along with the learning rate. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python w_config = { "n_neurons": 50, "n_inputs": 0, "matrix": np.random.uniform(size=(10, 10)), } w = Manual(**w_config) config = { "n_neurons": 50, "n_inputs": 0, "stdp_window": 200, "learning_rate": .05, "trace_decay": .1, } synapse = Synapse(w, **config) synapse.reset() pre_fires = np.random.uniform(size=config['n_neurons']) <= .08 post_fires = np.matmul(w.matrix, pre_fires) >= 2 spike_log = np.vstack((post_fires, pre_fires)) synapse.update(spike_log, np.zeros(config['n_neurons'])) .. code-block:: python class network_template(Network): keys = { "n_neurons": 50, "n_inputs": 10, "stdp_window": 200, "learning_rate": .05, "trace_decay": .1, } parts = { "synapses": Synapse } """ NECESSARY_KEYS = [ Key("n_neurons", "Number of neurons in network.", int), Key("n_inputs", "Number input neurons, separate from body.", int), Key("stdp_window", "Time period over which STDP takes effect.", int), Key("learning_rate", "Scalar to trace updates.", float), Key("trace_decay", "Percent to decay trace by per timestep.", float, default=1), ] def __init__(self, w: object, **kwargs): super().__init__(**kwargs) self.weights = w self.trace = None def reset(self): """ Reset Synapse member variables. Called at the start of each episode. """ self.trace = np.zeros( shape=(self._n_inputs + self._n_neurons, self._n_inputs + self._n_neurons), dtype=np.float32, ) def _hebbian(self, pre_locs, post_locs, dts, inverse=False): """ Concise implementation of the core Hebbian LTP/LTD rule. Parameters ---------- pre_locs: np.int Locations of pre-synaptic fires. post_locs: np.int Locations of post-synaptic fires. dts: np.float[n_neurons] Per neuron totals of the per-fire STDP credit to give. inverse: bool, default=False Whether to apply LTD (anti-Hebbian) instead of LTP. """ if not inverse: pre_locs = pre_locs.reshape((-1, 1)) body_post_locs = post_locs[post_locs >= self._n_inputs] - self._n_inputs self.weights._matrix[pre_locs, body_post_locs] += dts[pre_locs] if inverse: post_locs = post_locs.reshape((-1, 1)) body_pre_locs = pre_locs[pre_locs >= self._n_inputs] - self._n_inputs self.weights._matrix[post_locs, body_pre_locs] -= dts[self._n_inputs :][ body_pre_locs ].reshape((1, -1)) def _decay_trace(self): """ Decay eligibility trace. """ ## Pre-computing saves a considerable amount of time! mul = 1 - self._trace_decay self.trace *= mul def _apply_stdp(self, spike_log: np.bool, inhibitories: np.bool): """ Update synaptic weights via STDP rule. 
Parameters ---------- spike_log: np.array(time, neurons), 0 or 1 A history of neuron firings with spike_log[-1] being the most recent. inhibitories: list[int], -1 or 1 Neuron polarities. """ raise NotImplementedError("Synapse._apply_stdp needs to be implemented!") def update(self, spike_log: np.bool, inhibitories: np.int) -> None: """ Update trace for one time step based on decay rule and STDP suggestions. Called once per network step. Parameters ---------- spike_log: np.array(time, neurons) A history of when neurons have spiked, 1 at spike, 0 quiescent, with spike_log[-1] being the most recent. inhibitories: np.array(neurons) The polarity, 1 or -1, of each neuron. """ if self.training: self._apply_stdp(spike_log, inhibitories) self._decay_trace()
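# A minimal hedged sketch (not the library's STDP implementation) of a Synapse subclass that
# fills in _apply_stdp with a simple trace update: recent pre-synaptic fires followed by a
# post-synaptic fire at the current step add positive credit to the eligibility trace.
# Polarities/LTD are deliberately ignored here to keep the example short.
import numpy as np

class SimpleSTDP(Synapse):
    def _apply_stdp(self, spike_log: np.ndarray, inhibitories: np.ndarray):
        window = spike_log[-self._stdp_window:]
        pre = np.sum(window[:-1], axis=0)  # fires preceding the current step, per neuron
        post = window[-1]                  # fires at the current step, per neuron
        # Outer product -> (n_inputs + n_neurons, n_inputs + n_neurons), matching self.trace.
        self.trace += self._learning_rate * np.outer(pre, post)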
class Random(Weight): """ Randomly generated network. The data structure to generate and manage connections between neurons. Contains generation, arithmetic and get operations. Updates are handled in spikey.snn.Synapse objects. .. note:: Weight._matrix must be a masked ndarray with fill_value=0 while Weight.matrix is a simple ndarray. Arithmetic operations(a * b) use unmasked matrix for speed while inplace(a += b) arithmetic uses masked values. Get operations(Weight[[1, 2, 3]]) apply to masked ndarray. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python config = { "n_inputs": 1, "n_neurons": 10, "max_weight": 3, "force_unidirectional": True, "weight_generator": lambda *a, **kw: np.random.uniform(0, 3, *a, **kw), "matrix_mask": np.random.uniform(size=(1+10, 10)) <= .2, } w = Random(**config) in_volts = w * np.ones(config['n_neurons']) .. code-block:: python class network_template(Network): keys = { "n_inputs": 1, "n_neurons": 10, "max_weight": 3, "force_unidirectional": True, "weight_generator": lambda *a, **kw: np.random.uniform(0, 3, *a, **kw), "matrix_mask": np.random.uniform(size=(1+10, 10)) <= .2, } parts = { "weights": Random } """ NECESSARY_KEYS = Weight.extend_keys([ Key( "force_unidirectional", "bool Whether or not to force matrix unidirectional.", bool, default=False, ), Key( "weight_generator", "f(size: int, shape: 2 tuple)->ndarray Function to generate weights.", ), Key( "matrix_mask", "np.bool[inputs+neurons, neurons OR neurons, neurons] or None. True=generate weights, False=empty.", (np.ndarray, list, type(None)), ), ]) def __init__(self, **kwargs): super().__init__(**kwargs) if self._matrix_mask is None: input_weights = self._weight_generator( (self._n_inputs, self._n_neurons)) body_weights = self._weight_generator( (self._n_neurons, self._n_neurons)) else: if isinstance(self._matrix_mask, list) and isinstance( self._matrix_mask[0], np.ndarray): self._matrix_mask = self._convert_feedforward( self._matrix_mask) mask = self._matrix_mask.astype(bool) if mask.shape == (self._n_neurons, self._n_neurons): input_weights = self._weight_generator( (self._n_inputs, self._n_neurons)) body_weights = generate_masked(self._weight_generator, mask) elif mask.shape == (self._n_inputs + self._n_neurons, self._n_neurons): input_weights = generate_masked(self._weight_generator, mask[:self._n_inputs]) body_weights = generate_masked(self._weight_generator, mask[self._n_inputs:]) else: self._assert_matrix_shape(self._matrix_mask, key="matrix_mask") self._matrix = np.vstack((input_weights, body_weights)) diagonal = np.arange(self._n_neurons) self._matrix[diagonal + self._n_inputs, diagonal] = 0.0 if self._force_unidirectional: for x in range(self._n_neurons): for y in range(x, self._n_neurons): if (not self._matrix[x + self._n_inputs, y] or not self._matrix[y + self._n_inputs, x]): continue if np.random.randint(0, 2): self._matrix[x + self._n_inputs, y] = 0.0 else: self._matrix[y + self._n_inputs, x] = 0.0 self._matrix *= self._max_weight self._matrix = np.clip(self._matrix, 0, self._max_weight) self._matrix = np.ma.array(self._matrix, mask=(self._matrix == 0), fill_value=0) self._assert_matrix_shape(self._matrix, key="matrix")
class NeuronRates(Readout): """ Translator from output neuron spike trains to actions for the environment. The actions returned are neuron pool firing rates. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python config = { "n_outputs": 10, "magnitude": 2, "n_actions": 1, } readout = NeuronRates(**config) readout.reset() action = readout(np.ones((10, config["n_outputs"]))) .. code-block:: python class network_template(Network): keys = { "n_outputs": 10, "magnitude": 2, "n_actions": 1, } parts = { "readout": NeuronRates } """ NECESSARY_KEYS = Readout.extend_keys( [ Key( "n_actions", "Number of groups to pool neurons into; 0 means each neuron is its own output.", int, ), ] ) def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) if self._n_actions == 0: self._n_actions = self._n_outputs def __call__(self, output_spike_train: np.bool) -> np.float: """ Interpret the output neurons' spike trains into pool firing rates. Called once per game step. Parameters ---------- output_spike_train: np.ndarray[t, n_neurons, dtype=bool] Spike train with train[-1] being the most recent time. Returns ------- ndarray[n_actions, dtype=float] Firing rate of each neuron pool. """ if self._n_outputs == 0: return 0 idx = np.linspace(0, self._n_outputs, self._n_actions + 1).astype(int) pools = [ output_spike_train[:, idx[i] : idx[i + 1]] for i in range(self._n_actions) ] return np.mean(pools, axis=(1, 2))
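# A short hedged sketch (illustrative only) of the pooling NeuronRates performs: 4 output
# neurons split into 2 pools, returning each pool's mean firing rate.
import numpy as np

def _demo_neuron_rates() -> np.ndarray:
    spike_train = np.array([[1, 0, 0, 1],
                            [1, 0, 1, 1]])  # 2 timesteps x 4 output neurons
    pools = [spike_train[:, 0:2], spike_train[:, 2:4]]
    return np.mean(pools, axis=(1, 2))  # -> array([0.5, 0.75])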
class StaticMap(Input): """ Custom state -> input firings map. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python processing_time = 10 config = { "n_inputs": 10, "magnitude": 2, "input_firing_steps": -1, "input_pct_inhibitory": 0.2, "state_spike_map": { (1, 0): np.random.uniform(0, 1, (10, 10)) <= .8, (.5, .5): np.random.uniform(0, 1, (10, 10)) <= .3 }, } input = StaticMap(**config) input.reset() env = Logic(preset='XOR') state = env.reset() for step in range(10): input.update(state) for _ in range(processing_time): in_fires = input() state, _, done, __ = env.update(0) if done: break .. code-block:: python class network_template(Network): keys = { "n_inputs": 10, "magnitude": 2, "input_firing_steps": -1, "input_pct_inhibitory": 0.2, "state_spike_map": { 'state1': np.random.uniform(0, 1, (10, 10)) <= .5, 'state2': np.random.uniform(0, 1, (10, 10)) <= .5 }, } parts = { "inputs": StaticMap } """ NECESSARY_KEYS = Input.extend_keys([ Key( "state_spike_map", "dict[tuple]->ndarray[processing_time, n_inputs, dtype=bool] State to fires map.", type=(dict, np.ndarray), ) ]) def __call__(self) -> np.bool: """ Spikes output from each input neuron, called once per network step. Returns ------- ndarray[n_inputs, dtype=bool] Spike output for each neuron. """ output = np.array(self._state_spike_map[self.values]) if len(output.shape) > 1: spikes = [ value * self._magnitude for value in output[self.network_time] ] else: spikes = [value * self._magnitude for value in output] self.network_time += 1 return np.array(spikes) * self.polarities
class Reward(Module): """ Determine reward to give agent. Reward in a spiking neural network is meant to simulate dopamine in the real brain. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python config = { "reward_mult": 1, "punish_mult": -2, } rewarder = Reward(**config) rewarder.reset() r = rewarder(state, action, state_next) .. code-block:: python class network_template(Network): keys = { "reward_mult": 1, "punish_mult": -2, } parts = { "rewarder": Reward } """ NECESSARY_KEYS = [ Key( "reward_mult", "Multiplier for reward, reward = 1 * reward_mult.", float, default=1, ), Key( "punish_mult", "Multiplier for punishment, punish = -1 * punish_mult.", float, default=0, ), ] def __init__(self, **kwargs): super().__init__(**kwargs) if self._punish_mult < 0: print( "WARNING: Punish mult given is negative meaning you will give positive punishment." ) def reset(self): """ Reset rewarder member variables. Called at the start of each episode. """ pass def __call__(self, state: object, action: object, state_next: object) -> float: """ Determine how much reward should be given for taking action in state. Called once per game or network step based on network chosen. Parameters ---------- state: any Environment state before action is taken. action: any Action taken in response to state. state_next: any State of environment after action was taken. Returns ------- float Reward for taking action in state. """ raise NotImplementedError(f"__call__ not implemented for {type(self)}!")
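# A minimal hedged sketch (not a library class) of a concrete rewarder: reward the agent when
# its action matches a target derived from the state, punish otherwise. The XOR target and the
# 2-element state are assumptions chosen to pair with the Logic(preset='XOR') examples above.
class MatchExpected(Reward):
    def __call__(self, state: object, action: object, state_next: object) -> float:
        expected = bool(state[0]) ^ bool(state[1])  # hypothetical target for a 2-bit logic state
        return self._reward_mult if action == expected else -1 * self._punish_mult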
class GenericLoop(TrainingLoop): """ Generic reinforcement learning training loop. .. code-block:: python for ep in range(n_episodes): for s in range(len_episode): action = network.tick(state) state_next, _, done, __ = game.step(action) reward = network.reward(state, action, state_next) state = state_next if done: break Parameters ---------- network_template: Network[type] or Network Network to train. game_template: RL[type] or RL Game to train on. params: dict Network, game and training parameters. Examples -------- .. code-block:: python experiment = GenericLoop(Network, RL, **config) experiment.reset() network, game, results, info = experiment() """ NECESSARY_KEYS = TrainingLoop.extend_keys([ Key("n_episodes", "Number of episodes to run in the experiment.", int), Key("len_episode", "Number of environment timesteps in each episode.", int), ]) def __call__(self) -> (object, object, dict, dict): """ Run the training loop a single time. Returns ------- network: Network, game: RL, results: dict, info: dict. Examples -------- .. code-block:: python experiment = GenericLoop(Network, RL, **config) experiment.reset() network, game, results, info = experiment() """ network, game = self.init() for e in range(self.params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(self.params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) if hasattr(network, "reward") and callable( getattr(network, "reward")): reward = network.reward(state, action, state_next) state = state_next if done: break self.callback.training_end() return [*self.callback]
class PopulationVector(Readout): """ Population vector coding readout from output neuron spike trains to actions for the environment. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python config = { "n_outputs": 10, "magnitude": 2, "n_actions": 2, } readout = PopulationVector(**config) readout.reset() action = readout(np.ones((10, config["n_outputs"]))) .. code-block:: python class network_template(Network): keys = { "n_outputs": 10, "magnitude": 2, "n_actions": 2, } parts = { "readout": PopulationVector } """ NECESSARY_KEYS = Readout.extend_keys([ Key("n_actions", "Number of action groups.", int), ]) def __call__(self, output_spike_train: np.bool) -> np.float: """ Interpret the output neuron's spike train via population vector coding. Called once per game step. Parameters ---------- output_spike_train: np.ndarray[t, n_neurons, dtype=bool] Spike train with train[-1] being the most recent time. Returns ------- ndarray[n_actions, dtype=float] Normalized rate from each output pool. """ if self._n_outputs == 0: return np.zeros(self._n_actions) spikes = np.where(output_spike_train, 1, 0) spike_counts = np.sum(spikes, axis=0) group_size = self._n_outputs // self._n_actions p = [ np.sum(spike_counts[i * group_size:(i + 1) * group_size]) for i in range(self._n_actions) ] p = np.array(p) if np.sum(p) != 0: actions = p / np.sum(p) else: actions = np.ones(p.shape) / p.size return actions
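# A short hedged sketch (illustrative only) of the population vector coding implemented above:
# spike counts are pooled per action group and normalized so the action vector sums to 1.
import numpy as np

def _demo_population_vector() -> np.ndarray:
    spike_counts = np.array([3, 1, 0, 4])  # summed spikes per output neuron, n_actions=2
    group_size = spike_counts.size // 2
    pools = np.array([spike_counts[:group_size].sum(), spike_counts[group_size:].sum()])  # [4, 4]
    return pools / pools.sum()  # -> array([0.5, 0.5])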
class Population(Module): """ An evolving population. See genotype constraint docs in spikey/meta/series. Parameters ---------- game: MetaRL MetaRL game to evolve agents for. backend: MetaBackend, default=MultiprocessBackend(max_process) Backend to execute experiments with. max_process: int, default=16 Number of separate processes to run experiments for default backend. kwargs: dict, default=None Any configuration, required keys listed in NECESSARY_KEYS. Examples -------- .. code-block:: python metagame = EvolveNetwork(GenericLoop(network, game, **params), **metagame_config,) population = Population(metagame, **pop_config) while not population.terminated: fitness = population.evaluate() population.update(fitness) print(f"{population.epoch} - Max fitness: {max(fitness)}") """ NECESSARY_KEYS = [ Key("n_storing", "Number of genotypes to store in cache.", int), Key( "n_agents", "Number of agents in population per epoch.", (int, list, tuple, np.ndarray), ), Key( "n_epoch", "Number of epochs -- unused if n_agents is iterable.", int, default=9999, ), Key( "mutate_eligable_pct", "(0, 1] Pct of prev agents eligable to be mutated.", float, ), Key( "max_age", "Max age agent can reach before being removed from mutation/crossover/survivor pools.", int, ), Key( "random_rate", "(0, 1) Percent agents in population to generate randomly.", float, ), Key( "survivor_rate", "(0, 1) Percent(new generation) previous generation preserved/turn.", float, ), Key( "mutation_rate", "(0, 1) Percent(new generation) previous generation mutated/turn.", float, ), Key( "crossover_rate", "(0, 1) Percent(new generation) previous generation crossed over/turn.", float, ), Key("logging", "Whether to log or not.", bool, default=True), Key("log_fn", "f(n, g, r, i, filename) Logging function.", default=log), Key("folder", "Folder to save logs to.", str, default="log"), ] def __init__( self, game: object, backend: object = None, max_process: int = 16, **config, ): super().__init__(**config) self.genotype_constraints = game.GENOTYPE_CONSTRAINTS self.get_fitness = game.get_fitness self.backend = backend or MultiprocessBackend(max_process) if isinstance(self._n_agents, (list, tuple, np.ndarray)): self.n_agents = list(self._n_agents) else: self.n_agents = [self._n_agents for _ in range(self._n_epoch)] self.epoch = 0 # For summaries self.terminated = False self.cache = GenotypeMapping(self._n_storing) self.population = [self._random() for _ in range(self.n_agents[self.epoch])] if self._mutate_eligable_pct == 0: raise ValueError("mutate_eligable pct cannot be 0!") self._normalize_rates() if self._logging: self._setup_logging(config, game.params) def _normalize_rates(self): """ Normalize pertinent algorithm rates to 1. """ total = ( self._random_rate + self._survivor_rate + self._mutation_rate + self._crossover_rate ) if not total: raise ValueError( "Need nonzero value for the survivor, mutation or crossover rate." ) self._random_rate /= total self._survivor_rate /= total self._mutation_rate /= total self._crossover_rate /= total def _setup_logging(self, pop_params, game_params): self.multilogger = MultiLogger(folder=self._folder) info = {"population_config": pop_params} info.update({"metagame_info": game_params}) self.multilogger.summarize(results=None, info=info) def __len__(self) -> int: return len(self.population) def _genotype_dist(self, genotype1: dict, genotype2: dict) -> float: """ Testing Population._genotype_dist. Parameters ---------- genotype1: genotype Genotypes to find the distance between. 
genotype2: genotype Genotypes to find the distance between. Returns ------- Euclidean distance between the two genotypes. """ total = 0 for key in self.genotype_constraints.keys(): if isinstance(genotype1[key], (list, tuple)): for i in range(len(genotype1[key])): total += (genotype1[key][i] - genotype2[key][i]) ** 2 continue total += (genotype1[key] - genotype2[key]) ** 2 return total ** 0.5 def _random(self) -> dict: """ Randomly generate a genotype given constraints. """ eval_constraint = ( lambda cons: np.random.uniform(*cons) if isinstance(cons, tuple) else cons[np.random.choice(len(cons))] ) genotype = { key: eval_constraint(constraint) for key, constraint in self.genotype_constraints.items() } genotype["_age"] = 0 return genotype def _mutate(self, genotypes: list) -> list: """ Mutate a random key of each genotype given. """ if not isinstance(genotypes, (list, np.ndarray)): genotypes = [genotypes] new_genotypes = [] for genotype in genotypes: new_genotype = deepcopy(genotype) ## prevent edit of original! key = np.random.choice(list(self.genotype_constraints.keys())) cons = self.genotype_constraints[key] if isinstance(cons, tuple): new_genotype[key] = np.random.uniform(*cons) else: new_genotype[key] = cons[np.random.choice(len(cons))] new_genotype["_age"] = 0 new_genotypes.append(new_genotype) return new_genotypes def _crossover(self, genotype1: dict, genotype2: dict) -> [dict, dict]: """ Crossover two different genotypes. Parameters ---------- genotype: dict, str: float Genotype. Returns ------- 2 new genotypes. """ offspring1, offspring2 = {}, {} switch = False switch_key = np.random.choice(list(self.genotype_constraints.keys())) keys = list(self.genotype_constraints.keys()) np.random.shuffle(keys) # Prevent bias for key in keys: if key == switch_key: switch = True offspring1[key] = genotype1[key] if switch else genotype2[key] offspring2[key] = genotype2[key] if switch else genotype1[key] offspring1["_age"] = 0 offspring2["_age"] = 0 return [offspring1, offspring2] def update(self, f: list): """ Update the population based on each agents fitness. Parameters ---------- f: list of float Fitness values for each agent. """ self.epoch += 1 try: n_agents = self.n_agents[self.epoch] except (StopIteration, IndexError): self.terminated = True return prev_gen = [(self.population[i], f[i]) for i in range(len(f))] prev_gen = sorted(prev_gen, key=lambda x: x[1]) prev_gen = [value[0] for value in prev_gen if value[0]["_age"] < self._max_age] self.population = [] self.population += [ self._random() for _ in range(int(n_agents * self._random_rate)) ] if int(n_agents * self._survivor_rate): # -0 returns whole list!! survivors = [ deepcopy(genotype) for genotype in prev_gen[-int(n_agents * self._survivor_rate) :] ] for genotype in survivors: genotype["_age"] += 1 self.population += survivors mutate_candidates = prev_gen[-int(self._mutate_eligable_pct * len(prev_gen)) :] self.population += self._mutate( [ deepcopy(genotype) for genotype in np.random.choice( mutate_candidates, size=int(n_agents * self._mutation_rate) ) ] ) for _ in range(int(n_agents * self._crossover_rate) // 2): genotype1 = np.random.choice(prev_gen) genotype2 = np.random.choice(prev_gen) self.population += self._crossover(deepcopy(genotype1), deepcopy(genotype2)) if len(self) < n_agents: diff = n_agents - len(self) self.population += self._mutate(np.random.choice(prev_gen, size=diff)) def evaluate(self) -> list: """ Evaluate each agent on the fitness function. Returns ------- Fitness values for each agent. 
""" params = [ ( self.get_fitness, self.cache, genotype, self._log_fn, next(self.multilogger.filename_generator) if self._logging else None, ) for genotype in self.population ] results = self.backend.distribute(run, params) fitnesses = [result[0] for result in results] terminated = [result[1] for result in results] if any(terminated): self.terminated = True return fitnesses
class Network(Module): """ The foundation for building and handling spiking neural networks. Network serves as the container and manager of all SNN parts like the neurons, synapses, reward function, ... It is designed to interact with an RL environment. .. note:: There are a few types of Networks for different uses, this one is the base template for any generic usage. Parameters ---------- callback: ExperimentCallback, default=None Callback to send relevant function call information to for logging. game: RL, default=None The environment the network will be interacting with, parameter is to allow network to pull relevant parameters in init. kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "modifiers": None, # [snn.modifier.Modifier,] } params = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } config = {**parts, **params} game = Logic(preset="XOR", **config) network = Network(game=game, **config) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) reward = network.reward(state, action, state_next) state = state_next if done: break .. code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } class network_template(Network): parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "modifiers": None, # [snn.modifier.Modifier,] } keys = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } kwargs = { "n_neurons": 100, # Overrides n_neurons in network_template.keys } game = Logic(preset="XOR", **kwargs) network = network_template(game=game, **kwargs) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) reward = network.reward(state, action, state_next) state = state_next if done: break """ NECESSARY_KEYS = [ Key("n_inputs", "Number input neurons, separate from body.", int), Key("n_outputs", "Number of output neurons, a subset of body neurons.", int), Key("n_neurons", "Number of neurons in the network.", int), Key( "processing_time", "Number of network timesteps per game timestep.", int, ), ] NECESSARY_PARTS = [ Key("inputs", "snn.input.Input"), Key("neurons", "snn.neuron.Neuron"), Key("weights", "snn.weight.Weight"), Key("synapses", "snn.synapse.Synapse"), Key("readout", "snn.readout.Readout"), Key("modifiers", "list of snn.modifier.Modifier", default=None), ] def __init__( self, callback: object = None, game: object = None, **kwargs, ): if not hasattr(self, "parts"): self.parts = {} else: self.parts = deepcopy(type(self).parts) if "modifiers" not in self.parts: self.parts["modifiers"] = None for key in self.NECESSARY_PARTS: if key in kwargs: self.parts[key] = kwargs[key] self._params = {} if game is None else deepcopy(game.params) if hasattr(self, "keys"): 
self._params.update(self.keys) self._params.update(kwargs) super().__init__(**self._params) self.callback = callback or ExperimentCallback() self._init_parts() self.internal_time = self._spike_log = None self.callback.network_init(self) def _init_parts(self): for key in self.NECESSARY_PARTS: name = key.name if isinstance(key, Key) else key if name in self.parts: part = self.parts[name] elif isinstance(key, Key) and hasattr(key, "default"): part = key.default else: raise ValueError(f"No value given for key {name}!") if name == "synapses": value = part(self.weights, **self.params) elif part is None: value = part else: value = part(**self.params) setattr(self, name, value) if hasattr(self, "synapses") and hasattr(self, "weights"): self.synapses.weights = self.weights def train(self): """ Set the module to training mode, enabled by default. """ self.training = True for key in self.NECESSARY_PARTS: name = key.name if isinstance(key, Key) else key try: getattr(self, name).train() except AttributeError: pass def eval(self): """ Set the module to evaluation mode, disabled by default. """ self.training = False for key in self.NECESSARY_PARTS: name = key.name if isinstance(key, Key) else key try: getattr(self, name).eval() except AttributeError: pass @property def params(self) -> dict: """ Read only configuration of network. """ return deepcopy(self._params) @property def spike_log(self) -> np.bool: """ Neuron spike log over processing_time with spike_log[-1] being most recent. """ try: return self._spike_log[-self._processing_time:] except TypeError: return None @classmethod def list_keys(cls, **parts): """ Print list of all required keys for the Network and its parts. """ if isinstance(cls.NECESSARY_KEYS, dict): KEYS = {} else: KEYS = deepcopy(cls.NECESSARY_KEYS) for part in parts.values(): if not hasattr(part, "NECESSARY_KEYS"): continue if isinstance(KEYS, dict): KEYS.update(part.NECESSARY_KEYS) else: KEYS.extend([p for p in part.NECESSARY_KEYS if p not in KEYS]) if isinstance(cls.NECESSARY_KEYS, dict): KEYS.update(cls.NECESSARY_KEYS) print("{") for key in KEYS: if isinstance(key, Key): print(f"\t{str(key)},") else: desc = cls.NECESSARY_KEYS[key] print(f"\t{key}: {desc},") print("}") def __deepcopy__(self, memo={}): cls = self.__class__ network = cls.__new__(cls) memo[id(self)] = network for k, v in self.__dict__.items(): setattr(network, k, deepcopy(v, memo)) network._init_parts() return network def reset(self): """ Set network to initial state. Examples -------- .. 
code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } class network_template(Network): parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "modifiers": None, # [snn.modifier.Modifier,] } keys = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } kwargs = { "n_neurons": 100, # Overrides n_neurons in network_template.keys } game = Logic(preset="XOR", **kwargs) network = network_template(game=game, **kwargs) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) reward = network.reward(state, action, state_next) state = state_next if done: break """ self.internal_time = 0 self.neurons.reset() self.synapses.reset() if hasattr(self, "rewarder"): self.rewarder.reset() self.readout.reset() self.inputs.reset() for modifier in self.modifiers or []: modifier.reset() self._spike_log = np.zeros( ( self.synapses._stdp_window + self._processing_time, self._n_inputs + self._n_neurons, ), dtype=np.float16, ) self.callback.network_reset() def _process_step(self, i: int, state: object): """ Execute one processing step. Parameters ---------- i: int Current processing timestep. state: any Current environment state. """ self.internal_time += 1 spikes = np.append(self.inputs(), self.neurons()) self._spike_log[self.synapses._stdp_window + i] = spikes self._normalized_spike_log[self.synapses._stdp_window + i] = spikes.astype(bool) self.neurons += np.sum(self.synapses.weights * spikes.reshape((-1, 1)), axis=0) self.synapses.update( self._normalized_spike_log[i:i + self.synapses._stdp_window], self._polarities, ) def tick(self, state: object) -> object: """ Determine network response to given stimulus. Parameters ---------- state: any Current environment state. Returns ------- any Network response to stimulus. Examples -------- .. 
code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } class network_template(Network): parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "modifiers": None, # [snn.modifier.Modifier,] } keys = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } kwargs = { "n_neurons": 100, # Overrides n_neurons in network_template.keys } game = Logic(preset="XOR", **kwargs) network = network_template(game=game, **kwargs) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) reward = network.reward(state, action, state_next) state = state_next if done: break """ self._polarities = np.append(self.inputs.polarities, self.neurons.polarities) self._spike_log[:self.synapses._stdp_window] = self._spike_log[ -self.synapses._stdp_window:] self._normalized_spike_log = self._spike_log.astype(bool) self.inputs.update(state) if self.modifiers is not None: for modifier in self.modifiers: modifier.update(self) for i in range(self._processing_time): self._process_step(i, state) outputs = self._spike_log[-self._processing_time:, -self._n_outputs:] output = self.readout(outputs) self.callback.network_tick(state, output) return output
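The spike-log bookkeeping inside ``tick`` can be illustrated without the rest of the library. The sketch below is plain NumPy with made-up sizes (``stdp_window``, ``processing_time`` and the spike probability are stand-ins, not values pulled from a real config); it shows how the tail of the previous log is carried to the front so each synapse update sees a full STDP window, and how the readout slab is the last ``processing_time`` rows of the output columns.

.. code-block:: python

    import numpy as np

    # Stand-in sizes, not taken from a real configuration.
    n_inputs, n_neurons, n_outputs = 4, 10, 2
    stdp_window, processing_time = 5, 20

    spike_log = np.zeros(
        (stdp_window + processing_time, n_inputs + n_neurons), dtype=np.float16
    )

    # One game step: carry the most recent stdp_window rows to the front...
    spike_log[:stdp_window] = spike_log[-stdp_window:]

    for i in range(processing_time):
        spikes = np.random.uniform(size=n_inputs + n_neurons) < 0.1  # placeholder spikes
        spike_log[stdp_window + i] = spikes
        # the synapse update would look at spike_log[i:i + stdp_window] here

    # The readout only sees the last processing_time rows of the output neurons.
    outputs = spike_log[-processing_time:, -n_outputs:]
    print(outputs.shape)  # (20, 2)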
class MatchExpected(Reward):
    """
    Give reward if the action is the same as expected.

    Reward in a spiking neural network is meant to simulate dopamine in the real brain.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------
    .. code-block:: python

        config = {
            "reward_mult": 1,
            "punish_mult": 2,
            "expected_value": lambda state: state,  # illustrative func(state) -> expected action
        }
        rewarder = MatchExpected(**config)
        rewarder.reset()

        r = rewarder(state, action, state_next)

    .. code-block:: python

        class network_template(Network):
            keys = {
                "reward_mult": 1,
                "punish_mult": 2,
                "expected_value": lambda state: state,  # illustrative func(state) -> expected action
            }
            parts = {
                "rewarder": MatchExpected
            }
    """

    NECESSARY_KEYS = Reward.extend_keys([
        Key("expected_value", "func(state)->action Expected action."),
    ])

    def __call__(self, state: object, action: object, state_next: object) -> float:
        """
        Determine how much reward should be given for taking action in state.
        reward_mult if action == expected else -punish_mult.
        Called once per game or network step based on the network chosen.

        Parameters
        ----------
        state: any
            Environment state before action is taken.
        action: any
            Action taken in response to state.
        state_next: any
            State of environment after action was taken.

        Returns
        -------
        float
            Reward for taking action in state.
        """
        expected = self._expected_value(state)

        rwd = np.sum(
            np.where(action == expected, self._reward_mult, -self._punish_mult)
        )
        return rwd
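The reward rule itself is an elementwise comparison. A minimal NumPy sketch of the same computation, using made-up ``action`` and ``expected`` vectors:

.. code-block:: python

    import numpy as np

    reward_mult, punish_mult = 1.0, 2.0

    action = np.array([True, False, True])
    expected = np.array([True, True, True])

    # +reward_mult where the action matches, -punish_mult where it does not.
    reward = np.sum(np.where(action == expected, reward_mult, -punish_mult))
    print(reward)  # 1 - 2 + 1 = 0.0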
class RLNetwork(Network): """ The foundation for building and handling spiking neural networks. Network serves as the container and manager of all SNN parts like the neurons, synapses, reward function, ... It is designed to interact with an RL environment. .. note:: There are a few types of Networks for different uses, this one is the base for reinforcement learning with SNNs giving one reward per game update(see ActiveRLNetwork reward for per network step). Parameters ---------- callback: ExperimentCallback, default=None Callback to send relevant function call information to for logging. game: RL, default=None The environment the network will be interacting with, parameter is to allow network to pull relevant parameters in init. kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "rewarder": snn.reward.Reward, "modifiers": None, # [snn.modifier.Modifier,] } params = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } config = {**parts, **params} game = Logic(preset="XOR", **config) network = RLNetwork(game=game, **config) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) reward = network.reward(state, action, state_next) state = state_next if done: break .. code-block:: python experiment_params = { "n_episodes": 100, "len_episode": 200, } class network_template(RLNetwork): parts = { "inputs": snn.input.Input, "neurons": snn.neuron.Neuron, "weights": snn.weight.Weight, "synapses": snn.synapse.Synapse, "readout": snn.readout.Readout, "rewarder": snn.reward.Reward, "modifiers": None, # [snn.modifier.Modifier,] } keys = { "n_inputs": 10, "n_outputs": 10, "n_neurons": 50, "processing_time": 200, # + all part parameters, see Network.list_keys(**parts) } kwargs = { "n_neurons": 100, # Overrides n_neurons in network_template.keys } game = Logic(preset="XOR", **kwargs) network = network_template(game=game, **kwargs) for _ in range(experiment_params["n_episodes"]): network.reset() state = game.reset() state_next = None for s in range(experiment_params["len_episode"]): action = network.tick(state) state_next, _, done, __ = game.step(action) reward = network.reward(state, action, state_next) state = state_next if done: break """ NECESSARY_PARTS = Network.extend_keys( [ Key("rewarder", "snn.reward.Reward"), ], base="NECESSARY_PARTS", ) def __init__( self, callback: object = None, game: object = None, **kwargs, ): super().__init__(callback=callback, game=game, **kwargs) def reward(self, state: object, action: object, state_next: object, reward: float = None) -> float: """ If reward given as parameter, apply reward to synapses. Otherwise rewarder calculates based on state and action, then applies to synapses. Called once per game step. Parameters ---------- state: any State of environment where action was taken. action: any Action taken by network in response to state. state_next: any State of environment after action was taken. reward: float, default=None Reward to give network, if None it will be determined by the rewarder. Returns ------- float Reward given to network. 
        Examples
        --------
        .. code-block:: python

            experiment_params = {
                "n_episodes": 100,
                "len_episode": 200,
            }

            class network_template(RLNetwork):
                parts = {
                    "inputs": snn.input.Input,
                    "neurons": snn.neuron.Neuron,
                    "weights": snn.weight.Weight,
                    "synapses": snn.synapse.Synapse,
                    "readout": snn.readout.Readout,
                    "rewarder": snn.reward.Reward,
                    "modifiers": None,  # [snn.modifier.Modifier,]
                }
                keys = {
                    "n_inputs": 10,
                    "n_outputs": 10,
                    "n_neurons": 50,
                    "processing_time": 200,
                    # + all part parameters, see Network.list_keys(**parts)
                }

            kwargs = {
                "n_neurons": 100,  # Overrides n_neurons in network_template.keys
            }

            game = Logic(preset="XOR", **kwargs)
            network = network_template(game=game, **kwargs)

            for _ in range(experiment_params["n_episodes"]):
                network.reset()
                state = game.reset()
                state_next = None

                for s in range(experiment_params["len_episode"]):
                    action = network.tick(state)
                    state_next, _, done, __ = game.step(action)
                    reward = network.reward(state, action, state_next)
                    state = state_next

                    if done:
                        break
        """
        reward = (
            reward if reward is not None
            else self.rewarder(state, action, state_next)
        )
        self.synapses.reward(reward)
        self.callback.network_reward(state, action, state_next, reward)
        return reward
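The override behaviour of ``reward`` (an explicit value from the caller wins, otherwise the rewarder decides) can be sketched without the network. ``critic`` below is a hypothetical stand-in for a configured ``snn.reward.Reward`` part, not a library function:

.. code-block:: python

    def critic(state, action, state_next):
        # Hypothetical rewarder: +1 when the action echoes the state.
        return 1.0 if action == state else -1.0

    def give_reward(state, action, state_next, reward=None):
        # Mirrors the logic of RLNetwork.reward: explicit reward beats the rewarder.
        reward = reward if reward is not None else critic(state, action, state_next)
        # ... the real network would now call self.synapses.reward(reward)
        return reward

    print(give_reward(True, True, False))            # rewarder decides: 1.0
    print(give_reward(True, True, False, reward=0))  # explicit reward wins: 0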
class RandPotential(Neuron):
    """
    A group of spiking neurons where noise `~U(0, potential_noise_scale)` is added
    to `n_neurons * prob_rand_fire` neurons at each step.

    Each spiking neuron has an internal membrane potential that increases with each
    incoming spike. The potential persists but slowly decreases over time. Each
    neuron fires when its potential surpasses some firing threshold and does not
    fire again for the duration of its refractory period.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------
    .. code-block:: python

        config = {
            "magnitude": 2,
            "n_neurons": 100,
            "firing_threshold": 16,
            "neuron_pct_inhibitory": .2,
            "potential_decay": .2,
            "prob_rand_fire": .08,
            "refractory_period": 1,
            "resting_mv": 0,
            "spike_delay": 0,
            "potential_noise_scale": .1,
        }
        neurons = RandPotential(**config)
        neurons.reset()

        weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons']))

        for i in range(100):
            spikes = neurons()

            neurons += np.sum(
                weights * spikes.reshape((-1, 1)), axis=0
            )

    .. code-block:: python

        class network_template(Network):
            keys = {
                "magnitude": 2,
                "n_neurons": 100,
                "firing_threshold": 16,
                "neuron_pct_inhibitory": .2,
                "potential_decay": .2,
                "prob_rand_fire": .08,
                "refractory_period": 1,
                "potential_noise_scale": .1,
            }
            parts = {
                "neurons": RandPotential
            }
    """

    NECESSARY_KEYS = Neuron.extend_keys([
        Key("potential_noise_scale", "Multiplier of leak to add to potential.", float)
    ])

    def __call__(self) -> np.bool:
        """
        Add noise `~U(0, potential_noise_scale)` to `n_neurons * prob_rand_fire`
        neurons, then determine whether each neuron will fire or not according to
        the firing threshold. Called once per network step.

        Returns
        -------
        ndarray[n_neurons, dtype=bool]
            Spike output from each neuron at the current timestep.

        Examples
        --------
        .. code-block:: python

            config = {
                "magnitude": 2,
                "n_neurons": 100,
                "neuron_pct_inhibitory": .2,
                "potential_decay": .2,
                "prob_rand_fire": .08,
                "refractory_period": 1,
                "potential_noise_scale": .1,
                "firing_threshold": 16,
            }
            neurons = RandPotential(**config)
            neurons.reset()

            weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons']))

            for i in range(100):
                spikes = neurons()

                neurons += np.sum(
                    weights * spikes.reshape((-1, 1)), axis=0
                )
        """
        noise = np.random.uniform(0, self._potential_noise_scale, size=self._n_neurons)
        noise[~(np.random.uniform(0, 1, size=self._n_neurons) <= self._prob_rand_fire)] = 0
        self.potentials += noise

        spike_occurrences = self.potentials >= self._firing_threshold
        self.refractory_timers[spike_occurrences] = self._refractory_period + 1

        self.schedule += self.spike_shape * np.int_(spike_occurrences)
        output = self.schedule[0] * self.polarities * self._magnitude
        return output
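The noise step can be reproduced in isolation. A plain NumPy sketch with arbitrary parameter values (not taken from any preset); only the randomly selected fraction of neurons receives noise before the usual threshold test:

.. code-block:: python

    import numpy as np

    n_neurons = 100
    potential_noise_scale = 0.1
    prob_rand_fire = 0.08
    firing_threshold = 16.0

    potentials = np.random.uniform(0, 20, size=n_neurons)  # fake membrane potentials

    # Draw noise for every neuron, then zero it for the ~92% that were not selected.
    noise = np.random.uniform(0, potential_noise_scale, size=n_neurons)
    selected = np.random.uniform(0, 1, size=n_neurons) <= prob_rand_fire
    noise[~selected] = 0

    potentials += noise
    spike_occurrences = potentials >= firing_threshold
    print(spike_occurrences.sum(), "neurons at or above threshold")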
class Readout(Module): """ Translator from output neuron spike trains to actions for the environment. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python config = { "n_outputs": 10, "magnitude": 2, } readout = Readout(**config) readout.reset() action = readout(np.ones((10, config["n_outputs"]))) .. code-block:: python class network_template(Network): keys = { "n_outputs": 10, "magnitude": 2, } parts = { "readout": Readout } """ NECESSARY_KEYS = [ Key("n_outputs", "Number of output neurons, a subset of body neurons.", int), Key("magnitude", "Spike fire magnitude.", float), ] def __init__(self, **kwargs): super().__init__(**kwargs) def reset(self): """ Reset all readout members. Called at the start of each episode. """ pass def __call__(self, output_spike_train: np.bool) -> object: """ Interpret the output neuron's spike train. Called once per game step. Parameters ---------- output_spike_train: np.ndarray[t, n_neurons, dtype=bool] Spike train with train[-1] being the most recent time. Returns ------- object Action chosen. """ raise NotImplementedError( f"__call__ not implemented for {type(self)}!")
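``Readout.__call__`` is deliberately abstract. As a hedged illustration of what a concrete translator might do (this is not a class from the library), the sketch below picks an action by firing rate; it is written as a standalone function over a NumPy spike train so it runs on its own:

.. code-block:: python

    import numpy as np

    def rate_readout(output_spike_train):
        """Choose the output neuron that fired most over the spike train."""
        # output_spike_train: ndarray[t, n_outputs], most recent timestep last.
        rates = np.sum(output_spike_train != 0, axis=0) / output_spike_train.shape[0]
        return int(np.argmax(rates))

    spike_train = np.random.uniform(size=(200, 10)) < 0.05  # fake 200-step train, 10 outputs
    print(rate_readout(spike_train))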
class Logic(RL): """ Game of trying to mimic logic gates. Parameters ---------- preset: str=PRESETS.keys(), default="OR" Configuration preset key, default values for game parameters. callback: ExperimentCallback, default=None Callback to send relevant function call information to. kwargs: dict, default=None Game parameters for NECESSARY_KEYS. Overrides preset settings. Examples -------- .. code-block:: python game = Logic(preset="OR") game.seed(0) state = game.reset() for _ in range(100): action = model.get_action(state) state, reward, done, info = game.step(action) if done: break game.close() .. code-block:: python class game_template(Logic): config = Logic.PRESETS["XOR"] config.update({ # Overrides preset values "param1": 1 "param2": 2, }) kwargs = { "param1": 0, # Overrides game_template.config["param1"] } game = game_template(**kwargs) game.seed(0) state = game.reset() for _ in range(100): action = model.get_action(state) state, reward, done, info = game.step(action) if done: break game.close() """ action_space = [False, True] observation_space = [(a, b) for a in [False, True] for b in [False, True]] metadata = {} NECESSARY_KEYS = [ Key( "expected_value", "func(state) Correct response of logic gate to specific state.", ), ] PRESETS = { "AND": { "name": "AND", "expected_value": and_fn, }, "OR": { "name": "OR", "expected_value": or_fn, }, "XOR": { "name": "XOR", "expected_value": xor_fn, }, } def __init__(self, preset: str = "OR", callback: object = None, **kwargs): super().__init__(preset=preset, callback=callback, **kwargs) def _get_state(self) -> np.ndarray: """ Randomly generate a network state. Returns ------- ndarray[2, bool] Randomly generated inputs to logic gate. """ state = np.random.uniform(size=2) <= 0.5 return tuple(state) def step(self, action: bool) -> (np.ndarray, 0, bool, {}): """ Act within the environment. Parameters ---------- action: bool Action taken in environment. Returns ------- state: np.ndarray ndarray[2, bool] Randomly generated inputs to logic gate. reward: float, = 0 Reward given by environment. done: bool Whether the game is done or not. info: dict, = {} Information of environment. Examples -------- .. code-block:: python game = Logic(preset="OR") game.seed(0) state = game.reset() for _ in range(100): action = model.get_action(state) state, reward, done, info = game.step(action) if done: break game.close() """ state_new = self._get_state() done = False rwd = 0 info = {} self.callback.game_step(action, self._state, state_new, rwd, done, info) self._state = state_new return state_new, rwd, done, info def reset(self) -> np.ndarray: """ Reset environment. Returns ------- np.ndarray[2, bool] Initial state, random inputs to logic gate. Examples -------- .. code-block:: python game = Logic(preset="OR") game.seed(0) state = game.reset() """ state = self._get_state() self.callback.game_reset(state) self._state = state return state
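The ``and_fn``, ``or_fn`` and ``xor_fn`` referenced by ``PRESETS`` are defined elsewhere in the library and not shown here. Plausible definitions, assuming states are ``(bool, bool)`` tuples as produced by ``_get_state`` (an assumption, not copied from the source), would look like:

.. code-block:: python

    def and_fn(state):
        return bool(state[0]) and bool(state[1])

    def or_fn(state):
        return bool(state[0]) or bool(state[1])

    def xor_fn(state):
        return bool(state[0]) != bool(state[1])

    print(xor_fn((True, False)))  # True
    print(and_fn((True, False)))  # False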
class Weight(Module): """ The data structure to generate and manage connections between neurons. Contains generation, arithmetic and get operations. Updates are handled in spikey.snn.Synapse objects. .. note:: Weight._matrix must be a masked ndarray with fill_value=0 while Weight.matrix is a simple ndarray. Arithmetic operations(a * b) use unmasked matrix for speed while inplace(a += b) arithmetic uses masked values. Get operations(Weight[[1, 2, 3]]) apply to masked ndarray. Parameters ---------- kwargs: dict Dictionary with values for each key in NECESSARY_KEYS. Examples -------- .. code-block:: python config = { "n_inputs": 1, "n_neurons": 10, "max_weight": 3, } w = Weight(**config) in_volts = w * np.ones(config['n_neurons']) .. code-block:: python class network_template(Network): keys = { "n_inputs": 1, "n_neurons": 10, "max_weight": 3, } parts = { "weights": Weight } """ NECESSARY_KEYS = [ Key("n_inputs", "Number input neurons, separate from body.", int), Key("n_neurons", "Number of neurons in network.", int), Key("max_weight", "Max synapse weight.", float), ] def __init__(self, **kwargs): self._matrix = None super().__init__(**kwargs) def _assert_matrix_shape(self, matrix, key): expected_shape = (self._n_inputs + self._n_neurons, self._n_neurons) real_shape = matrix.shape if not np.array_equal(real_shape, expected_shape): base_error = f"Expected '{key}' shape to equal (N_INPUTS+N_NEURONS, N_NEURONS)[{expected_shape}], not {real_shape}!" if len(real_shape) > 2: raise ValueError( base_error + f" Squeeze extra single valued dimensions with `{key}.squeeze()`." ) elif np.array_equal(real_shape, (self._n_neurons, self._n_neurons)): raise ValueError( base_error + " Add N_INPUTS to the first dimension of your matrix.") elif np.array_equal( real_shape, (self._n_neurons, self._n_inputs + self._n_neurons)): raise ValueError(base_error + f" Transpose your matrix with `{key}.T`.") else: raise ValueError(base_error) def _convert_feedforward(self, layers): """ Convert network in feedforward layer format to weight matrix format. NOTE: Layers given as masked arrays will have masks dropped. Parameters ---------- layers: [ndarray, ndarray, ...] Network to convert. Returns ------- ndarray Network in weight matrix format. """ matrix = np.zeros((self._n_inputs + self._n_neurons, self._n_neurons), dtype=float) row_offset, col_offset = 0, 0 for i, layer in enumerate(layers): n, m = layer.shape matrix[row_offset:row_offset + n, col_offset:col_offset + m] = layer row_offset += n col_offset += m return matrix @property def matrix(self) -> np.float: """ Return unmasked weight matrix. """ if isinstance(self._matrix, np.ma.core.MaskedArray): return self._matrix.data return self._matrix def clip(self): """ Restrict weights to 0 and max_weight. 
""" np.clip(self._matrix.data, 0.0, float(self._max_weight), out=self._matrix.data) def __get__(self, obj: object, objtype: object) -> np.float: return self.matrix def __set__(self, obj: object, value: object): self.matrix = value def __getitem__(self, idx: np.int) -> np.float: return self._matrix[idx] def __add__(self, addend: np.ndarray) -> np.float: return self.matrix + addend def __iadd__(self, addend: np.ndarray): self._matrix += addend self.clip() return self def __sub__(self, subtractor: np.ndarray) -> np.float: return self.matrix - subtractor def __isub__(self, subtractor: np.ndarray): self._matrix -= subtractor self.clip() return self def __mul__(self, multiplier: np.ndarray) -> np.float: return self.matrix * multiplier def __imul__(self, multiplier: np.ndarray): self._matrix *= multiplier self.clip() return self def __truediv__(self, divisor: np.ndarray) -> np.float: return self.matrix / divisor def __itruediv__(self, divisor: np.ndarray): self._matrix /= divisor self.clip() return self
class MetaNQueens(MetaRL):
    """
    Game of placing a number of queen chess pieces on a chess board such that
    none of them can attack another in a single move. There are 92 distinct
    solutions out of over 4 billion possible placements with 8 queens.

    Genotypes are parameterized as follows,

    .. code-block:: python

        for i in range(n_queens):
            xi: int in [0, 7] X position of queen i.
            yi: int in [0, 7] Y position of queen i.

    Parameters
    ----------
    kwargs: dict, default=None
        Game parameters for NECESSARY_KEYS. Overrides preset settings.

    Examples
    --------
    .. code-block:: python

        metagame = MetaNQueens()
        metagame.seed(0)

        for _ in range(100):
            genotype = {key: 0 for key in metagame.GENOTYPE_CONSTRAINTS}
            fitness, done = metagame.get_fitness(genotype)

            if done:
                break

        metagame.close()

    .. code-block:: python

        metagame = MetaNQueens(**metagame_config)
        metagame.seed(0)

        population = Population(... metagame, ...)

        # population main loop
    """

    NECESSARY_KEYS = MetaRL.extend_keys([
        Key(
            "n_queens",
            "{1..8} Number of queens the agent needs to place on the board.",
            int,
            default=8,
        )
    ])
    GENOTYPE_CONSTRAINTS = {}  ## Defined in __init__

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        if self._n_queens > 8 or self._n_queens < 1:
            raise ValueError(f"n_queens must be in range [1, 8], not {self._n_queens}!")

        self.letters = ["a", "b", "c", "d", "e", "f", "g", "h"][:self._n_queens]
        keys = [first + second for second in ["x", "y"] for first in self.letters]
        self.GENOTYPE_CONSTRAINTS = {key: list(range(8)) for key in keys}

    @staticmethod
    def setup_game() -> list:
        """
        Setup game.

        Returns
        -------
        list
            Initial board state, number of queens in each horizontal, vertical
            and diagonal line.
        """
        horizontals = np.zeros(8)
        verticals = np.zeros(8)
        ldiagonals = np.zeros(15)  # \
        rdiagonals = np.zeros(15)  # /

        return horizontals, verticals, ldiagonals, rdiagonals

    @staticmethod
    def run_move(board: list, move: tuple) -> list:
        """
        Execute action.

        Parameters
        ----------
        board: list
            Number of queens across each horizontal, vertical and diagonal line.
        move: (x, y) in [0, 7]
            X and Y coordinate to place queen.

        Returns
        -------
        [horizontals: list, verticals: list, ldiagonals: list, rdiagonals: list]
            Updated board.
        """
        horizontals, verticals, ldiagonals, rdiagonals = board

        x, y = move
        horizontals[x] += 1
        verticals[y] += 1
        ldiagonals[x + y] += 1
        rdiagonals[7 - x + y] += 1

        return horizontals, verticals, ldiagonals, rdiagonals

    def get_fitness(
        self,
        genotype: dict,
    ) -> (float, bool):
        """
        Evaluate the fitness of a genotype.

        Parameters
        ----------
        genotype: dict
            Dictionary with values for each key in GENOTYPE_CONSTRAINTS.

        Returns
        -------
        fitness: float
            Fitness of genotype given.
        done: bool
            Whether termination condition has been reached or not.

        Examples
        --------
        .. code-block:: python

            metagame = MetaNQueens()
            metagame.seed(0)

            for _ in range(100):
                genotype = {key: 0 for key in metagame.GENOTYPE_CONSTRAINTS}
                fitness, done = metagame.get_fitness(genotype)

                if done:
                    break

            metagame.close()
        """
        board = self.setup_game()

        for letter in self.letters:
            move = (genotype[letter + "x"], genotype[letter + "y"])
            board = self.run_move(board, move)

        clashes = 0
        for item in board:
            clashes += np.sum(item[item > 1] - 1)

        fitness = 28 - clashes
        terminate = clashes == 0

        return fitness, terminate
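The fitness calculation reduces to counting how many queens share a line. A self-contained sketch of the same bookkeeping for a made-up genotype (all eight queens on a single diagonal, the worst case for that line):

.. code-block:: python

    import numpy as np

    horizontals, verticals = np.zeros(8), np.zeros(8)
    ldiagonals, rdiagonals = np.zeros(15), np.zeros(15)

    # Hypothetical genotype: queen i at (x=i, y=i), i.e. all on one diagonal.
    for x, y in [(i, i) for i in range(8)]:
        horizontals[x] += 1
        verticals[y] += 1
        ldiagonals[x + y] += 1
        rdiagonals[7 - x + y] += 1

    clashes = 0
    for line in (horizontals, verticals, ldiagonals, rdiagonals):
        clashes += np.sum(line[line > 1] - 1)  # each extra queen on a line is one clash

    print(28 - clashes)  # fitness; 21.0 here, 28 is the clash-free maximum for 8 queens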