Example #1
class RateMap(Input):
    """
    Uniform spike train generator with rates based on environment state.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        processing_time = 10
        config = {
            "n_inputs": 10,
            "magnitude": 2,
            "input_firing_steps": -1,
            "input_pct_inhibitory": 0.2,
            "state_rate_map": [.0, .8],
        }
        input = RateMap(**config)
        input.reset()
        env = Logic(preset='XOR')

        state = env.reset()
        for step in range(10):
            input.update(state)

            for _ in range(processing_time):
                in_fires = input()

            state, _, done, __ = env.update(0)

            if done:
                break

    .. code-block:: python

        class network_template(Network):
            keys = {
                "n_inputs": 10,
                "magnitude": 2,
                "input_firing_steps": -1,
                "input_pct_inhibitory": 0.2,
                "state_rate_map": [.0, .8],
            }
            parts = {
                "inputs": RateMap
            }
    """

    NECESSARY_KEYS = Input.extend_keys(
        [
            Key(
                "state_rate_map",
                "dict[float or list[floats] if groups>1] Elementwise State->Rate map.",
                type=(dict, np.ndarray),
            ),
        ]
    )

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def __call__(self) -> np.bool:
        """
        Spikes output from each input neuron, called once per network step.

        Returns
        -------
        ndarray[n_inputs, dtype=bool] Spike output for each neuron.
        """
        if not self.values.size:
            return []

        if (
            self._input_firing_steps != -1
            and self.network_time > self._input_firing_steps
        ):
            return np.zeros(self.values.shape)

        spikes = np.where(
            np.random.uniform(0, 1, size=self.values.size) <= self.values,
            self._magnitude,
            0.0,
        )

        self.network_time += 1
        return spikes * self.polarities

    def update(self, state: object):
        """
        Update input generator, called once per game step.

        Parameters
        ----------
        state: object
            Environment state in format generator can understand.
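
        Examples
        --------
        A minimal sketch of the state -> rate mapping, assuming
        ``state_rate_map`` is given as the array ``[.0, .8]`` and
        ``n_inputs=10`` as in the class example:

        .. code-block:: python

            input.update(1)
            # state 1 maps to rate .8, which is tiled across all 10 inputs
            # input.values -> array of ten .8 values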
        """
        self.network_time = 0

        if not isinstance(self._state_rate_map, dict):
            if isinstance(state, (int, float)):
                state = np.int_([state])
            elif isinstance(state, (np.ndarray, list, tuple)):
                try:
                    state = np.int_(state)
                except TypeError:
                    pass

        rate = self._state_rate_map[state]

        if not rate.size or self._n_inputs % rate.size:
            raise ValueError(
                f"N_INPUTS must divide evenly by number of value in rate, {self._n_inputs} / {rate.size}"
            )

        self.values = np.ravel(
            np.array([rate for _ in range(self._n_inputs // rate.size)])
        )
Example #2
class Neuron(Module):
    """
    A group of spiking neurons.

    Each spiking neuron has an internal membrane potential that
    increases with each incoming spike. The potential persists but slowly
    decreases over time. Each neuron fires when its potential surpasses
    some firing threshold and does not fire again for the duration
    of its refractory period.
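
    A rough sketch of one tick of the potential update, assuming the
    ``potential_decay`` and ``resting_mv`` keys described below (this mirrors
    the decay-then-integrate step in ``Neuron.update``):

    .. code-block:: python

        # decay toward the resting potential, then integrate incoming charge
        decay = 1 - potential_decay
        potential = (potential - resting_mv) * decay + resting_mv
        potential += incoming_voltage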

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        config = {
            "magnitude": 2,
            "n_neurons": 100,
            "firing_threshold": 16,
            "neuron_pct_inhibitory": .2,
            "potential_decay": .2,
            "prob_rand_fire": .08,
            "refractory_period": 1,
        }
        neurons = Neuron(**config)
        neurons.reset()

        weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons']))

        for i in range(100):
            spikes = neurons()

            neurons += np.sum(
                weights * spikes.reshape((-1, 1)), axis=0
            )

    .. code-block:: python

        class network_template(Network):
            keys = {
                "magnitude": 2,
                "n_neurons": 100,
                "firing_threshold": 16,
                "neuron_pct_inhibitory": .2,
                "potential_decay": .2,
                "prob_rand_fire": .08,
                "refractory_period": 1,
            }
            parts = {
                "neurons": Neuron
            }
    """

    NECESSARY_KEYS = [
        Key("magnitude", "Magnitude of spike.", float),
        Key("n_neurons", "Number of neurons in the network.", int),
        Key("firing_threshold", "Neuron voltage threshold to fire.", float),
        Key(
            "neuron_pct_inhibitory",
            "[0, 1] Percentage of inhibitory neurons.",
            float,
            default=0,
        ),
        Key("potential_decay", "[0, 1] Percentage voltage loss on each tick.",
            float),
        Key(
            "prob_rand_fire",
            " [0, 1] Probability each neuron will randomly fire",
            float,
            default=0,
        ),
        Key("refractory_period",
            "Amount of time after spike neuron cannot fire.", int),
        Key("resting_mv", "Neuron resting voltage.", float, default=0.0),
        Key(
            "spike_delay",
            "[0, 10] Units of time after hitting threshold to fire.",
            int,
            default=0,
        ),
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        if "polarities" in kwargs:
            self.polarities = np.array(kwargs["polarities"])
        else:
            polarities = np.random.uniform(size=self._n_neurons)
            self.polarities = np.where(
                polarities < self._neuron_pct_inhibitory, -1.0, 1.0)

        ## Initialized in self.reset()
        self.potentials = self.refractory_timers = None
        self.spike_shape = self.schedule = None

    def reset(self):
        """
        Reset all neuron members.
        Called at the start of each episode.
        """
        self.potentials = self._resting_mv * np.ones(self._n_neurons,
                                                     dtype="float16")

        self.refractory_timers = np.zeros(self._n_neurons)

        self.spike_shape = self._generate_spike_shape()
        self.schedule = np.zeros(shape=(self.spike_shape.size,
                                        self._n_neurons))

    def _generate_spike_shape(self) -> np.bool:
        """
        Generate neuron output schedule for time after its potential passes
        the firing threshold.

        Returns
        -------
        ndarray[SCHEDULE_LENGTH, dtype=bool] Neuron output schedule.
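
        Examples
        --------
        A minimal sketch, assuming ``spike_delay=3`` (so SCHEDULE_LENGTH = max(10, 3) = 10):

        .. code-block:: python

            spike_shape = neurons._generate_spike_shape()
            # spike_shape.ravel() -> [0, 0, 0, 1, 0, 0, 0, 0, 0, 0]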
        """
        SCHEDULE_LENGTH = max(10, self._spike_delay)
        spike_shape = np.zeros(shape=(SCHEDULE_LENGTH, 1))

        spike_shape[self._spike_delay] = 1

        return spike_shape

    def __call__(self) -> np.bool:
        """
        Determine whether each neuron will fire or not according to threshold.
        Called once per network step.

        Returns
        -------
        ndarray[n_neurons, dtype=bool] Spike output from each neuron at the current timestep.

        Examples
        --------

        .. code-block:: python

            config = {
                "magnitude": 2,
                "n_neurons": 100,
                "neuron_pct_inhibitory": .2,
                "potential_decay": .2,
                "prob_rand_fire": .08,
                "refractory_period": 1,
            }
            neurons = Neuron(**config)
            neurons.reset()

            weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons']))

            for i in range(100):
                spikes = neurons()

                neurons += np.sum(
                    weights * spikes.reshape((-1, 1)), axis=0
                )
        """
        spike_occurences = self.potentials >= self._firing_threshold

        spike_occurences += (np.random.uniform(0, 1, size=self._n_neurons) <
                             self._prob_rand_fire)

        spike_occurences &= self.refractory_timers <= 0

        self.refractory_timers[spike_occurences] = self._refractory_period + 1
        self.schedule += self.spike_shape * np.int_(spike_occurences)

        output = self.schedule[0] * self.polarities * self._magnitude

        return output

    def __iadd__(self, incoming_v: np.float):
        """
        Cool alias for neuron.update.
        Simulate the neurons for one time step and add incoming
        voltage to the neurons membrane potentials.
        Called once per network step.

        Parameters
        ----------
        incoming_v: np.ndarray[neurons, dtype=float]
            Amount to increase each neuron's potential by.

        Examples
        --------

        .. code-block:: python

            config = {
                "magnitude": 2,
                "n_neurons": 100,
                "neuron_pct_inhibitory": .2,
                "potential_decay": .2,
                "prob_rand_fire": .08,
                "refractory_period": 1,
            }
            neurons = Neuron(**config)
            neurons.reset()

            weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons']))

            for i in range(100):
                spikes = neurons()

                neurons += np.sum(
                    weights * spikes.reshape((-1, 1)), axis=0
                )
        """

        self.update(incoming_v)
        return self

    def update(self, incoming_v: np.float):
        """
        Simulate the neurons for one time step and add incoming
        voltage to the neurons membrane potentials.
        Called once per network step.

        Parameters
        ----------
        incoming_v: np.ndarray[neurons, dtype=float]
            Amount to increase each neuron's potential by.

        Examples
        --------

        .. code-block:: python

            config = {
                "magnitude": 2,
                "n_neurons": 100,
                "neuron_pct_inhibitory": .2,
                "potential_decay": .2,
                "prob_rand_fire": .08,
                "refractory_period": 1,
            }
            neurons = Neuron(**config)
            neurons.reset()

            weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons']))

            for i in range(100):
                spikes = neurons()

                neurons.update(np.sum(
                    weights * spikes.reshape((-1, 1)), axis=0
                ))
        """
        self.refractory_timers -= 1

        self.schedule = np.vstack(
            (self.schedule[1:], np.zeros(shape=self._n_neurons)))

        self.potentials[np.where(
            self.refractory_timers > 0)] = -65499.0  # finfo('float16').min
        self.potentials[np.where(
            self.refractory_timers == 0)] = self._resting_mv

        decay = 1 - self._potential_decay
        self.potentials = (self.potentials -
                           self._resting_mv) * decay + self._resting_mv

        self.potentials += incoming_v
Example #3
class CartPole(RL):
    """
    Inverted pendulum / pole-cart / cart-pole reinforcement learning

    ::

            g=9.8      /
            |         / pole: Length = 1 m
            |        /
            V       /
                   / θ (angle), theta_dot is angular velocity
            ______/_____
            |            | Cart: M = 1 kg
            |____________| ----> x_dot is velocity
            O        O
        L1--------x-------------------L2 x is position, with x limits of [L1, L2]

    Actions: jerk left, jerk right (AKA bang-bang control)
    Goal: control x position of cart to keep pole close to upright,
    which is when θ = 0 (vertical).

    Florian. "Correct equations for the dynamics of the cart-pole system."
    Center for Cognitive and Neural Studies (Coneural), 10 Feb 2007,
    https://coneural.org/florian/papers/05_cart_pole.pdf

    Parameters
    ----------
    preset: str, one of PRESETS.keys(), default="DEFAULT"
        Configuration preset key, default values for game parameters.
    callback: ExperimentCallback, default=None
        Callback to send relevant function call information to.
    kwargs: dict, default=None
        Game parameters for NECESSARY_KEYS. Overrides preset settings.

    Examples
    --------

    .. code-block:: python

        game = CartPole(preset="DEFAULT")
        game.seed(0)

        state = game.reset()
        for _ in range(100):
            action = model.get_action(state)
            state, reward, done, info = game.step(action)
            if done:
                break

        game.close()

    .. code-block:: python

        class game_template(CartPole):
            config = CartPole.PRESETS["DEFAULT"]

            config.update({  # Overrides preset values
                "param1": 1
                "param2": 2,
            })

        kwargs = {
            "param1": 0,  # Overrides game_template.config["param1"]
        }
        game = game_template(**kwargs)
        game.seed(0)

        state = game.reset()
        for _ in range(100):
            action = model.get_action(state)
            state, reward, done, info = game.step(action)
            if done:
                break

        game.close()
    """

    action_space = np.arange(-1, 1, 0.1)
    observation_space = None  # Defined in init

    metadata = {"render.modes": ["human"]}

    NECESSARY_KEYS = [
        Key("x_max", "If abs(x) > x_max: game over", float),
        Key("theta_max", "if abs(theta) > theta_max: game over", float),
        Key("x_init_range", "list[float] Range of initial x values.", list),
        Key("theta_init_range", "list[float] Range of initial theta values.", list),
        Key("xdot_init_range", "list[float] Range of initial x_dot values.", list),
        Key(
            "thetadot_init_range",
            "list[float] Range of initial theta_dot values.",
            list,
        ),
        Key("g", "Force of gravity", float, default=9.8),
        Key("Mass_Cart", "Mass of cart", float, default=1.0),
        Key("Mass_Pole", "Mass of the pole", float, default=0.1),
        Key("pole_half_length", "Half of the length of the pole", float, default=0.5),
        Key("Force_Mag", "Force of push", float, default=10.0),
        Key("Tau", "Time interval for updating the values", float, default=0.02),
    ]
    PRESETS = {
        "DEFAULT": {
            "xdot_init_range": [-0.1, 0.1],
            "thetadot_init_range": [-0.1, 0.1],
            "x_init_range": [0.0, 0.0],
            "theta_init_range": [0.0, 0.0],
            "Tau": 0.02,
            "x_max": 4.5,
            "theta_max": 0.5 * np.pi,
        },
        "FREMAUX": {
            "xdot_init_range": [-0.1, 0.1],
            "thetadot_init_range": [-0.1, 0.1],
            "x_init_range": [0.0, 0.0],
            "theta_init_range": [0.0, 0.0],
            "Tau": 0.02,
            "x_max": 2.5,
            "theta_max": 0.5 * np.pi,
        },
    }

    def __init__(self, preset: str = "DEFAULT", callback: object = None, **kwargs):
        super().__init__(preset=preset, callback=callback, **kwargs)

        high = np.array(
            [
                self.params["x_max"],
                np.finfo(np.float32).max,
                self.params["theta_max"],
                np.finfo(np.float32).max,
            ],
            dtype=np.float32,
        )

        self.observation_space = NotImplemented

    def step(self, action: np.ndarray) -> (np.ndarray, 0, bool, {}):
        """
        Act within the environment.

        Parameters
        ----------
        action: np.ndarray
            Force pushing in each direction, e.g.
            [.5, .5] = 0N of force,
            [1., 0.] = 1N of force directed left,
            [0., 1.] = 1N of force directed right.

        Returns
        -------
        state: ndarray[4, float]=(x, x', theta, theta')
            State updated according to action taken.
        reward: float, always 0
            Reward given by environment.
        done: bool
            Whether the game is done or not.
        info: dict, always {}
            Information of environment.

        Examples
        --------

        .. code-block:: python

            game = CartPole(preset="DEFAULT")
            game.seed(0)

            state = game.reset()
            for _ in range(100):
                action = model.get_action(state)
                state, reward, done, info = game.step(action)
                if done:
                    break

            game.close()
        """
        PoleMass_Length = self.params["Mass_Pole"] * self.params["pole_half_length"]
        Total_Mass = self.params["Mass_Cart"] + self.params["Mass_Pole"]
        Fourthirds = 4.0 / 3.0

        #
        if hasattr(action, "__len__") and len(action) > 1:
            force = np.dot(action, [-1, 1]) * self.params["Force_Mag"]
        else:
            force = action
        # force = [-1, 1][np.argmax(action)] * self.params['Force_Mag']

        assert force < self.params["Force_Mag"] * 1.2, "Action force too high."

        x, x_dot, theta, theta_dot = self._state

        temp = (
            force + PoleMass_Length * theta_dot * theta_dot * np.sin(theta)
        ) / Total_Mass

        thetaacc = (self.params["g"] * np.sin(theta) - np.cos(theta) * temp) / (
            self.params["pole_half_length"]
            * (
                Fourthirds
                - self.params["Mass_Pole"] * np.cos(theta) * np.cos(theta) / Total_Mass
            )
        )

        xacc = temp - PoleMass_Length * thetaacc * np.cos(theta) / Total_Mass

        # Update the four state variables, using Euler's method:
        # https://en.wikipedia.org/wiki/Euler_method
        x = x + self.params["Tau"] * x_dot
        x_dot = x_dot + self.params["Tau"] * xacc
        theta = theta + self.params["Tau"] * theta_dot
        theta_dot = theta_dot + self.params["Tau"] * thetaacc

        state_new = np.array([x, x_dot, theta, theta_dot])

        ##
        x, x_dot, theta, theta_dot = state_new

        f = abs(x) > self.params["x_max"] or abs(theta) > self.params["theta_max"]

        rwd = 0
        info = {}

        self.callback.game_step(action, self._state, state_new, rwd, f, info)
        self._state = state_new
        return state_new, rwd, f, info

    def reset(self) -> np.ndarray:
        """
        Reset environment.

        Returns
        -------
        ndarray[4, float]=(x, x', theta, theta') Initial game state, each value drawn
        uniformly from its *_init_range and multiplied by a random sign.

        Examples
        --------

        .. code-block:: python

            game = CartPole(preset="DEFAULT")
            game.seed(0)

            state = game.reset()
        """
        x = np.random.uniform(*self.params["x_init_range"]) * np.random.choice([-1, 1])
        x_dot = np.random.uniform(*self.params["xdot_init_range"]) * np.random.choice(
            [-1, 1]
        )
        theta = np.random.uniform(*self.params["theta_init_range"]) * np.random.choice(
            [-1, 1]
        )
        theta_dot = np.random.uniform(
            *self.params["thetadot_init_range"]
        ) * np.random.choice([-1, 1])

        s = np.array([x, x_dot, theta, theta_dot])

        self.callback.game_reset(s)
        self._state = s
        return s

    def render(self, states: np.ndarray, mode: str = "human"):
        """Renders the environment.
        The set of supported modes varies per environment. (And some
        environments do not support rendering at all.) By convention,

        .. note::

            Make sure that your class's metadata 'render.modes' key includes
              the list of supported modes. It's recommended to call super()
              in implementations to use the functionality of this method.

        .. code-block:: python

            class MyEnv(Env):
                metadata = {'render.modes': ['human', 'rgb_array']}
                def render(self, mode='human'):
                    if mode == 'rgb_array':
                        return np.array(...) # return RGB frame suitable for video
                    elif mode == 'human':
                        ... # pop up a window and render
                    else:
                        super(MyEnv, self).render(mode=mode) # just raise an exception

        Parameters
        ----------
        states: np.ndarray
            History of game states to animate.
        mode: str, default='human'
            The mode to render with.


        Examples
        --------

        .. code-block:: python

            game = CartPole(preset="DEFAULT")
            game.seed(0)

            state = game.reset()
            for _ in range(100):
                action = model.get_action(state)
                state, reward, done, info = game.step(action)
                if done:
                    break

            game.render()
            game.close()
        """

        def initGraph():
            """
            Init for animated graph below
            """
            line.set_data([], [])
            return (line,)

        def animate(i):
            """
            Each step/refresh of the animated graph. This sort of gets "looped".
            """
            thisx = [x1[i], x2[i]]
            thisy = [y1, y2[i]]
            line.set_data(thisx, thisy)
            return (line,)

        import matplotlib.pyplot as plt
        import matplotlib.animation as animation

        toPlot = states

        xList = [state[0] for state in toPlot]
        thetaList = [state[2] for state in toPlot]
        x1 = xList
        y1 = 0
        x2 = 1 * np.sin(thetaList) + x1
        y2 = 1 * np.cos(thetaList) + y1

        fig = plt.figure()
        ax = plt.axes(xlim=(-4, 4), ylim=(-0.25, 1.25))
        ax.grid()
        (line,) = ax.plot([], [], "o-", lw=2)
        # Keep a reference so the animation is not garbage collected before plt.show().
        anim = animation.FuncAnimation(
            fig,
            animate,
            np.arange(1, len(xList)),
            interval=30,
            blit=True,
            init_func=initGraph,
        )
        plt.show()
Example #4
class EvolveNetwork(MetaRL):
    """
    An environment to tune spiking neural network parameters on an RL game.

    GENOTYPE_CONSTRAINTS are parameterized by the user with the genotype_constraints init parameter.
    Networks are parameterized with a combination of their genotype and
    original config with the genotype taking priority.
    See constraint docs in spikey/meta/series.

    Parameters
    ----------
    kwargs: dict, default=None
        Game parameters for NECESSARY_KEYS. Overrides preset settings.

    Examples
    --------

    .. code-block:: python

        metagame = EvolveNetwork()
        metagame.seed(0)
        for _ in range(100):
            genotype = [{}, ...]
            fitness, done = metagame.get_fitness(genotype)
            if done:
                break
        metagame.close()

    .. code-block:: python

        metagame = EvolveNetwork(**metagame_config)
        metagame.seed(0)
        population = Population(... metagame, ...)
        # population main loop
    """

    NECESSARY_KEYS = MetaRL.extend_keys([
        Key(
            "training_loop",
            "Pre-configured trainingloop to run and gauge fitness of.",
        ),
        Key(
            "genotype_constraints",
            "A constraint for every trainingloop parameter that should be trained. "
            + "See constraint docs in spikey/meta/series.",
            dict,
        ),
        Key(
            "static_updates",
            "Updates to a specific network or game parameter. " +
            "Used in meta.Series, see series configuration for details.",
            default=None,
        ),
        Key("n_reruns",
            "Number of times to rerun each experiment.",
            int,
            default=2),
        Key("win_fitness", "Fitness threshold necessary to terminate MetaRL.",
            float),
        Key(
            "fitness_getter",
            "f(net, game, results, info)->float Function to determine experiment fitness.",
        ),
        Key(
            "fitness_aggregator",
            "f([fitness, ..])->float Aggregate fitnesses of each experiment rerun.",
            default=np.mean,
        ),
    ])
    GENOTYPE_CONSTRAINTS = {}

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.GENOTYPE_CONSTRAINTS = self._genotype_constraints

    def get_fitness(self, genotype: dict) -> (float, bool):
        """
        Train a neural network on an RL environment to gauge its fitness.

        Parameters
        ----------
        genotype: dict
            Dictionary with values for each key in GENOTYPE_CONSTRAINTS.

        Returns
        -------
        fitness: float
            Fitness of genotype given.
        done: bool
            Whether termination condition has been reached or not.

        Examples
        --------

        .. code-block:: python

            metagame = EvolveNetwork()
            metagame.seed(0)
            for _ in range(100):
                genotype = [{}, ...]
                fitness, done = metagame.get_fitness(genotype)
                if done:
                    break
            metagame.close()
        """
        training_loop = self._training_loop.copy()
        training_loop.reset(**genotype, **self.params)
        series = Series(
            training_loop,
            self._static_updates,
            backend=SingleProcessBackend(),
        )

        tracking = []
        for experiment in series:
            for _ in range(self._n_reruns):
                network, game, results, info = experiment()
                tracking.append(
                    self._fitness_getter(network, game, results, info))

        fitness = self._fitness_aggregator(tracking)
        terminate = fitness >= self._win_fitness

        return fitness, terminate
Example #5
class Input(Module):
    """
    Spike based stimulus encoding.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        processing_time = 10
        config = {
            "n_inputs": 10,
            "magnitude": 2,
            "input_firing_steps": -1,
            "input_pct_inhibitory": 0.2,
        }
        input = Input(**config)
        input.reset()
        env = Logic(preset='XOR')

        state = env.reset()
        for step in range(10):
            input.update(state)

            for _ in range(processing_time):
                in_fires = input()

            state, _, done, __ = env.update(0)

            if done:
                break

    .. code-block:: python

        class network_template(Network):
            keys = {
                "n_inputs": 10,
                "magnitude": 2,
                "input_firing_steps": -1,
                "input_pct_inhibitory": 0.2,
            }
            parts = {
                "inputs": Input
            }
    """

    NECESSARY_KEYS = [
        Key("n_inputs", "Number input neurons, separate from body.", int),
        Key("magnitude", "Multiplier to each 0, 1 spike value.", float),
        Key(
            "input_firing_steps",
            "Number of network steps to fire for, -1 if all.",
            int,
            default=-1,
        ),
        Key(
            "input_pct_inhibitory",
            "Pct of inputs that are inhibitory",
            float,
            default=0,
        ),
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        self.polarities = np.where(
            np.random.uniform(0, 1, self._n_inputs) > self._input_pct_inhibitory, 1, -1
        )

        self.values = self.network_time = None

    def __len__(self) -> int:
        """
        Size of input generator == number of inputs.
        """
        return self._n_inputs

    def __call__(self) -> np.bool:
        """
        Spikes output from each input neuron, called once per network step.

        Returns
        -------
        ndarray[n_inputs, bool] Spike output for each neuron.
        """
        self.network_time += 1
        raise NotImplementedError("Input gen __call__ function not implemented!")

    def reset(self):
        """
        Reset Input.
        Called at the start of each episode.
        """

    def update(self, state: object):
        """
        Update input generator, called once per game step.

        Parameters
        ----------
        state: object
            Environment state in format generator can understand.
        """
        self.network_time = 0

        try:
            self.values = tuple(state)
        except TypeError:
            self.values = state
Example #6
class ActiveRLNetwork(RLNetwork):
    """
    The foundation for building and handling spiking neural networks.
    Network serves as the container and manager of all SNN parts like
    the neurons, synapses, reward function, ... It is designed to
    interact with an RL environment.

    .. note::
        There are a few types of Networks for different uses, this
        one is the base for reinforcement learning with SNNs giving reward
        at every network step (see RLNetwork for reward per game step).

    Parameters
    ----------
    callback: ExperimentCallback, default=None
        Callback to send relevant function call information to for logging.
    game: RL, default=None
        The environment the network will be interacting with, parameter
        is to allow network to pull relevant parameters in init.
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        experiment_params = {
            "n_episodes": 100,
            "len_episode": 200,
        }

        parts = {
            "inputs": snn.input.Input,
            "neurons": snn.neuron.Neuron,
            "weights": snn.weight.Weight,
            "synapses": snn.synapse.Synapse,
            "readout": snn.readout.Readout,
            "rewarder": snn.reward.Reward,
            "modifiers": None, # [snn.modifier.Modifier,]
        }
        params = {
            "n_inputs": 10,
            "n_outputs": 10,
            "n_neurons": 50,
            "processing_time": 200,
            # + all part parameters, see Network.list_keys(**parts)
        }
        config = {**parts, **params}

        game = Logic(preset="XOR", **config)
        network = ActiveRLNetwork(game=game, **config)

        for _ in range(experiment_params["n_episodes"]):
            network.reset()
            state = game.reset()
            state_next = None

            for s in range(experiment_params["len_episode"]):
                action = network.tick(state)

                state_next, _, done, __ = game.step(action)

                # Calculated reward per env step, does not affect network
                # Actual rewarding handled in ActiveRLNetwork.tick().
                reward = network.reward(state, action, state_next)
                state = state_next

                if done:
                    break

    .. code-block:: python

        experiment_params = {
            "n_episodes": 100,
            "len_episode": 200,
        }

        class network_template(ActiveRLNetwork):
            parts = {
                "inputs": snn.input.Input,
                "neurons": snn.neuron.Neuron,
                "weights": snn.weight.Weight,
                "synapses": snn.synapse.Synapse,
                "readout": snn.readout.Readout,
                "rewarder": snn.reward.Reward,
                "modifiers": None, # [snn.modifier.Modifier,]
            }
            keys = {
                "n_inputs": 10,
                "n_outputs": 10,
                "n_neurons": 50,
                "processing_time": 200,
                # + all part parameters, see Network.list_keys(**parts)
            }

        kwargs = {
            "n_neurons": 100,  # Overrides n_neurons in network_template.keys
        }

        game = Logic(preset="XOR", **kwargs)
        network = network_template(game=game, **kwargs)

        for _ in range(experiment_params["n_episodes"]):
            network.reset()
            state = game.reset()
            state_next = None

            for s in range(experiment_params["len_episode"]):
                action = network.tick(state)

                state_next, _, done, __ = game.step(action)

                # Calculated reward per env step, does not affect network
                # Actual rewarding handled in ActiveRLNetwork.tick().
                reward = network.reward(state, action, state_next)
                state = state_next

                if done:
                    break
    """

    NECESSARY_KEYS = RLNetwork.extend_keys([
        Key(
            "continuous_rwd_action",
            "f(network, state)->any Function to get action parameter for rewarder when using continuous_reward.",
        )
    ])

    def reward(self,
               state: object,
               action: object,
               state_next: object,
               reward: float = None) -> float:
        """
        If reward is given as a parameter, use it and DON'T apply it to the synapses.
        Otherwise the rewarder calculates it based on state and action.
        Called once per game step.

        Parameters
        ----------
        state: any
            State of environment where action was taken.
        action: any
            Action taken by network in response to state.
        state_next: any
            State of environment after action was taken.
        reward: float, default=None
            Reward already calculated, if None it will be determined by the rewarder.

        Returns
        -------
        float Reward calculated for taking action in state.

        Examples
        --------

        .. code-block:: python

            experiment_params = {
                "n_episodes": 100,
                "len_episode": 200,
            }

            class network_template(ActiveRLNetwork):
                parts = {
                    "inputs": snn.input.Input,
                    "neurons": snn.neuron.Neuron,
                    "weights": snn.weight.Weight,
                    "synapses": snn.synapse.Synapse,
                    "readout": snn.readout.Readout,
                    "modifiers": None, # [snn.modifier.Modifier,]
                }
                keys = {
                    "n_inputs": 10,
                    "n_outputs": 10,
                    "n_neurons": 50,
                    "processing_time": 200,
                    # + all part parameters, see Network.list_keys(**parts)
                }

            kwargs = {
                "n_neurons": 100,  # Overrides n_neurons in network_template.keys
            }

            game = Logic(preset="XOR", **kwargs)
            network = network_template(game=game, **kwargs)

            for _ in range(experiment_params["n_episodes"]):
                network.reset()
                state = game.reset()
                state_next = None

                for s in range(experiment_params["len_episode"]):
                    action = network.tick(state)

                    state_next, _, done, __ = game.step(action)

                    # Calculated reward per env step, does not affect network
                    # Actual rewarding handled in ActiveRLNetwork.tick().
                    reward = network.reward(state, action, state_next)
                    state = state_next

                    if done:
                        break
        """
        self.callback.network_reward(state, action, state_next, reward)
        return reward

    def continuous_reward(self, state: object, reward: float = None) -> float:
        """
        If reward given as parameter, apply reward to synapses.
        Otherwise rewarder calculates based on state and action, then applies to synapses.
        Continuous reward meant to be applied per network step.

        Parameters
        ----------
        state: any
            State of environment where action was taken.
        reward: float, default=None
            Reward to give network, if None it will be determined by the rewarder.

        Returns
        -------
        float Reward given to network.

        Examples
        --------

        .. code-block:: python

            experiment_params = {
                "n_episodes": 100,
                "len_episode": 200,
            }

            class network_template(ActiveRLNetwork):
                parts = {
                    "inputs": snn.input.Input,
                    "neurons": snn.neuron.Neuron,
                    "weights": snn.weight.Weight,
                    "synapses": snn.synapse.Synapse,
                    "readout": snn.readout.Readout,
                    "modifiers": None, # [snn.modifier.Modifier,]
                }
                keys = {
                    "n_inputs": 10,
                    "n_outputs": 10,
                    "n_neurons": 50,
                    "processing_time": 200,
                    # + all part parameters, see Network.list_keys(**parts)
                }

            kwargs = {
                "n_neurons": 100,  # Overrides n_neurons in network_template.keys
            }

            game = Logic(preset="XOR", **kwargs)
            network = network_template(game=game, **kwargs)

            for _ in range(experiment_params["n_episodes"]):
                network.reset()
                state = game.reset()
                state_next = None

                for s in range(experiment_params["len_episode"]):
                    action = network.tick(state)

                    state_next, _, done, __ = game.step(action)

                    # Calculated reward per env step, does not affect network
                    # Actual rewarding handled in ActiveRLNetwork.tick().
                    reward = network.reward(state, action, state_next)
                    state = state_next

                    if done:
                        break
        """
        action = self._continuous_rwd_action(self, state)

        reward = reward if reward is not None else self.rewarder(
            state, action, None)

        self.synapses.reward(reward)

        self.callback.network_continuous_reward(state, action, reward)
        return reward

    def tick(self, state: object) -> object:
        """
        Determine network response to given stimulus.

        Parameters
        ----------
        state: any
            Current environment state.

        Returns
        -------
        any Network response to stimulus.

        Examples
        --------

        .. code-block:: python

            experiment_params = {
                "n_episodes": 100,
                "len_episode": 200,
            }

            class network_template(ActiveRLNetwork):
                parts = {
                    "inputs": snn.input.Input,
                    "neurons": snn.neuron.Neuron,
                    "weights": snn.weight.Weight,
                    "synapses": snn.synapse.Synapse,
                    "readout": snn.readout.Readout,
                    "modifiers": None, # [snn.modifier.Modifier,]
                }
                keys = {
                    "n_inputs": 10,
                    "n_outputs": 10,
                    "n_neurons": 50,
                    "processing_time": 200,
                    # + all part parameters, see Network.list_keys(**parts)
                }

            kwargs = {
                "n_neurons": 100,  # Overrides n_neurons in network_template.keys
            }

            game = Logic(preset="XOR", **kwargs)
            network = network_template(game=game, **kwargs)

            for _ in range(experiment_params["n_episodes"]):
                network.reset()
                state = game.reset()
                state_next = None

                for s in range(experiment_params["len_episode"]):
                    action = network.tick(state)

                    state_next, _, done, __ = game.step(action)

                    # Calculated reward per env step, does not affect network
                    # Actual rewarding handled in ActiveRLNetwork.tick().
                    reward = network.reward(state, action, state_next)
                    state = state_next

                    if done:
                        break
        """

        self._polarities = np.append(self.inputs.polarities,
                                     self.neurons.polarities)

        self._spike_log[:self.synapses._stdp_window] = self._spike_log[
            -self.synapses._stdp_window:]
        self._normalized_spike_log = self._spike_log.astype(bool)

        self.inputs.update(state)

        if self.modifiers is not None:
            for modifier in self.modifiers:
                modifier.update(self)

        for i in range(self._processing_time):
            self._process_step(i, state)

            self.continuous_reward(state, None)

        outputs = self._spike_log[-self._processing_time:, -self._n_outputs:]
        output = self.readout(outputs)

        self.callback.network_tick(state, output)
        return output
Example #7
class Synapse(Module):
    """
    Hedonistic synapses, updating weights based on STDP suggestions.
    The weight matrix defines how much charge from pre-synaptic neurons
    goes to which post-synaptic neurons. The weight matrix is stored in
    and managed by the Weight class, stored in Synapse as self.weights.
    Synapse defines the learning behavior of the synapses (weights) of
    the network based on neuron spike times.

    The spike-timing-dependent synaptic plasticity (STDP) learning algorithm is
    a variant of the "fire together, wire together" rule. Similar to Hebbian learning,
    for any synapse, if the pre-synaptic neuron tends to fire soon before the
    post-synaptic neuron, the synapse's weight will increase. If the opposite
    tends to happen, post before pre firings, the weight will decrease. Oftentimes
    the eligibility trace of some sparse variable (e.g. dopamine reward) is tracked
    and used as a factor in the update rule along with the learning rate.
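
    A minimal, illustrative sketch of the rule for a single synapse (not this
    class's exact update; ``lr``, ``trace``, ``t_pre`` and ``t_post`` are
    assumed names):

    .. code-block:: python

        dt = t_post - t_pre
        if abs(dt) <= stdp_window:
            if dt > 0:    # pre fired before post: LTP, strengthen
                weight += lr * trace
            elif dt < 0:  # post fired before pre: LTD, weaken
                weight -= lr * trace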

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        w_config = {
            "n_neurons": 50,
            "n_inputs": 0,
            "matrix": np.random.uniform(size=(10, 10)),
        }
        w = Manual(**w_config)

        config = {
            "n_neurons": 50,
            "n_inputs": 0,
            "stdp_window": 200,
            "learning_rate": .05,
            "trace_decay": .1,
        }
        synapse = Synapse(w, **config)
        synapse.reset()

        pre_fires = np.random.uniform(size=config['n_neurons']) <= .08
        post_fires = np.matmul(w.matrix, pre_fires) >= 2
        spike_log = np.vstack((post_fires, pre_fires))
        synapse.update(spike_log, np.zeros(config['n_neurons']))

    .. code-block:: python

        class network_template(Network):
            keys = {
                "n_neurons": 50,
                "n_inputs": 10,
                "stdp_window": 200,
                "learning_rate": .05,
                "trace_decay": .1,
            }
            parts = {
                "synapses": Synapse
            }
    """

    NECESSARY_KEYS = [
        Key("n_neurons", "Number of neurons in network.", int),
        Key("n_inputs", "Number input neurons, separate from body.", int),
        Key("stdp_window", "Time period that stdp will take effect.", int),
        Key("learning_rate", "Scalar to trace updates.", float),
        Key("trace_decay", "Percent to decay trace by per timestep.", float, default=1),
    ]

    def __init__(self, w: object, **kwargs):
        super().__init__(**kwargs)

        self.weights = w

        self.trace = None

    def reset(self):
        """
        Reset Synapse member variables.
        Called at the start of each episode.
        """
        self.trace = np.zeros(
            shape=(self._n_inputs + self._n_neurons, self._n_inputs + self._n_neurons),
            dtype=np.float32,
        )

    def _hebbian(self, pre_locs, post_locs, dts, inverse=False):
        """
        Concise implementation of the core Hebbian LTP/LTD rule.

        Parameters
        ----------
        pre_locs: np.int
            Locations of pre-synaptic fires.
        post_locs: np.int
            Locations of post-synaptic fires.
        dts: np.float[n_neurons]
            Per neuron totals of the per-fire STDP credit to give.
        inverse: bool, default=False
            Whether to apply LTD (anti-Hebbian) instead of LTP.
        """
        if not inverse:
            pre_locs = pre_locs.reshape((-1, 1))
            body_post_locs = post_locs[post_locs >= self._n_inputs] - self._n_inputs
            self.weights._matrix[pre_locs, body_post_locs] += dts[pre_locs]
        if inverse:
            post_locs = post_locs.reshape((-1, 1))
            body_pre_locs = pre_locs[pre_locs >= self._n_inputs] - self._n_inputs
            self.weights._matrix[post_locs, body_pre_locs] -= dts[self._n_inputs :][
                body_pre_locs
            ].reshape((1, -1))

    def _decay_trace(self):
        """
        Decay eligibility trace.
        """
        ## Pre-computing saves a considerable amount of time!
        mul = 1 - self._trace_decay

        self.trace *= mul

    def _apply_stdp(self, spike_log: np.bool, inhibitories: np.bool):
        """
        Update synaptic weights via STDP rule.

        Parameters
        ----------
        spike_log: np.array(time, neurons), 0 or 1
            A history of neuron firings with spike_log[-1] being the most recent.
        inhibitories: list[int], -1 or 1
            Neuron polarities.
        """
        raise NotImplementedError("Update trace function needs to be implemented!")

    def update(self, spike_log: np.bool, inhibitories: np.int) -> None:
        """
        Update trace for one time step based on decay rule and STDP suggestions.
        Called once per network step.

        Parameters
        ----------
        spike_log: np.array(time, neurons)
            A history of when neurons have spiked, 1 at spike, 0 quiescent, with spike_log[-1] being the most recent.
        inhibitories: np.array(neurons)
            The polarity, 1 or -1, of each neuron.
        """
        if self.training:
            self._apply_stdp(spike_log, inhibitories)

        self._decay_trace()
Example #8
class Random(Weight):
    """
    Randomly generated network.
    The data structure to generate and manage connections between neurons.
    Contains generation, arithmetic and get operations.
    Updates are handled in spikey.snn.Synapse objects.

    .. note::
        Weight._matrix must be a masked ndarray with fill_value=0 while Weight.matrix
        is a simple ndarray.

        Arithmetic operations (a * b) use the unmasked matrix for speed while inplace
        (a += b) arithmetic uses masked values.

        Get operations (Weight[[1, 2, 3]]) apply to the masked ndarray.
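
    A minimal sketch of the distinction, assuming a generated ``w`` and an
    arbitrary spike vector ``spikes`` (illustrative names):

    .. code-block:: python

        in_volts = w * spikes    # arithmetic: uses the unmasked ndarray for speed
        w += 0.1                 # inplace: applies to the masked values
        subset = w[[1, 2, 3]]    # get: indexes the masked ndarray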

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        config = {
            "n_inputs": 1,
            "n_neurons": 10,
            "max_weight": 3,
            "force_unidirectional": True,
            "weight_generator": lambda *a, **kw: np.random.uniform(0, 3, *a, **kw),
            "matrix_mask": np.random.uniform(size=(1+10, 10)) <= .2,
        }
        w = Random(**config)

        in_volts = w * np.ones(config['n_neurons'])

    .. code-block:: python

        class network_template(Network):
            keys = {
                "n_inputs": 1,
                "n_neurons": 10,
                "max_weight": 3,
                "force_unidirectional": True,
                "weight_generator": lambda *a, **kw: np.random.uniform(0, 3, *a, **kw),
                "matrix_mask": np.random.uniform(size=(1+10, 10)) <= .2,
            }
            parts = {
                "weights": Random
            }
    """

    NECESSARY_KEYS = Weight.extend_keys([
        Key(
            "force_unidirectional",
            "bool Whether or not to force matrix unidirectional.",
            bool,
            default=False,
        ),
        Key(
            "weight_generator",
            "f(size: int, shape: 2 tuple)->ndarray Function to generate weights.",
        ),
        Key(
            "matrix_mask",
            "np.bool[inputs+neurons, neurons  OR neurons, neurons] or None. True=generate weights, False=empty.",
            (np.ndarray, list, type(None)),
        ),
    ])

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        if self._matrix_mask is None:
            input_weights = self._weight_generator(
                (self._n_inputs, self._n_neurons))
            body_weights = self._weight_generator(
                (self._n_neurons, self._n_neurons))
        else:
            if isinstance(self._matrix_mask, list) and isinstance(
                    self._matrix_mask[0], np.ndarray):
                self._matrix_mask = self._convert_feedforward(
                    self._matrix_mask)

            mask = self._matrix_mask.astype(bool)
            if mask.shape == (self._n_neurons, self._n_neurons):
                input_weights = self._weight_generator(
                    (self._n_inputs, self._n_neurons))
                body_weights = generate_masked(self._weight_generator, mask)
            elif mask.shape == (self._n_inputs + self._n_neurons,
                                self._n_neurons):
                input_weights = generate_masked(self._weight_generator,
                                                mask[:self._n_inputs])
                body_weights = generate_masked(self._weight_generator,
                                               mask[self._n_inputs:])
            else:
                self._assert_matrix_shape(self._matrix_mask, key="matrix_mask")

        self._matrix = np.vstack((input_weights, body_weights))

        diagonal = np.arange(self._n_neurons)
        self._matrix[diagonal + self._n_inputs, diagonal] = 0.0

        if self._force_unidirectional:
            for x in range(self._n_neurons):
                for y in range(x, self._n_neurons):
                    if (not self._matrix[x + self._n_inputs, y]
                            or not self._matrix[y + self._n_inputs, x]):
                        continue

                    if np.random.randint(0, 2):
                        self._matrix[x + self._n_inputs, y] = 0.0
                    else:
                        self._matrix[y + self._n_inputs, x] = 0.0

        self._matrix *= self._max_weight

        self._matrix = np.clip(self._matrix, 0, self._max_weight)
        self._matrix = np.ma.array(self._matrix,
                                   mask=(self._matrix == 0),
                                   fill_value=0)

        self._assert_matrix_shape(self._matrix, key="matrix")
Example #9
class NeuronRates(Readout):
    """
    Translator from output neuron spike trains to actions
    for the environment. The actions returned are neuron pool firing rates.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        config = {
            "n_outputs": 10,
            "magnitude": 2,
            "n_actions": 1,
        }
        readout = NeuronRates(**config)
        readout.reset()

        action = readout(np.ones((10, config["n_outputs"])))

    .. code-block:: python

        class network_template(Network):
            keys = {
                "n_outputs": 10,
                "magnitude": 2,
                "n_actions": 1,
            }
            parts = {
                "readout": NeuronRates
            }
    """

    NECESSARY_KEYS = Readout.extend_keys(
        [
            Key(
                "n_actions",
                "Number of groups to put neurons into. 0 pools means each neuron separate output.",
                int,
            ),
        ]
    )

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        if self._n_actions == 0:
            self._n_actions = self._n_outputs

    def __call__(self, output_spike_train: np.bool) -> np.float:
        """
        Interpret the output neuron's spike train into pool firing rates.
        Called once per game step.

        Parameters
        ----------
        output_spike_train: np.ndarray[t, n_neurons, dtype=bool]
            Spike train with train[-1] being the most recent time.

        Returns
        -------
        ndarray[n_action, dtype=float] Firing rate of each neuron pool.
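
        Examples
        --------
        A minimal sketch, assuming a readout configured with ``n_outputs=4``
        and ``n_actions=2``; the outputs are split into contiguous pools
        [0, 1] and [2, 3] and the mean firing rate of each pool is returned:

        .. code-block:: python

            spike_train = np.array([[1, 0, 1, 1],
                                    [1, 0, 0, 1]])
            rates = readout(spike_train)  # -> [0.5, 0.75]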
        """
        if self._n_outputs == 0:
            return 0

        idx = np.linspace(0, self._n_outputs, self._n_actions + 1).astype(int)
        pools = [
            output_spike_train[:, idx[i] : idx[i + 1]] for i in range(self._n_actions)
        ]
        return np.mean(pools, axis=(1, 2))
Example #10
class StaticMap(Input):
    """
    Custom state -> input firings map.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        processing_time = 10
        config = {
            "n_inputs": 10,
            "magnitude": 2,
            "input_firing_steps": -1,
            "input_pct_inhibitory": 0.2,
            "state_spike_map": {
                (1, 0): np.random.uniform(size=(10, 10)) <= .8,
                (.5, .5): np.random.uniform(size=(10, 10)) <= .3
            },
        }
        input = StaticMap(**config)
        input.reset()
        env = Logic(preset='XOR')

        state = env.reset()
        for step in range(10):
            input.update(state)

            for _ in range(processing_time):
                in_fires = input()

            state, _, done, __ = env.update(0)

            if done:
                break

    .. code-block:: python

        class network_template(Network):
            keys = {
                "n_inputs": 10,
                "magnitude": 2,
                "input_firing_steps": -1,
                "input_pct_inhibitory": 0.2,
                "state_spike_map": {
                    'state1': np.random.uniform(size=(10, 10)) <= .5,
                    'state2': np.random.uniform(size=(10, 10)) <= .5
                },
            }
            parts = {
                "inputs": StaticMap
            }
    """

    NECESSARY_KEYS = Input.extend_keys([
        Key(
            "state_spike_map",
            "dict[tuple]->ndarray[processing_time, n_inputs, dtype=bool] State to fires map..",
            type=(dict, np.ndarray),
        )
    ])

    def __call__(self) -> np.bool:
        """
        Spikes output from each input neuron, called once per network step.

        Returns
        -------
        ndarray[n_inputs, dtype=bool] Spike output for each neuron.
        """
        output = np.array(self._state_spike_map[self.values])

        if len(output.shape) > 1:
            spikes = [
                value * self._magnitude for value in output[self.network_time]
            ]
        else:
            spikes = [value * self._magnitude for value in output]

        self.network_time += 1
        return np.array(spikes) * self.polarities
Example #11
class Reward(Module):
    """
    Determine reward to give agent. Reward in a spiking neural
    network is meant to simulate dopamine in the real brain.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        config = {
            "reward_mult": 1,
            "punish_mult": -2,
        }
        rewarder = Reward(**config)
        rewarder.reset()

        r = rewarder(state, action, state_next)

    .. code-block:: python

        class network_template(Network):
            keys = {
                "reward_mult": 1,
                "punish_mult": 2,
            }
            parts = {
                "rewarder": Reward
            }
    """

    NECESSARY_KEYS = [
        Key(
            "reward_mult",
            "Multiplier for reward, reward = 1 * reward_mult.",
            float,
            default=1,
        ),
        Key(
            "punish_mult",
            "Multiplier for punishment, punish = -1 * punish_mult.",
            float,
            default=0,
        ),
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

        if self._punish_mult < 0:
            print(
                "WARNING: punish_mult is negative, so punishments will be applied as positive reward."
            )

    def reset(self):
        """
        Reset rewarder member variables.
        Called at the start of each episode.
        """
        pass

    def __call__(self, state: object, action: object, state_next: object) -> float:
        """
        Determine how much reward should be given for taking action in state.
        Called once per game or network step based on network chosen.

        Parameters
        ----------
        state: any
            Environment state before action is taken.
        action: any
            Action taken in response to state.
        state_next: any
            State of environment after action was taken.

        Returns
        -------
        float Reward for taking action in state.
        """
        raise NotImplementedError(f"__call__ not implemented for {type(self)}!")
Exemple #12
0
class GenericLoop(TrainingLoop):
    """
    Generic reinforcement learning training loop.

    .. code-block:: python

        for ep in range(n_episodes):
            for step in range(len_episode):
                action = network.tick(state)
                state_next, _, done, __ = game.step(action)
                reward = network.reward(state, action, state_next)
                state = state_next
                if done:
                    break

    Parameters
    ----------
    network_template: Network[type] or Network
        Network to train.
    game_template: RL[type] or RL
        Game to train.
    params: dict
        Network, game and training parameters.

    Examples
    --------

    .. code-block:: python

        experiment = GenericLoop(Network, RL, **config)
        experiment.reset()

        network, game, results, info = experiment()
    """

    NECESSARY_KEYS = TrainingLoop.extend_keys([
        Key("n_episodes", "Number of episodes to run in the experiment.", int),
        Key("len_episode", "Number of environment timesteps in each episode.",
            int),
    ])

    def __call__(self) -> (object, object, dict, dict):
        """
        Run training loop a single time.

        Returns
        -------
        network: Network, game: RL, results: dict, info: dict.

        Examples
        --------

        .. code-block:: python

            experiment = TrainingLoop(Network, RL, **config)
            experiment.reset()

            network, game, results, info = experiment()
        """
        network, game = self.init()

        for e in range(self.params["n_episodes"]):
            network.reset()
            state = game.reset()
            state_next = None

            for s in range(self.params["len_episode"]):
                action = network.tick(state)
                state_next, _, done, __ = game.step(action)
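                # Reward is optional: only networks exposing a callable reward() hook
                # (e.g. RLNetwork) compute and apply it each step.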
                if hasattr(network, "reward") and callable(
                        getattr(network, "reward")):
                    reward = network.reward(state, action, state_next)
                state = state_next

                if done:
                    break

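        # The callback collects the experiment artifacts during the run and is
        # unpacked into (network, game, results, info) as documented above.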
        self.callback.training_end()

        return [*self.callback]
Exemple #13
0
class PopulationVector(Readout):
    """
    Population vector coding readout from output neuron spike trains to actions
    for the environment.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        config = {
            "n_outputs": 10,
            "magnitude": 2,
            "n_actions": 2,
        }
        readout = PopulationVector(**config)
        readout.reset()

        action = readout(np.ones((10, config["n_outputs"])))

    .. code-block:: python

        class network_template(Network):
            keys = {
                "n_outputs": 10,
                "magnitude": 2,
                "n_actions": 2,
            }
            parts = {
                "readout": PopulationVector
            }
    """

    NECESSARY_KEYS = Readout.extend_keys([
        Key("n_actions", "Number of action groups.", int),
    ])

    def __call__(self, output_spike_train: np.bool) -> np.float:
        """
        Interpret the output neuron's spike train via population vector coding.
        Called once per game step.

        Parameters
        ----------
        output_spike_train: np.ndarray[t, n_neurons, dtype=bool]
            Spike train with train[-1] being the most recent time.

        Returns
        -------
        ndarray[n_actions, dtype=float] Normalized rate from each output pool.
        """
        if self._n_outputs == 0:
            return np.zeros(self._n_actions)

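        # Count spikes per output neuron over the whole processing window.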
        spikes = np.where(output_spike_train, 1, 0)
        spike_counts = np.sum(spikes, axis=0)

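        # Split the outputs into n_actions contiguous groups; any remainder neurons
        # (n_outputs % n_actions) are left out of every pool.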
        group_size = self._n_outputs // self._n_actions

        p = [
            np.sum(spike_counts[i * group_size:(i + 1) * group_size])
            for i in range(self._n_actions)
        ]
        p = np.array(p)

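        # Normalize pool counts into a probability-like action vector, falling back
        # to a uniform distribution when no output neuron spiked.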
        if np.sum(p) != 0:
            actions = p / np.sum(p)
        else:
            actions = np.ones(p.shape) / p.size

        return actions
Exemple #14
0
class Population(Module):
    """
    An evolving population.
    See genotype constraint docs in spikey/meta/series.

    Parameters
    ----------
    game: MetaRL
        MetaRL game to evolve agents for.
    backend: MetaBackend, default=MultiprocessBackend(max_process)
        Backend to execute experiments with.
    max_process: int, default=16
        Number of separate processes to run experiments for
        default backend.
    kwargs: dict, default=None
        Any configuration, required keys listed in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        metagame = EvolveNetwork(GenericLoop(network, game, **params), **metagame_config)
        population = Population(metagame, **pop_config)

        while not population.terminated:
            fitness = population.evaluate()

            population.update(fitness)

            print(f"{population.epoch} - Max fitness: {max(fitness)}")
    """

    NECESSARY_KEYS = [
        Key("n_storing", "Number of genotypes to store in cache.", int),
        Key(
            "n_agents",
            "Number of agents in population per epoch.",
            (int, list, tuple, np.ndarray),
        ),
        Key(
            "n_epoch",
            "Number of epochs -- unused if n_agents is iterable.",
            int,
            default=9999,
        ),
        Key(
            "mutate_eligable_pct",
            "(0, 1] Pct of prev agents eligible to be mutated.",
            float,
        ),
        Key(
            "max_age",
            "Max age agent can reach before being removed from mutation/crossover/survivor pools.",
            int,
        ),
        Key(
            "random_rate",
            "(0, 1) Fraction of the new population generated randomly each epoch.",
            float,
        ),
        Key(
            "survivor_rate",
            "(0, 1) Fraction of the new generation copied unchanged from the previous generation each epoch.",
            float,
        ),
        Key(
            "mutation_rate",
            "(0, 1) Fraction of the new generation created by mutating previous-generation genotypes each epoch.",
            float,
        ),
        Key(
            "crossover_rate",
            "(0, 1) Fraction of the new generation created by crossing over previous-generation genotypes each epoch.",
            float,
        ),
        Key("logging", "Whether to log or not.", bool, default=True),
        Key("log_fn", "f(n, g, r, i, filename) Logging function.", default=log),
        Key("folder", "Folder to save logs to.", str, default="log"),
    ]

    def __init__(
        self,
        game: object,
        backend: object = None,
        max_process: int = 16,
        **config,
    ):
        super().__init__(**config)

        self.genotype_constraints = game.GENOTYPE_CONSTRAINTS
        self.get_fitness = game.get_fitness
        self.backend = backend or MultiprocessBackend(max_process)

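        # n_agents may be a per-epoch schedule (sequence) or a single count repeated
        # for n_epoch epochs.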
        if isinstance(self._n_agents, (list, tuple, np.ndarray)):
            self.n_agents = list(self._n_agents)
        else:
            self.n_agents = [self._n_agents for _ in range(self._n_epoch)]

        self.epoch = 0  # For summaries
        self.terminated = False

        self.cache = GenotypeMapping(self._n_storing)
        self.population = [self._random() for _ in range(self.n_agents[self.epoch])]

        if self._mutate_eligable_pct == 0:
            raise ValueError("mutate_eligable_pct cannot be 0!")

        self._normalize_rates()
        if self._logging:
            self._setup_logging(config, game.params)

    def _normalize_rates(self):
        """
        Normalize pertinent algorithm rates to 1.
        """
        total = (
            self._random_rate
            + self._survivor_rate
            + self._mutation_rate
            + self._crossover_rate
        )

        if not total:
            raise ValueError(
                "Need nonzero value for the survivor, mutation or crossover rate."
            )

        self._random_rate /= total
        self._survivor_rate /= total
        self._mutation_rate /= total
        self._crossover_rate /= total

    def _setup_logging(self, pop_params, game_params):
        self.multilogger = MultiLogger(folder=self._folder)

        info = {"population_config": pop_params}
        info.update({"metagame_info": game_params})

        self.multilogger.summarize(results=None, info=info)

    def __len__(self) -> int:
        return len(self.population)

    def _genotype_dist(self, genotype1: dict, genotype2: dict) -> float:
        """
        Compute the Euclidean distance between two genotypes.

        Parameters
        ----------
        genotype1: genotype
            Genotypes to find the distance between.
        genotype2: genotype
            Genotypes to find the distance between.

        Returns
        -------
        Euclidean distance between the two genotypes.
        """
        total = 0

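        # Sum squared differences over every constrained key, treating list/tuple
        # valued genes elementwise.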
        for key in self.genotype_constraints.keys():
            if isinstance(genotype1[key], (list, tuple)):
                for i in range(len(genotype1[key])):
                    total += (genotype1[key][i] - genotype2[key][i]) ** 2

                continue

            total += (genotype1[key] - genotype2[key]) ** 2

        return total ** 0.5

    def _random(self) -> dict:
        """
        Randomly generate a genotype given constraints.
        """
        eval_constraint = (
            lambda cons: np.random.uniform(*cons)
            if isinstance(cons, tuple)
            else cons[np.random.choice(len(cons))]
        )

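        # Tuple constraints are treated as (low, high) bounds sampled uniformly; any
        # other sequence constraint is sampled by picking one of its elements.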
        genotype = {
            key: eval_constraint(constraint)
            for key, constraint in self.genotype_constraints.items()
        }

        genotype["_age"] = 0

        return genotype

    def _mutate(self, genotypes: list) -> list:
        """
        Mutate a random key of each genotype given.
        """
        if not isinstance(genotypes, (list, np.ndarray)):
            genotypes = [genotypes]

        new_genotypes = []

        for genotype in genotypes:
            new_genotype = deepcopy(genotype)  ## prevent edit of original!

            key = np.random.choice(list(self.genotype_constraints.keys()))

            cons = self.genotype_constraints[key]

            if isinstance(cons, tuple):
                new_genotype[key] = np.random.uniform(*cons)
            else:
                new_genotype[key] = cons[np.random.choice(len(cons))]

            new_genotype["_age"] = 0

            new_genotypes.append(new_genotype)

        return new_genotypes

    def _crossover(self, genotype1: dict, genotype2: dict) -> [dict, dict]:
        """
        Crossover two different genotypes.

        Parameters
        ----------
        genotype1: dict
            First parent genotype.
        genotype2: dict
            Second parent genotype.

        Returns
        -------
        2 new genotypes.
        """
        offspring1, offspring2 = {}, {}

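        # Single-point crossover: keys are visited in random order and, once the
        # randomly chosen switch_key is reached, the parents swap which offspring
        # receives each of their genes.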
        switch = False
        switch_key = np.random.choice(list(self.genotype_constraints.keys()))

        keys = list(self.genotype_constraints.keys())
        np.random.shuffle(keys)  # Prevent bias

        for key in keys:
            if key == switch_key:
                switch = True

            offspring1[key] = genotype1[key] if switch else genotype2[key]
            offspring2[key] = genotype2[key] if switch else genotype1[key]

        offspring1["_age"] = 0
        offspring2["_age"] = 0

        return [offspring1, offspring2]

    def update(self, f: list):
        """
        Update the population based on each agent's fitness.

        Parameters
        ----------
        f: list of float
            Fitness values for each agent.
        """
        self.epoch += 1

        try:
            n_agents = self.n_agents[self.epoch]
        except (StopIteration, IndexError):
            self.terminated = True
            return

        prev_gen = [(self.population[i], f[i]) for i in range(len(f))]
        prev_gen = sorted(prev_gen, key=lambda x: x[1])
        prev_gen = [value[0] for value in prev_gen if value[0]["_age"] < self._max_age]

        self.population = []

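        # Rebuild the population from four normalized fractions of n_agents:
        # fresh random genotypes, surviving elites, mutations and crossover offspring.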
        self.population += [
            self._random() for _ in range(int(n_agents * self._random_rate))
        ]

        if int(n_agents * self._survivor_rate):  # -0 returns whole list!!
            survivors = [
                deepcopy(genotype)
                for genotype in prev_gen[-int(n_agents * self._survivor_rate) :]
            ]

            for genotype in survivors:
                genotype["_age"] += 1

            self.population += survivors

        mutate_candidates = prev_gen[-int(self._mutate_eligable_pct * len(prev_gen)) :]
        self.population += self._mutate(
            [
                deepcopy(genotype)
                for genotype in np.random.choice(
                    mutate_candidates, size=int(n_agents * self._mutation_rate)
                )
            ]
        )

        for _ in range(int(n_agents * self._crossover_rate) // 2):
            genotype1 = np.random.choice(prev_gen)
            genotype2 = np.random.choice(prev_gen)

            self.population += self._crossover(deepcopy(genotype1), deepcopy(genotype2))

        if len(self) < n_agents:
            diff = n_agents - len(self)

            self.population += self._mutate(np.random.choice(prev_gen, size=diff))

    def evaluate(self) -> list:
        """
        Evaluate each agent on the fitness function.

        Returns
        -------
        Fitness values for each agent.
        """
        params = [
            (
                self.get_fitness,
                self.cache,
                genotype,
                self._log_fn,
                next(self.multilogger.filename_generator) if self._logging else None,
            )
            for genotype in self.population
        ]

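        # The backend distributes `run` over the parameter tuples (possibly in
        # parallel) and returns a (fitness, terminated) pair per genotype.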
        results = self.backend.distribute(run, params)

        fitnesses = [result[0] for result in results]
        terminated = [result[1] for result in results]

        if any(terminated):
            self.terminated = True

        return fitnesses
Exemple #15
0
class Network(Module):
    """
    The foundation for building and handling spiking neural networks.
    Network serves as the container and manager of all SNN parts like
    the neurons, synapses, reward function, ... It is designed to
    interact with an RL environment.

    .. note::
        There are a few types of Networks for different uses; this
        one is the base template for any generic usage.

    Parameters
    ----------
    callback: ExperimentCallback, default=None
        Callback to send relevant function call information to for logging.
    game: RL, default=None
        The environment the network will be interacting with, parameter
        is to allow network to pull relevant parameters in init.
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        experiment_params = {
            "n_episodes": 100,
            "len_episode": 200,
        }

        parts = {
            "inputs": snn.input.Input,
            "neurons": snn.neuron.Neuron,
            "weights": snn.weight.Weight,
            "synapses": snn.synapse.Synapse,
            "readout": snn.readout.Readout,
            "modifiers": None, # [snn.modifier.Modifier,]
        }
        params = {
            "n_inputs": 10,
            "n_outputs": 10,
            "n_neurons": 50,
            "processing_time": 200,
            # + all part parameters, see Network.list_keys(**parts)
        }
        config = {**parts, **params}

        game = Logic(preset="XOR", **config)
        network = Network(game=game, **config)

        for _ in range(experiment_params["n_episodes"]):
            network.reset()
            state = game.reset()
            state_next = None

            for s in range(experiment_params["len_episode"]):
                action = network.tick(state)
                state_next, _, done, __ = game.step(action)
                reward = network.reward(state, action, state_next)
                state = state_next

                if done:
                    break

    .. code-block:: python

        experiment_params = {
            "n_episodes": 100,
            "len_episode": 200,
        }

        class network_template(Network):
            parts = {
                "inputs": snn.input.Input,
                "neurons": snn.neuron.Neuron,
                "weights": snn.weight.Weight,
                "synapses": snn.synapse.Synapse,
                "readout": snn.readout.Readout,
                "modifiers": None, # [snn.modifier.Modifier,]
            }
            keys = {
                "n_inputs": 10,
                "n_outputs": 10,
                "n_neurons": 50,
                "processing_time": 200,
                # + all part parameters, see Network.list_keys(**parts)
            }

        kwargs = {
            "n_neurons": 100,  # Overrides n_neurons in network_template.keys
        }

        game = Logic(preset="XOR", **kwargs)
        network = network_template(game=game, **kwargs)

        for _ in range(experiment_params["n_episodes"]):
            network.reset()
            state = game.reset()
            state_next = None

            for s in range(experiment_params["len_episode"]):
                action = network.tick(state)
                state_next, _, done, __ = game.step(action)
                reward = network.reward(state, action, state_next)
                state = state_next

                if done:
                    break
    """

    NECESSARY_KEYS = [
        Key("n_inputs", "Number input neurons, separate from body.", int),
        Key("n_outputs", "Number of output neurons, a subset of body neurons.",
            int),
        Key("n_neurons", "Number of neurons in the network.", int),
        Key(
            "processing_time",
            "Number of network timesteps per game timestep.",
            int,
        ),
    ]
    NECESSARY_PARTS = [
        Key("inputs", "snn.input.Input"),
        Key("neurons", "snn.neuron.Neuron"),
        Key("weights", "snn.weight.Weight"),
        Key("synapses", "snn.synapse.Synapse"),
        Key("readout", "snn.readout.Readout"),
        Key("modifiers", "list of snn.modifier.Modifier", default=None),
    ]

    def __init__(
        self,
        callback: object = None,
        game: object = None,
        **kwargs,
    ):
        if not hasattr(self, "parts"):
            self.parts = {}
        else:
            self.parts = deepcopy(type(self).parts)
        if "modifiers" not in self.parts:
            self.parts["modifiers"] = None
        for key in self.NECESSARY_PARTS:
            if key in kwargs:
                self.parts[key] = kwargs[key]
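        # Parameter precedence, lowest to highest: game.params, class-level keys, kwargs.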
        self._params = {} if game is None else deepcopy(game.params)
        if hasattr(self, "keys"):
            self._params.update(self.keys)
        self._params.update(kwargs)

        super().__init__(**self._params)

        self.callback = callback or ExperimentCallback()

        self._init_parts()

        self.internal_time = self._spike_log = None

        self.callback.network_init(self)

    def _init_parts(self):
        for key in self.NECESSARY_PARTS:
            name = key.name if isinstance(key, Key) else key

            if name in self.parts:
                part = self.parts[name]
            elif isinstance(key, Key) and hasattr(key, "default"):
                part = key.default
            else:
                raise ValueError(f"No value given for key {name}!")

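            # Synapses are built around the already-constructed weight matrix;
            # every other part only needs the shared parameter dict.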
            if name == "synapses":
                value = part(self.weights, **self.params)
            elif part is None:
                value = part
            else:
                value = part(**self.params)

            setattr(self, name, value)

        if hasattr(self, "synapses") and hasattr(self, "weights"):
            self.synapses.weights = self.weights

    def train(self):
        """
        Set the module to training mode, enabled by default.
        """
        self.training = True
        for key in self.NECESSARY_PARTS:
            name = key.name if isinstance(key, Key) else key
            try:
                getattr(self, name).train()
            except AttributeError:
                pass

    def eval(self):
        """
        Set the module to evaluation mode, disabled by default.
        """
        self.training = False
        for key in self.NECESSARY_PARTS:
            name = key.name if isinstance(key, Key) else key
            try:
                getattr(self, name).eval()
            except AttributeError:
                pass

    @property
    def params(self) -> dict:
        """
        Read only configuration of network.
        """
        return deepcopy(self._params)

    @property
    def spike_log(self) -> np.bool:
        """
        Neuron spike log over processing_time with spike_log[-1] being most recent.
        """
        try:
            return self._spike_log[-self._processing_time:]
        except TypeError:
            return None

    @classmethod
    def list_keys(cls, **parts):
        """
        Print list of all required keys for the Network and
        its parts.
        """
        if isinstance(cls.NECESSARY_KEYS, dict):
            KEYS = {}
        else:
            KEYS = deepcopy(cls.NECESSARY_KEYS)
        for part in parts.values():
            if not hasattr(part, "NECESSARY_KEYS"):
                continue
            if isinstance(KEYS, dict):
                KEYS.update(part.NECESSARY_KEYS)
            else:
                KEYS.extend([p for p in part.NECESSARY_KEYS if p not in KEYS])
        if isinstance(cls.NECESSARY_KEYS, dict):
            KEYS.update(cls.NECESSARY_KEYS)

        print("{")
        for key in KEYS:
            if isinstance(key, Key):
                print(f"\t{str(key)},")
            else:
                desc = cls.NECESSARY_KEYS[key]
                print(f"\t{key}: {desc},")

        print("}")

    def __deepcopy__(self, memo=None):
        memo = {} if memo is None else memo
        cls = self.__class__
        network = cls.__new__(cls)
        memo[id(self)] = network
        for k, v in self.__dict__.items():
            setattr(network, k, deepcopy(v, memo))
        network._init_parts()
        return network

    def reset(self):
        """
        Set network to initial state.

        Examples
        --------

        .. code-block:: python

            experiment_params = {
                "n_episodes": 100,
                "len_episode": 200,
            }

            class network_template(Network):
                parts = {
                    "inputs": snn.input.Input,
                    "neurons": snn.neuron.Neuron,
                    "weights": snn.weight.Weight,
                    "synapses": snn.synapse.Synapse,
                    "readout": snn.readout.Readout,
                    "modifiers": None, # [snn.modifier.Modifier,]
                }
                keys = {
                    "n_inputs": 10,
                    "n_outputs": 10,
                    "n_neurons": 50,
                    "processing_time": 200,
                    # + all part parameters, see Network.list_keys(**parts)
                }

            kwargs = {
                "n_neurons": 100,  # Overrides n_neurons in network_template.keys
            }

            game = Logic(preset="XOR", **kwargs)
            network = network_template(game=game, **kwargs)

            for _ in range(experiment_params["n_episodes"]):
                network.reset()
                state = game.reset()
                state_next = None

                for s in range(experiment_params["len_episode"]):
                    action = network.tick(state)
                    state_next, _, done, __ = game.step(action)
                    reward = network.reward(state, action, state_next)
                    state = state_next

                    if done:
                        break
        """
        self.internal_time = 0

        self.neurons.reset()
        self.synapses.reset()
        if hasattr(self, "rewarder"):
            self.rewarder.reset()
        self.readout.reset()
        self.inputs.reset()
        for modifier in self.modifiers or []:
            modifier.reset()

        self._spike_log = np.zeros(
            (
                self.synapses._stdp_window + self._processing_time,
                self._n_inputs + self._n_neurons,
            ),
            dtype=np.float16,
        )

        self.callback.network_reset()

    def _process_step(self, i: int, state: object):
        """
        Execute one processing step.

        Parameters
        ----------
        i: int
            Current processing timestep.
        state: any
            Current environment state.
        """
        self.internal_time += 1

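        # Gather input and body spikes, log them, propagate the weighted potentials
        # into the neurons, then apply the synaptic learning update.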
        spikes = np.append(self.inputs(), self.neurons())

        self._spike_log[self.synapses._stdp_window + i] = spikes
        self._normalized_spike_log[self.synapses._stdp_window +
                                   i] = spikes.astype(bool)

        self.neurons += np.sum(self.synapses.weights * spikes.reshape((-1, 1)),
                               axis=0)

        self.synapses.update(
            self._normalized_spike_log[i:i + self.synapses._stdp_window],
            self._polarities,
        )

    def tick(self, state: object) -> object:
        """
        Determine network response to given stimulus.

        Parameters
        ----------
        state: any
            Current environment state.

        Returns
        -------
        any Network response to stimulus.

        Examples
        --------

        .. code-block:: python

            experiment_params = {
                "n_episodes": 100,
                "len_episode": 200,
            }

            class network_template(Network):
                parts = {
                    "inputs": snn.input.Input,
                    "neurons": snn.neuron.Neuron,
                    "weights": snn.weight.Weight,
                    "synapses": snn.synapse.Synapse,
                    "readout": snn.readout.Readout,
                    "modifiers": None, # [snn.modifier.Modifier,]
                }
                keys = {
                    "n_inputs": 10,
                    "n_outputs": 10,
                    "n_neurons": 50,
                    "processing_time": 200,
                    # + all part parameters, see Network.list_keys(**parts)
                }

            kwargs = {
                "n_neurons": 100,  # Overrides n_neurons in network_template.keys
            }

            game = Logic(preset="XOR", **kwargs)
            network = network_template(game=game, **kwargs)

            for _ in range(experiment_params["n_episodes"]):
                network.reset()
                state = game.reset()
                state_next = None

                for s in range(experiment_params["len_episode"]):
                    action = network.tick(state)
                    state_next, _, done, __ = game.step(action)
                    reward = network.reward(state, action, state_next)
                    state = state_next

                    if done:
                        break
        """
        self._polarities = np.append(self.inputs.polarities,
                                     self.neurons.polarities)

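        # Carry the most recent stdp_window rows of the previous spike log forward so
        # the synapse update can see spikes that straddle the game-step boundary.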
        self._spike_log[:self.synapses._stdp_window] = self._spike_log[
            -self.synapses._stdp_window:]
        self._normalized_spike_log = self._spike_log.astype(bool)

        self.inputs.update(state)

        if self.modifiers is not None:
            for modifier in self.modifiers:
                modifier.update(self)

        for i in range(self._processing_time):
            self._process_step(i, state)

        outputs = self._spike_log[-self._processing_time:, -self._n_outputs:]
        output = self.readout(outputs)

        self.callback.network_tick(state, output)
        return output
Exemple #16
0
class MatchExpected(Reward):
    """
    Give reward if action is the same as expected. Reward in
    a spiking neural network is meant to simulate dopamine in
    the real brain.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        config = {
            "reward_mult": 1,
            "punish_mult": 2,
        }
        rewarder = MatchExpected(**config)
        rewarder.reset()

        r = rewarder(state, action, state_next)

    .. code-block:: python

        class network_template(Network):
            keys = {
                "reward_mult": 1,
                "punish_mult": 2,
                "expected_value": lambda state: state[0] != state[1],  # e.g. XOR
            }
            parts = {
                "rewarder": MatchExpected
            }
    """

    NECESSARY_KEYS = Reward.extend_keys([
        Key("expected_value", "func(state)->action Expected action."),
    ])

    def __call__(self, state: object, action: object,
                 state_next: object) -> float:
        """
        Determine how much reward should be given for taking action in state.
        reward_mult if action == expected else punish_mult.
        Called once per game or network step based on network chosen.

        Parameters
        ----------
        state: any
            Environment state before action is taken.
        action: any
            Action taken in response to state.
        state_next: any
            State of environment after action was taken.

        Returns
        -------
        float Reward for taking action in state.
        """
        expected = self._expected_value(state)

        rwd = np.sum(
            np.where(action == expected, self._reward_mult,
                     -self._punish_mult))

        return rwd
Exemple #17
0
class RLNetwork(Network):
    """
    The foundation for building and handling spiking neural networks.
    Network serves as the container and manager of all SNN parts like
    the neurons, synapses, reward function, ... It is designed to
    interact with an RL environment.

    .. note::
        There are a few types of Networks for different uses; this
        one is the base for reinforcement learning with SNNs, giving one
        reward per game update (see ActiveRLNetwork reward for per network
        step).

    Parameters
    ----------
    callback: ExperimentCallback, default=None
        Callback to send relevant function call information to for logging.
    game: RL, default=None
        The environment the network will be interacting with, parameter
        is to allow network to pull relevant parameters in init.
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        experiment_params = {
            "n_episodes": 100,
            "len_episode": 200,
        }

        parts = {
            "inputs": snn.input.Input,
            "neurons": snn.neuron.Neuron,
            "weights": snn.weight.Weight,
            "synapses": snn.synapse.Synapse,
            "readout": snn.readout.Readout,
            "rewarder": snn.reward.Reward,
            "modifiers": None, # [snn.modifier.Modifier,]
        }
        params = {
            "n_inputs": 10,
            "n_outputs": 10,
            "n_neurons": 50,
            "processing_time": 200,
            # + all part parameters, see Network.list_keys(**parts)
        }
        config = {**parts, **params}

        game = Logic(preset="XOR", **config)
        network = RLNetwork(game=game, **config)

        for _ in range(experiment_params["n_episodes"]):
            network.reset()
            state = game.reset()
            state_next = None

            for s in range(experiment_params["len_episode"]):
                action = network.tick(state)
                state_next, _, done, __ = game.step(action)
                reward = network.reward(state, action, state_next)
                state = state_next

                if done:
                    break

    .. code-block:: python

        experiment_params = {
            "n_episodes": 100,
            "len_episode": 200,
        }

        class network_template(RLNetwork):
            parts = {
                "inputs": snn.input.Input,
                "neurons": snn.neuron.Neuron,
                "weights": snn.weight.Weight,
                "synapses": snn.synapse.Synapse,
                "readout": snn.readout.Readout,
                "rewarder": snn.reward.Reward,
                "modifiers": None, # [snn.modifier.Modifier,]
            }
            keys = {
                "n_inputs": 10,
                "n_outputs": 10,
                "n_neurons": 50,
                "processing_time": 200,
                # + all part parameters, see Network.list_keys(**parts)
            }

        kwargs = {
            "n_neurons": 100,  # Overrides n_neurons in network_template.keys
        }

        game = Logic(preset="XOR", **kwargs)
        network = network_template(game=game, **kwargs)

        for _ in range(experiment_params["n_episodes"]):
            network.reset()
            state = game.reset()
            state_next = None

            for s in range(experiment_params["len_episode"]):
                action = network.tick(state)
                state_next, _, done, __ = game.step(action)
                reward = network.reward(state, action, state_next)
                state = state_next

                if done:
                    break
    """

    NECESSARY_PARTS = Network.extend_keys(
        [
            Key("rewarder", "snn.reward.Reward"),
        ],
        base="NECESSARY_PARTS",
    )

    def __init__(
        self,
        callback: object = None,
        game: object = None,
        **kwargs,
    ):
        super().__init__(callback=callback, game=game, **kwargs)

    def reward(self,
               state: object,
               action: object,
               state_next: object,
               reward: float = None) -> float:
        """
        If reward given as parameter, apply reward to synapses.
        Otherwise rewarder calculates based on state and action, then applies to synapses.
        Called once per game step.

        Parameters
        ----------
        state: any
            State of environment where action was taken.
        action: any
            Action taken by network in response to state.
        state_next: any
            State of environment after action was taken.
        reward: float, default=None
            Reward to give network, if None it will be determined by the rewarder.

        Returns
        -------
        float Reward given to network.

        Examples
        --------

        .. code-block:: python

            experiment_params = {
                "n_episodes": 100,
                "len_episode": 200,
            }

            class network_template(RLNetwork):
                parts = {
                    "inputs": snn.input.Input,
                    "neurons": snn.neuron.Neuron,
                    "weights": snn.weight.Weight,
                    "synapses": snn.synapse.Synapse,
                    "readout": snn.readout.Readout,
                    "modifiers": None, # [snn.modifier.Modifier,]
                }
                keys = {
                    "n_inputs": 10,
                    "n_outputs": 10,
                    "n_neurons": 50,
                    "processing_time": 200,
                    # + all part parameters, see Network.list_keys(**parts)
                }

            kwargs = {
                "n_neurons": 100,  # Overrides n_neurons in network_template.keys
            }

            game = Logic(preset="XOR", **kwargs)
            network = network_template(game=game, **kwargs)

            for _ in range(experiment_params["n_episodes"]):
                network.reset()
                state = game.reset()
                state_next = None

                for s in range(experiment_params["len_episode"]):
                    action = network.tick(state)
                    state_next, _, done, __ = game.step(action)
                    reward = network.reward(state, action, state_next)
                    state = state_next

                    if done:
                        break
        """
        reward = (reward if reward is not None else self.rewarder(
            state, action, state_next))

        self.synapses.reward(reward)

        self.callback.network_reward(state, action, state_next, reward)
        return reward
Exemple #18
0
class RandPotential(Neuron):
    """
    A group of spiking neurons where noise `~U(0, potential_noise_scale)` is added
    to `n_neurons * prob_rand_fire` randomly chosen neurons at each step.

    Each spiking neuron has an internal membrane potential that
    increases with each incoming spike. The potential persists but slowly
    decreases over time. Each neuron fires when its potential surpasses
    some firing threshold and does not fire again for the duration
    of its refractory period.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        config = {
            "magnitude": 2,
            "n_neurons": 100,
            "neuron_pct_inhibitory": .2,
            "potential_decay": .2,
            "prob_rand_fire": .08,
            "refractory_period": 1,
            "resting_mv": 0,
            "spike_delay": 0,
            "potential_noise_scale": .1,
        }
        neurons = RandPotential(**config)
        neurons.reset()

        weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons']))

        for i in range(100):
            spikes = neurons()

            neurons += np.sum(
                weights * spikes.reshape((-1, 1)), axis=0
            )

    .. code-block:: python

        class network_template(Network):
            keys = {
                "magnitude": 2,
                "n_neurons": 100,
                "neuron_pct_inhibitory": .2,
                "potential_decay": .2,
                "prob_rand_fire": .08,
                "refractory_period": 1,
                "potential_noise_scale": .1,
            }
            parts = {
                "neurons": RandPotential
            }
    """

    NECESSARY_KEYS = Neuron.extend_keys([
        Key("potential_noise_scale", "Upper bound of uniform noise added to neuron potentials.",
            float)
    ])

    def __call__(self) -> np.bool:
        """
        Add noise `~U(0, potential_noise_scale)` to `n_neurons * prob_rand_fire` neurons
        then determine whether each neuron will fire according to the configured
        `firing_threshold` (spikes are scheduled when potentials >= firing_threshold).
        Called once per network step.

        Returns
        -------
        ndarray[n_neurons, dtype=bool] Spike output from each neuron at the current timestep.

        Examples
        --------

        .. code-block:: python

            config = {
                "magnitude": 2,
                "n_neurons": 100,
                "neuron_pct_inhibitory": .2,
                "potential_decay": .2,
                "prob_rand_fire": .08,
                "refractory_period": 1,
                "potential_noise_scale": .1,
                "firing_threshold": 16,
            }
            neurons = RandPotential(**config)
            neurons.reset()

            weights = np.random.uniform(0, 2, size=(config['n_neurons'], config['n_neurons']))

            for i in range(100):
                spikes = neurons()

                neurons += np.sum(
                    weights * spikes.reshape((-1, 1)), axis=0
                )
        """
        noise = np.random.uniform(0,
                                  self._potential_noise_scale,
                                  size=self._n_neurons)
        noise[~(np.random.uniform(0, 1, size=self._n_neurons) <= self.
                _prob_rand_fire)] = 0

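        # Only the randomly selected subset of neurons keeps its noise; add it to the
        # membrane potentials before thresholding.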
        self.potentials += noise

        spike_occurences = self.potentials >= self._firing_threshold

        self.refractory_timers[spike_occurences] = self._refractory_period + 1
        self.schedule += self.spike_shape * np.int_(spike_occurences)

        output = self.schedule[0] * self.polarities * self._magnitude

        return output
Exemple #19
0
class Readout(Module):
    """
    Translator from output neuron spike trains to actions
    for the environment.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        config = {
            "n_outputs": 10,
            "magnitude": 2,
        }
        readout = Readout(**config)
        readout.reset()

        action = readout(np.ones((10, config["n_outputs"])))

    .. code-block:: python

        class network_template(Network):
            keys = {
                "n_outputs": 10,
                "magnitude": 2,
            }
            parts = {
                "readout": Readout
            }
    """

    NECESSARY_KEYS = [
        Key("n_outputs", "Number of output neurons, a subset of body neurons.",
            int),
        Key("magnitude", "Spike fire magnitude.", float),
    ]

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def reset(self):
        """
        Reset all readout members.
        Called at the start of each episode.
        """
        pass

    def __call__(self, output_spike_train: np.bool) -> object:
        """
        Interpret the output neuron's spike train.
        Called once per game step.

        Parameters
        ----------
        output_spike_train: np.ndarray[t, n_neurons, dtype=bool]
            Spike train with train[-1] being the most recent time.

        Returns
        -------
        object Action chosen.
        """
        raise NotImplementedError(
            f"__call__ not implemented for {type(self)}!")
Exemple #20
0
class Logic(RL):
    """
    Game of trying to mimic logic gates.

    Parameters
    ----------
    preset: str=PRESETS.keys(), default="OR"
        Configuration preset key, default values for game parameters.
    callback: ExperimentCallback, default=None
        Callback to send relevant function call information to.
    kwargs: dict, default=None
        Game parameters for NECESSARY_KEYS. Overrides preset settings.


    Examples
    --------

    .. code-block:: python

        game = Logic(preset="OR")
        game.seed(0)

        state = game.reset()
        for _ in range(100):
            action = model.get_action(state)
            state, reward, done, info = game.step(action)
            if done:
                break

        game.close()

    .. code-block:: python

        class game_template(Logic):
            config = Logic.PRESETS["XOR"]

            config.update({  # Overrides preset values
                "param1": 1
                "param2": 2,
            })

        kwargs = {
            "param1": 0,  # Overrides game_template.config["param1"]
        }
        game = game_template(**kwargs)
        game.seed(0)

        state = game.reset()
        for _ in range(100):
            action = model.get_action(state)
            state, reward, done, info = game.step(action)
            if done:
                break

        game.close()
    """

    action_space = [False, True]
    observation_space = [(a, b) for a in [False, True] for b in [False, True]]

    metadata = {}

    NECESSARY_KEYS = [
        Key(
            "expected_value",
            "func(state) Correct response of logic gate to specific state.",
        ),
    ]

    PRESETS = {
        "AND": {
            "name": "AND",
            "expected_value": and_fn,
        },
        "OR": {
            "name": "OR",
            "expected_value": or_fn,
        },
        "XOR": {
            "name": "XOR",
            "expected_value": xor_fn,
        },
    }

    def __init__(self, preset: str = "OR", callback: object = None, **kwargs):
        super().__init__(preset=preset, callback=callback, **kwargs)

    def _get_state(self) -> np.ndarray:
        """
        Randomly generate a network state.

        Returns
        -------
        ndarray[2, bool] Randomly generated inputs to logic gate.
        """
        state = np.random.uniform(size=2) <= 0.5
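        # Two independent fair coin flips, returned as a tuple so the state stays
        # hashable (e.g. usable as a dict key).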

        return tuple(state)

    def step(self, action: bool) -> (np.ndarray, float, bool, dict):
        """
        Act within the environment.

        Parameters
        ----------
        action: bool
            Action taken in environment.

        Returns
        -------
        state: np.ndarray
            ndarray[2, bool] Randomly generated inputs to logic gate.
        reward: float
            Reward given by environment, always 0 for Logic.
        done: bool
            Whether the game is done or not.
        info: dict
            Information of environment, always empty for Logic.


        Examples
        --------

        .. code-block:: python

            game = Logic(preset="OR")
            game.seed(0)

            state = game.reset()
            for _ in range(100):
                action = model.get_action(state)
                state, reward, done, info = game.step(action)
                if done:
                    break

            game.close()
        """
        state_new = self._get_state()
        done = False

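        # Logic hands out no reward itself and never terminates; reward shaping is
        # left to the network's rewarder via `expected_value`.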
        rwd = 0
        info = {}

        self.callback.game_step(action, self._state, state_new, rwd, done,
                                info)
        self._state = state_new
        return state_new, rwd, done, info

    def reset(self) -> np.ndarray:
        """
        Reset environment.

        Returns
        -------
        np.ndarray[2, bool] Initial state, random inputs to logic gate.


        Examples
        --------

        .. code-block:: python

            game = Logic(preset="OR")
            game.seed(0)

            state = game.reset()
        """
        state = self._get_state()

        self.callback.game_reset(state)
        self._state = state
        return state
Exemple #21
0
class Weight(Module):
    """
    The data structure to generate and manage connections between neurons.
    Contains generation, arithmetic and get operations.
    Updates are handled in spikey.snn.Synapse objects.

    .. note::
        Weight._matrix must be a masked ndarray with fill_value=0 while Weight.matrix
        is a simple ndarray.

        Arithmetic operations (a * b) use the unmasked matrix for speed, while
        in-place arithmetic (a += b) uses the masked values.

        Get operations (Weight[[1, 2, 3]]) apply to the masked ndarray.

    Parameters
    ----------
    kwargs: dict
        Dictionary with values for each key in NECESSARY_KEYS.

    Examples
    --------

    .. code-block:: python

        config = {
            "n_inputs": 1,
            "n_neurons": 10,
            "max_weight": 3,
        }
        w = Weight(**config)

        in_volts = w * np.ones(config['n_neurons'])
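
        # A sketch of the arithmetic semantics noted above: in-place ops (w += delta)
        # use the masked matrix and re-clip to [0, max_weight], while w * x and
        # w.matrix use the raw unmasked values.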

    .. code-block:: python

        class network_template(Network):
            keys = {
                "n_inputs": 1,
                "n_neurons": 10,
                "max_weight": 3,
            }
            parts = {
                "weights": Weight
            }
    """

    NECESSARY_KEYS = [
        Key("n_inputs", "Number input neurons, separate from body.", int),
        Key("n_neurons", "Number of neurons in network.", int),
        Key("max_weight", "Max synapse weight.", float),
    ]

    def __init__(self, **kwargs):
        self._matrix = None
        super().__init__(**kwargs)

    def _assert_matrix_shape(self, matrix, key):
        expected_shape = (self._n_inputs + self._n_neurons, self._n_neurons)
        real_shape = matrix.shape
        if not np.array_equal(real_shape, expected_shape):
            base_error = f"Expected '{key}' shape to equal (N_INPUTS+N_NEURONS, N_NEURONS)[{expected_shape}], not {real_shape}!"
            if len(real_shape) > 2:
                raise ValueError(
                    base_error +
                    f" Squeeze extra single valued dimensions with `{key}.squeeze()`."
                )
            elif np.array_equal(real_shape,
                                (self._n_neurons, self._n_neurons)):
                raise ValueError(
                    base_error +
                    " Add N_INPUTS to the first dimension of your matrix.")
            elif np.array_equal(
                    real_shape,
                (self._n_neurons, self._n_inputs + self._n_neurons)):
                raise ValueError(base_error +
                                 f" Transpose your matrix with `{key}.T`.")
            else:
                raise ValueError(base_error)

    def _convert_feedforward(self, layers):
        """
        Convert network in feedforward layer format to weight matrix format.
        NOTE: Layers given as masked arrays will have masks dropped.

        Parameters
        ----------
        layers: [ndarray, ndarray, ...]
            Network to convert.

        Returns
        -------
        ndarray Network in weight matrix format.
        """
        matrix = np.zeros((self._n_inputs + self._n_neurons, self._n_neurons),
                          dtype=float)

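        # Place each layer's weight block along a diagonal: rows advance by the
        # layer's input count and columns by its output count; everything else
        # stays zero (no connection).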
        row_offset, col_offset = 0, 0
        for i, layer in enumerate(layers):
            n, m = layer.shape
            matrix[row_offset:row_offset + n,
                   col_offset:col_offset + m] = layer
            row_offset += n
            col_offset += m

        return matrix

    @property
    def matrix(self) -> np.float:
        """
        Return unmasked weight matrix.
        """
        if isinstance(self._matrix, np.ma.core.MaskedArray):
            return self._matrix.data

        return self._matrix

    def clip(self):
        """
        Restrict weights to 0 and max_weight.
        """
        np.clip(self._matrix.data,
                0.0,
                float(self._max_weight),
                out=self._matrix.data)

    def __get__(self, obj: object, objtype: object) -> np.float:
        return self.matrix

    def __set__(self, obj: object, value: object):
        self.matrix = value

    def __getitem__(self, idx: np.int) -> np.float:
        return self._matrix[idx]

    def __add__(self, addend: np.ndarray) -> np.float:
        return self.matrix + addend

    def __iadd__(self, addend: np.ndarray):
        self._matrix += addend

        self.clip()

        return self

    def __sub__(self, subtractor: np.ndarray) -> np.float:
        return self.matrix - subtractor

    def __isub__(self, subtractor: np.ndarray):
        self._matrix -= subtractor

        self.clip()

        return self

    def __mul__(self, multiplier: np.ndarray) -> np.float:
        return self.matrix * multiplier

    def __imul__(self, multiplier: np.ndarray):
        self._matrix *= multiplier

        self.clip()

        return self

    def __truediv__(self, divisor: np.ndarray) -> np.float:
        return self.matrix / divisor

    def __itruediv__(self, divisor: np.ndarray):
        self._matrix /= divisor

        self.clip()

        return self
Exemple #22
0
class MetaNQueens(MetaRL):
    """
    Game of placing a number of queen chess pieces on a chess board
    such that no two queens can attack each other.

    92 distinct solutions out of 4 billion possibilities w/ 8 queens.

    Genotypes are parameterized as follows,

    .. code-block:: python

        for i in range(n_queens):
            xi: int in [0, 7] X position of queen i.
            yi: int in [0, 7] Y position of queen i.

    Parameters
    ----------
    kwargs: dict, default=None
        Game parameters for NECESSARY_KEYS. Overrides preset settings.

    Examples
    --------

    .. code-block:: python

        metagame = MetaNQueens()
        metagame.seed(0)
        for _ in range(100):
            genotype = [{}, ...]
            fitness, done = metagame.get_fitness(genotype)
            if done:
                break
        metagame.close()

    .. code-block:: python

        metagame = MetaNQueens(**metagame_config)
        metagame.seed(0)
        population = Population(... metagame, ...)
        # population main loop
    """

    NECESSARY_KEYS = MetaRL.extend_keys([
        Key(
            "n_queens",
            "{1..8} Number of queens the agent needs to place on the board.",
            int,
            default=8,
        )
    ])
    GENOTYPE_CONSTRAINTS = {}  ## Defined in __init__

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        if self._n_queens > 8 or self._n_queens < 1:
            raise ValueError(
                f"n_queens must be in range [1, 8], not {self._n_queens}!")

        self.letters = ["a", "b", "c", "d", "e", "f", "g",
                        "h"][:self._n_queens]
        keys = [
            first + second for second in ["x", "y"] for first in self.letters
        ]

        self.GENOTYPE_CONSTRAINTS = {key: list(range(8)) for key in keys}

    @staticmethod
    def setup_game() -> list:
        """
        Setup game.

        Returns
        -------
        list Initial board state, number of queens in each horizontal, vertical and diagonal line.
        """
        horizontals = np.zeros(8)
        verticals = np.zeros(8)
        ldiagonals = np.zeros(15)  # \
        rdiagonals = np.zeros(15)  # /

        return horizontals, verticals, ldiagonals, rdiagonals

    @staticmethod
    def run_move(board: list, move: tuple) -> list:
        """
        Execute action.

        Parameters
        ----------
        board: list
            Number of queens across each horizontal, vertical and diagonal line.
        move: (x, y) in [0, 7]
            X and Y coordinate to place queen.

        Returns
        -------
        [horizontals: list, verticals: list, ldiagonals: list, rdiagonals: list] Updated board.
        """
        horizontals, verticals, ldiagonals, rdiagonals = board
        x, y = move

        horizontals[x] += 1
        verticals[y] += 1
        ldiagonals[x + y] += 1
        rdiagonals[7 - x + y] += 1

        return horizontals, verticals, ldiagonals, rdiagonals

    def get_fitness(
        self,
        genotype: dict,
    ) -> (float, bool):
        """
        Evaluate the fitness of a genotype.

        Parameters
        ----------
        genotype: dict
            Dictionary with values for each key in GENOTYPE_CONSTRAINTS.

        Returns
        -------
        fitness: float
            Fitness of genotype given.
        done: bool
            Whether termination condition has been reached or not.

        Examples
        --------

        .. code-block:: python

            metagame = MetaNQueens()
            metagame.seed(0)
            for _ in range(100):
                genotype = [{}, ...]
                fitness, done = metagame.get_fitness(genotype)
                if done:
                    break
            metagame.close()
        """
        board = self.setup_game()

        for letter in self.letters:
            move = (genotype[letter + "x"], genotype[letter + "y"])

            board = self.run_move(board, move)

        clashes = 0

        for item in board:
            clashes += np.sum(item[item > 1] - 1)

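        # 28 = C(8, 2), the number of distinct queen pairs with 8 queens, is the
        # maximum fitness; every extra queen sharing a line counts as one clash.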
        fitness = 28 - clashes
        terminate = clashes == 0

        return fitness, terminate