Example #1
    def test_plot_runs(self):
        ma = MatplotlibAnalyzer()
        ta = TensorboardAnalyzer("./logs/runs")

        for analyzer in [ma, ta]:
            obs = torch.rand(1, 28, 28)
            analyzer.plot_obs(obs)

            # 4 channels out, 1 channel in, 8x8 kernels
            conv_weights = torch.rand(4, 1, 8, 8)
            analyzer.plot_conv2d_weights(conv_weights)

            rewards = [0, 0, 0, 0, 0]
            analyzer.plot_reward(rewards)

            # Monitors have time as last dimension
            v = torch.rand(50, 1, 1, 28, 28)
            voltage_dict = {"X": v}
            threshold_dict = {"X": torch.tensor(0.75)}
            analyzer.plot_voltages(voltage_dict, threshold_dict)

            # The monitors have time as last dimension
            spikes = torch.rand(50, 1, 1, 28, 28) > 0.5
            spike_dict = {"X": spikes}
            analyzer.plot_spikes(spike_dict)

            analyzer.finalize_step()

        ta.writer.close()
Example #2
    def test_init(self):
        ma = MatplotlibAnalyzer()
        assert plt.isinteractive()

        ta = TensorboardAnalyzer("./logs/init")

        # check that the log directory was created
        assert os.path.isdir("./logs/init")

        # check to ensure we can write data
        ta.writer.add_scalar("init_scalar", 100.0, 0)
        ta.writer.close()
Example #3
    norm=0.4 * kernel_size**2,
    nu=[1e-4, 1e-2],
    wmax=1.0,
)

network.add_layer(input_layer, name="X")
network.add_layer(conv_layer, name="Y")
network.add_connection(conv_conn, source="X", target="Y")

# Train the network.
print("Begin training.\n")

if args.tensorboard:
    analyzer = TensorboardAnalyzer("logs/conv")
else:
    analyzer = MatplotlibAnalyzer()

for step, batch in enumerate(tqdm(train_dataloader)):
    # batch contains image, label, encoded_image since an image_encoder
    # was provided

    # batch["encoded_image"] is in BxTxCxHxW format
    inputs = {"X": batch["encoded_image"]}

    # Run the network on the input.
    # Specify the location of the time dimension
    network.run(inputs=inputs, time=time, input_time_dim=1)

    network.reset_state_variables()  # Reset state variables.

    analyzer.plot_conv2d_weights(conv_conn.w, step=step)
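    # As in Example #1, finish each iteration by finalizing the analyzer so the
    # Matplotlib figures are redrawn (or this step's TensorBoard events are
    # written out). A minimal sketch of closing out the loop:
    analyzer.finalize_step()

# With the TensorBoard backend, close the underlying writer once training is
# done, as in Example #1.
if args.tensorboard:
    analyzer.writer.close()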
Example #4
    def __init__(
        self,
        network: Network,
        environment: Environment,
        action_function: Optional[Callable] = None,
        encoding: Optional[Callable] = None,
        **kwargs,
    ):
        # language=rst
        """
        Initializes the pipeline.

        :param network: Arbitrary network object.
        :param environment: Arbitrary environment.
        :param action_function: Function to convert network outputs into environment inputs.
        :param encoding: Function used to encode the input.

        Keyword arguments:

        :param str device: PyTorch computing device.
        :param encode_factor: Coefficient applied to the input before encoding.
        :param int num_episodes: Number of episodes to train for. Defaults to 100.
        :param str output: String name of the layer from which to take output.
        :param int render_interval: Interval to render the environment.
        :param int plot_interval: Interval at which to plot network state; if
            ``None``, plotting is skipped.
        :param int reward_delay: How many iterations to delay delivery of reward.
        :param int time: Time for which to run the network. Defaults to the network's
            timestep.
        :param int overlay_input: Overlay the last X previous inputs.
        :param float percent_of_random_action: Chance of choosing a random action
            instead of the one selected by ``action_function``.
        :param int random_action_after: Take a random action once the same output
            action has been repeated more than this many times in a row.
        """
        super().__init__(network, **kwargs)

        self.episode = 0

        self.env = environment
        self.action_function = action_function
        self.encoding = encoding

        self.accumulated_reward = 0.0
        self.reward_list = []

        # Setting kwargs.
        self.num_episodes = kwargs.get("num_episodes", 100)
        self.output = kwargs.get("output", None)
        self.render_interval = kwargs.get("render_interval", None)
        self.plot_interval = kwargs.get("plot_interval", None)
        self.reward_delay = kwargs.get("reward_delay", None)
        self.time = kwargs.get("time", int(network.dt))
        self.overlay_t = kwargs.get("overlay_input", 1)
        self.percent_of_random_action = kwargs.get("percent_of_random_action",
                                                   0.0)
        self.encode_factor = kwargs.get("encode_factor", 1.0)

        if torch.cuda.is_available() and self.allow_gpu:
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        # Variables for the input overlay process.
        if self.overlay_t > 1:
            self.overlay_time_effect = torch.tensor(
                [i / self.overlay_t for i in range(1, self.overlay_t + 1)],
                dtype=torch.float,
                device=self.device,
            )
        self.overlay_start = True

        if self.reward_delay is not None:
            assert self.reward_delay > 0
            self.rewards = torch.zeros(self.reward_delay)

        # Set up for multiple input layers.
        self.inputs = [
            name for name, layer in network.layers.items()
            if isinstance(layer, AbstractInput)
        ]

        self.action = torch.tensor(-1, device=self.device)
        self.last_action = torch.tensor(-1, device=self.device)
        self.action_counter = 0
        self.random_action_after = kwargs.get("random_action_after", self.time)

        self.voltage_record = None
        self.threshold_value = None
        self.reward_plot = None
        self.first = True

        self.analyzer = MatplotlibAnalyzer(**self.plot_config)

        if self.output is not None:
            self.network.add_monitor(
                Monitor(self.network.layers[self.output], ["s"],
                        time=self.time),
                self.output,
            )

            self.spike_record = {
                self.output:
                torch.zeros(
                    (self.time, self.env.action_space.n)).to(self.device)
            }
Example #5
class EnvironmentPipeline(BasePipeline):
    # language=rst
    """
    Abstracts the interaction between ``Network``, ``Environment``, and environment
    feedback action.
    """
    def __init__(
        self,
        network: Network,
        environment: Environment,
        action_function: Optional[Callable] = None,
        encoding: Optional[Callable] = None,
        **kwargs,
    ):
        # language=rst
        """
        Initializes the pipeline.

        :param network: Arbitrary network object.
        :param environment: Arbitrary environment.
        :param action_function: Function to convert network outputs into environment inputs.
        :param encoding: Function used to encode the input.

        Keyword arguments:

        :param str device: PyTorch computing device.
        :param encode_factor: Coefficient applied to the input before encoding.
        :param int num_episodes: Number of episodes to train for. Defaults to 100.
        :param str output: String name of the layer from which to take output.
        :param int render_interval: Interval to render the environment.
        :param int plot_interval: Interval at which to plot network state; if
            ``None``, plotting is skipped.
        :param int reward_delay: How many iterations to delay delivery of reward.
        :param int time: Time for which to run the network. Defaults to the network's
            timestep.
        :param int overlay_input: Overlay the last X previous inputs.
        :param float percent_of_random_action: Chance of choosing a random action
            instead of the one selected by ``action_function``.
        :param int random_action_after: Take a random action once the same output
            action has been repeated more than this many times in a row.
        """
        super().__init__(network, **kwargs)

        self.episode = 0

        self.env = environment
        self.action_function = action_function
        self.encoding = encoding

        self.accumulated_reward = 0.0
        self.reward_list = []

        # Setting kwargs.
        self.num_episodes = kwargs.get("num_episodes", 100)
        self.output = kwargs.get("output", None)
        self.render_interval = kwargs.get("render_interval", None)
        self.plot_interval = kwargs.get("plot_interval", None)
        self.reward_delay = kwargs.get("reward_delay", None)
        self.time = kwargs.get("time", int(network.dt))
        self.overlay_t = kwargs.get("overlay_input", 1)
        self.percent_of_random_action = kwargs.get("percent_of_random_action",
                                                   0.0)
        self.encode_factor = kwargs.get("encode_factor", 1.0)

        if torch.cuda.is_available() and self.allow_gpu:
            self.device = torch.device("cuda")
        else:
            self.device = torch.device("cpu")

        # Variables for the input overlay process.
        if self.overlay_t > 1:
            self.overlay_time_effect = torch.tensor(
                [i / self.overlay_t for i in range(1, self.overlay_t + 1)],
                dtype=torch.float,
                device=self.device,
            )
        self.overlay_start = True

        if self.reward_delay is not None:
            assert self.reward_delay > 0
            self.rewards = torch.zeros(self.reward_delay)

        # Set up for multiple input layers.
        self.inputs = [
            name for name, layer in network.layers.items()
            if isinstance(layer, AbstractInput)
        ]

        self.action = torch.tensor(-1, device=self.device)
        self.last_action = torch.tensor(-1, device=self.device)
        self.action_counter = 0
        self.random_action_after = kwargs.get("random_action_after", self.time)

        self.voltage_record = None
        self.threshold_value = None
        self.reward_plot = None
        self.first = True

        self.analyzer = MatplotlibAnalyzer(**self.plot_config)

        if self.output is not None:
            self.network.add_monitor(
                Monitor(self.network.layers[self.output], ["s"],
                        time=self.time),
                self.output,
            )

            self.spike_record = {
                self.output:
                torch.zeros(
                    (self.time, self.env.action_space.n)).to(self.device)
            }

    def init_fn(self) -> None:
        pass

    def train(self, **kwargs) -> None:
        # language=rst
        """
        Trains for the specified number of episodes. Each episode can be of arbitrary
        length.
        """
        while self.episode < self.num_episodes:
            self.reset_state_variables()

            for _ in itertools.count():
                obs, reward, done, info = self.env_step()

                self.step((obs, reward, done, info), **kwargs)

                if done:
                    break

            print(f"Episode: {self.episode} - "
                  f"accumulated reward: {self.accumulated_reward:.2f}")
            self.episode += 1

    def env_step(self) -> Tuple[torch.Tensor, float, bool, Dict]:
        # language=rst
        """
        Single step of the environment which includes rendering, getting and performing
        the action, and accumulating/delaying rewards.

        :return: An OpenAI ``gym`` compatible tuple with modified reward and info.
        """
        # Render game.
        if (self.render_interval is not None
                and self.step_count % self.render_interval == 0):
            self.env.render()

        # Choose action based on output neuron spiking.
        if self.action_function is not None:
            self.last_action = self.action
            if torch.rand(1) < self.percent_of_random_action:
                self.action = torch.randint(low=0,
                                            high=self.env.action_space.n,
                                            size=(1, ))[0]
            elif self.action_counter > self.random_action_after:
                if self.last_action == 0:  # last action was start b
                    self.action = 1  # next action will be fire b
                    tqdm.write(f"Fire -> too many times {self.last_action} ")
                else:
                    self.action = torch.randint(low=0,
                                                high=self.env.action_space.n,
                                                size=(1, ))[0]
                    tqdm.write(f"too many times {self.last_action} ")
            else:
                self.action = self.action_function(self, output=self.output)

            if self.last_action == self.action:
                self.action_counter += 1
            else:
                self.action_counter = 0

        # Run a step of the environment.
        obs, reward, done, info = self.env.step(self.action)

        # Set reward in case of delay.
        if self.reward_delay is not None:
            self.rewards = torch.tensor([reward, *self.rewards[1:]]).float()
            reward = self.rewards[-1]

        # Accumulate reward.
        self.accumulated_reward += reward

        info["accumulated_reward"] = self.accumulated_reward

        return obs, reward, done, info

    def step_(self, gym_batch: Tuple[torch.Tensor, float, bool, Dict],
              **kwargs) -> None:
        # language=rst
        """
        Run a single iteration of the network and update it and the reward list when
        done.

        :param gym_batch: An OpenAI ``gym`` compatible tuple.
        """
        obs, reward, done, info = gym_batch

        if self.overlay_t > 1:
            if self.overlay_start:
                self.overlay_last_obs = (obs.view(
                    obs.shape[2], obs.shape[3]).clone().to(self.device))
                self.overlay_buffer = torch.stack([self.overlay_last_obs] *
                                                  self.overlay_t,
                                                  dim=2).to(self.device)
                self.overlay_start = False
            else:
                obs = obs.to(self.device)
                self.overlay_next_stat = torch.clamp(self.overlay_last_obs -
                                                     obs,
                                                     min=0).to(self.device)
                self.overlay_last_obs = obs.clone()
                self.overlay_buffer = torch.cat(
                    (
                        self.overlay_buffer[:, :, 1:],
                        self.overlay_next_stat.view([
                            self.overlay_next_stat.shape[2],
                            self.overlay_next_stat.shape[3],
                            1,
                        ]),
                    ),
                    dim=2,
                )
            obs = (torch.sum(self.overlay_time_effect * self.overlay_buffer,
                             dim=2) * self.encode_factor)

        # Place the observations into the inputs.
        if self.encoding is None:
            obs = obs.unsqueeze(0).unsqueeze(0)
            obs_shape = torch.tensor([1] * len(obs.shape[1:]),
                                     device=self.device)
            inputs = {
                k: obs.repeat(self.time, *obs_shape).to(self.device)
                for k in self.inputs
            }
        else:
            obs = obs.unsqueeze(0)
            inputs = {
                k: self.encoding(obs, self.time, device=self.device)
                for k in self.inputs
            }

        # Run the network on the spike train-encoded inputs.
        self.network.run(inputs=inputs,
                         time=self.time,
                         reward=reward,
                         **kwargs)

        if self.output is not None:
            self.spike_record[self.output] = (
                self.network.monitors[self.output].get("s").float())

        if done:
            if self.network.reward_fn is not None:
                self.network.reward_fn.update(
                    accumulated_reward=self.accumulated_reward,
                    steps=self.step_count,
                    **kwargs,
                )
            self.reward_list.append(self.accumulated_reward)

    def reset_state_variables(self) -> None:
        # language=rst
        """
        Reset the pipeline.
        """
        self.env.reset()
        self.network.reset_state_variables()
        self.accumulated_reward = 0.0
        self.step_count = 0
        self.overlay_start = True
        self.action = torch.tensor(-1)
        self.last_action = torch.tensor(-1)
        self.action_counter = 0

    def plots(self, gym_batch: Tuple[torch.Tensor, float, bool, Dict],
              *args) -> None:
        # language=rst
        """
        Plot the encoded input, layer spikes, and layer voltages.

        :param gym_batch: An OpenAI ``gym`` compatible tuple.
        """
        if self.plot_interval is None:
            return

        obs, reward, done, info = gym_batch

        for key, item in self.plot_config.items():
            if key == "obs_step" and item is not None:
                if self.step_count % item == 0:
                    self.analyzer.plot_obs(obs[0, ...].sum(0))
            elif key == "data_step" and item is not None:
                if self.step_count % item == 0:
                    self.analyzer.plot_spikes(self.get_spike_data())
                    self.analyzer.plot_voltages(*self.get_voltage_data())
            elif key == "reward_eps" and item is not None:
                if self.episode % item == 0 and done:
                    self.analyzer.plot_reward(self.reward_list)

        self.analyzer.finalize_step()
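
A minimal usage sketch of the pipeline above, assuming the surrounding library is BindsNET (which these listings appear to come from), so that ``GymEnvironment`` and ``select_softmax`` are importable. The environment id, the output layer name "Y", the keyword-argument values, and the previously built ``network`` are all illustrative assumptions, not required settings.

from bindsnet.environment import GymEnvironment
from bindsnet.pipeline.action import select_softmax

# Illustrative only: environment id, layer name, and hyperparameters are assumptions.
environment = GymEnvironment("BreakoutDeterministic-v4")
environment.reset()

pipeline = EnvironmentPipeline(
    network,                         # a spiking Network built elsewhere
    environment,
    action_function=select_softmax,  # maps output-layer spikes to an action index
    output="Y",                      # layer whose spikes drive action selection
    time=100,                        # simulation time per environment step
    num_episodes=10,
    percent_of_random_action=0.05,   # occasional random exploration
)

pipeline.train()
print(pipeline.reward_list)          # accumulated reward per episode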