def __init__(self, input_size, hidden_size, num_layers, bias=True, dropout_p=0,
             invariant_dropout_mask=False, bidirectional=False,
             surrogate_function=surrogate.Erf()):
    '''
    Build the multi-layer recurrent module by handing every argument, unchanged
    and in the same order, to the parent-class constructor.

    NOTE(review): the parent class is not visible from this block, so the
    parameter descriptions below are inferred from the parameter names only --
    confirm them against the base class.

    :param input_size: presumably the number of features of the input
    :param hidden_size: presumably the number of features of the hidden state
    :param num_layers: presumably the number of stacked recurrent layers
    :param bias: presumably whether the internal layers use bias terms,
        defaults to ``True``
    :param dropout_p: presumably the dropout probability applied between
        layers, defaults to ``0``
    :param invariant_dropout_mask: presumably selects a dropout whose mask is
        kept fixed across time steps, defaults to ``False``
    :param bidirectional: presumably builds a bidirectional network when
        ``True``, defaults to ``False``
    :param surrogate_function: forwarded as the last positional argument;
        presumably the surrogate gradient used for the spiking function.
        The default ``surrogate.Erf()`` object is created once, when this
        method is defined, so all instances relying on the default share it.
    '''
    super().__init__(
        input_size,
        hidden_size,
        num_layers,
        bias,
        dropout_p,
        invariant_dropout_mask,
        bidirectional,
        surrogate_function,
    )
def __init__(self, input_size: int, hidden_size: int, bias=True,
             surrogate_function=surrogate.Erf()):
    '''
    Set up a recurrent cell made of one input-to-hidden and one
    hidden-to-hidden linear layer, both producing ``hidden_size`` features.

    :param input_size: number of input features of the input-to-hidden layer
    :type input_size: int
    :param hidden_size: number of output features of both linear layers and
        number of input features of the hidden-to-hidden layer
    :type hidden_size: int
    :param bias: passed as ``bias`` to both ``nn.Linear`` layers and to the
        parent constructor, defaults to ``True``
    :type bias: bool
    :param surrogate_function: stored as ``self.surrogate_function``;
        presumably the surrogate gradient of the spiking function (its use is
        not visible in this block). The default ``surrogate.Erf()`` object is
        created once, at definition time, and shared by all instances that
        rely on the default.
    '''
    super().__init__(input_size, hidden_size, bias)

    out_features = hidden_size

    # Registration order (linear_ih, linear_hh, surrogate_function) is kept
    # as-is because it determines the order of submodules and parameters.
    self.linear_ih = nn.Linear(input_size, out_features, bias=bias)
    self.linear_hh = nn.Linear(hidden_size, out_features, bias=bias)
    self.surrogate_function = surrogate_function

    # reset_parameters() is defined outside this block; per its name it
    # re-initialises the weights created above.
    self.reset_parameters()
def __init__(self, input_size: int, hidden_size: int, bias=True,
             surrogate_function1=surrogate.Erf(), surrogate_function2=None):
    '''
    Set up a recurrent cell whose input-to-hidden and hidden-to-hidden linear
    layers each produce ``3 * hidden_size`` features.

    NOTE(review): the factor 3 presumably corresponds to three gates computed
    from one fused projection -- confirm against the forward pass, which is
    not visible here.

    :param input_size: number of input features of the input-to-hidden layer
    :type input_size: int
    :param hidden_size: number of input features of the hidden-to-hidden
        layer; both layers output ``3 * hidden_size`` features
    :type hidden_size: int
    :param bias: passed as ``bias`` to both ``nn.Linear`` layers and to the
        parent constructor, defaults to ``True``
    :type bias: bool
    :param surrogate_function1: stored as ``self.surrogate_function1``. The
        default ``surrogate.Erf()`` object is created once, at definition
        time, and shared by all instances that rely on the default.
    :param surrogate_function2: stored as ``self.surrogate_function2``,
        defaults to ``None``. When given, its ``spiking`` attribute must equal
        that of ``surrogate_function1``.
    :raises AssertionError: if ``surrogate_function2`` is given and the two
        surrogate functions disagree on ``spiking``
    '''
    super().__init__(input_size, hidden_size, bias)

    fused_width = 3 * hidden_size

    # Registration order is kept as-is: it determines the order of submodules
    # and parameters.
    self.linear_ih = nn.Linear(input_size, fused_width, bias=bias)
    self.linear_hh = nn.Linear(hidden_size, fused_width, bias=bias)
    self.surrogate_function1 = surrogate_function1
    self.surrogate_function2 = surrogate_function2

    if surrogate_function2 is not None:
        # NOTE(review): this check is an ``assert`` and therefore disappears
        # when Python runs with -O.
        assert surrogate_function1.spiking == surrogate_function2.spiking

    # reset_parameters() is defined outside this block.
    self.reset_parameters()
def __init__(self, neuron_shape, tau_m: float, tau_adp: float, v_threshold_baseline=1.0, v_threshold_range=1.8,
             v_reset=0.0, surrogate_function=surrogate.Erf(), monitor_state=False, dt=1.0):
    '''
    .. _AdaptThresholdNode.__init__-en:

    A neuron model with an adaptive threshold, proposed in `Effective and Efficient Computation with
    Multiple-timescale Spiking Recurrent Neural Networks <https://arxiv.org/abs/2005.11633>`_. Compared to the
    vanilla LIF neuron, an additional dynamic equation for the threshold is added:

    .. math::

        \\begin{align}
        \\eta_t & = \\rho\\eta_{t-1}+(1-\\rho)S_{t-1},\\\\
        \\theta_t & = b_0+\\beta\\eta_t,
        \\end{align}

    where :math:`\\eta_t` is the growth of the threshold at time step t, :math:`\\rho` is the time constant
    determined by ``tau_adp`` in the threshold dynamics, and :math:`\\theta_t` is the voltage threshold at time
    step t.

    All time constants in the neuron dynamics are **learnable** network parameters.

    .. admonition:: Hint
        :class: hint

        Unlike the other neuron layers in this module, the time constants are NOT shared between neurons of
        the same layer: ``tau_m`` and ``tau_adp`` are each stored as one value per neuron.

    :param neuron_shape: shape of the neuron tensor
    :type neuron_shape: array_like
    :param tau_m: initial value of the membrane potential time constant
    :type tau_m: float
    :param tau_adp: initial value of the threshold time constant
    :type tau_adp: float
    :param v_threshold_baseline: minimal threshold, also the initial threshold :math:`b_0`, defaults to 1.0
    :type v_threshold_baseline: float
    :param v_threshold_range: parameter :math:`\\beta` that restricts the threshold to
        :math:`[b_0,b_0+\\beta]`, defaults to 1.8
    :type v_threshold_range: float
    :param v_reset: reset voltage of the neurons. If not ``None``, the voltage of neurons that just fired is
        set to ``v_reset`` and the initial voltage is ``v_reset``. If ``None``, the voltage of neurons that
        just fired has ``v_threshold`` subtracted and the initial voltage is 0.0. Defaults to 0.0
    :type v_reset: float
    :param surrogate_function: surrogate function replacing the gradient of the spiking function during
        back-propagation, defaults to ``surrogate.Erf()``. The default object is created once, at definition
        time, and shared by all instances that rely on the default.
    :param monitor_state: whether to turn on the monitor. If ``True``, ``self.monitor`` is a dict with the keys
        ``v`` and ``s`` (voltage and output spikes), each starting as an empty list; otherwise
        ``self.monitor`` is ``False``. Defaults to False
    :type monitor_state: bool
    :param dt: simulation interval constant of the neurons, defaults to 1.0
    :type dt: float
    '''
    super().__init__()

    def _per_neuron_constant(initial_value):
        # One independent learnable value per neuron, all starting at ``initial_value``.
        return nn.Parameter(torch.full(neuron_shape, fill_value=initial_value, dtype=torch.float))

    self.neuron_shape = neuron_shape
    self.b_0 = v_threshold_baseline
    # Starts at 0; how ``b`` is updated is not visible in this block.
    self.b = 0
    self.v_reset = v_reset
    self.beta = v_threshold_range

    # tau_m is registered before tau_adp, which fixes the parameter order.
    self.tau_m = _per_neuron_constant(tau_m)
    self.tau_adp = _per_neuron_constant(tau_adp)
    self.dt = dt

    # Plain tensor attribute filled with uniform random values in [0, 1).
    self.last_spike = torch.rand(neuron_shape)

    # Without a reset voltage the membrane potential starts at zero.
    self.v = 0.0 if v_reset is None else v_reset
    self.v_threshold = v_threshold_baseline
    self.surrogate_function = surrogate_function

    self.monitor = {'v': [], 's': []} if monitor_state else False
def __init__(self, input_size, hidden_size, num_layers, bias=True, dropout_p=0,
             invariant_dropout_mask=False, bidirectional=False,
             surrogate_function1=surrogate.Erf(), surrogate_function2=None):
    '''
    .. _SpikingLSTM.__init__-en:

    The `spiking` multi-layer long short-term memory (LSTM), first proposed in `Long Short-Term Memory Spiking
    Networks and Their Applications <https://arxiv.org/abs/2007.04779>`_.

    For each element in the input sequence, each layer computes the following function:

    .. math::

        i_{t} &= \\Theta(W_{ii} x_{t} + b_{ii} + W_{hi} h_{t-1} + b_{hi}) \\\\
        f_{t} &= \\Theta(W_{if} x_{t} + b_{if} + W_{hf} h_{t-1} + b_{hf}) \\\\
        g_{t} &= \\Theta(W_{ig} x_{t} + b_{ig} + W_{hg} h_{t-1} + b_{hg}) \\\\
        o_{t} &= \\Theta(W_{io} x_{t} + b_{io} + W_{ho} h_{t-1} + b_{ho}) \\\\
        c_{t} &= f_{t} * c_{t-1} + i_{t} * g_{t} \\\\
        h_{t} &= o_{t} * c_{t}

    where :math:`h_t` is the hidden state at time `t`, :math:`c_t` is the cell state at time `t`, :math:`x_t` is
    the input at time `t`, :math:`h_{t-1}` is the hidden state of the layer at time `t-1` or the initial hidden
    state at time `0`, and :math:`i_t`, :math:`f_t`, :math:`g_t`, :math:`o_t` are the input, forget, cell, and
    output gates, respectively. :math:`\\Theta` is the heaviside step (spiking) function, and :math:`*` is the
    Hadamard (element-wise) product.

    :param input_size: The number of expected features in the input ``x``
    :type input_size: int
    :param hidden_size: The number of features in the hidden state ``h``
    :type hidden_size: int
    :param num_layers: Number of recurrent layers. E.g., setting ``num_layers=2`` would mean stacking two LSTMs
        together to form a `stacked RNN`, with the second RNN taking in outputs of the first RNN and computing
        the final results
    :type num_layers: int
    :param bias: If ``False``, then the layer does not use bias weights ``b_ih`` and ``b_hh``.
        Default: ``True``
    :type bias: bool
    :param dropout_p: If non-zero, introduces a `Dropout` layer on the outputs of each RNN layer except the last
        layer, with dropout probability equal to ``dropout_p``. Default: 0
    :type dropout_p: float
    :param invariant_dropout_mask: If ``False``, use the naive `Dropout`; if ``True``, use the SNN-specific
        dropout whose `mask` does not change between time steps, see
        :class:`~spikingjelly.clock_driven.layer.Dropout` for more information. Default: ``False``
    :type invariant_dropout_mask: bool
    :param bidirectional: If ``True``, becomes a bidirectional LSTM. Default: ``False``
    :type bidirectional: bool
    :param surrogate_function1: surrogate function for replacing gradient of spiking functions during
        back-propagation, which is used for generating ``i``, ``f``, ``o``
    :type surrogate_function1: spikingjelly.clock_driven.surrogate.SurrogateFunctionBase
    :param surrogate_function2: surrogate function for replacing gradient of spiking functions during
        back-propagation, which is used for generating ``g``. If ``None``, the surrogate function for generating
        ``g`` will be set as ``surrogate_function1``. Default: ``None``
    :type surrogate_function2: None or spikingjelly.clock_driven.surrogate.SurrogateFunctionBase
    '''
    # All arguments go to the parent constructor positionally, in signature order.
    super().__init__(
        input_size,
        hidden_size,
        num_layers,
        bias,
        dropout_p,
        invariant_dropout_mask,
        bidirectional,
        surrogate_function1,
        surrogate_function2,
    )
def __init__(self, input_size: int, hidden_size: int, bias=True,
             surrogate_function1=surrogate.Erf(), surrogate_function2=None):
    '''
    .. _SpikingLSTMCell.__init__-en:

    A `spiking` long short-term memory (LSTM) cell, first proposed in `Long Short-Term Memory Spiking Networks
    and Their Applications <https://arxiv.org/abs/2007.04779>`_.

    .. math::

        i &= \\Theta(W_{ii} x + b_{ii} + W_{hi} h + b_{hi}) \\\\
        f &= \\Theta(W_{if} x + b_{if} + W_{hf} h + b_{hf}) \\\\
        g &= \\Theta(W_{ig} x + b_{ig} + W_{hg} h + b_{hg}) \\\\
        o &= \\Theta(W_{io} x + b_{io} + W_{ho} h + b_{ho}) \\\\
        c' &= f * c + i * g \\\\
        h' &= o * c'

    where :math:`\\Theta` is the heaviside step (spiking) function, and :math:`*` is the Hadamard
    (element-wise) product.

    :param input_size: The number of expected features in the input ``x``
    :type input_size: int
    :param hidden_size: The number of features in the hidden state ``h``
    :type hidden_size: int
    :param bias: If ``False``, then the layer does not use bias weights ``b_ih`` and ``b_hh``.
        Default: ``True``
    :type bias: bool
    :param surrogate_function1: surrogate function for replacing gradient of spiking functions during
        back-propagation, which is used for generating ``i``, ``f``, ``o``
    :type surrogate_function1: spikingjelly.clock_driven.surrogate.SurrogateFunctionBase
    :param surrogate_function2: surrogate function for replacing gradient of spiking functions during
        back-propagation, which is used for generating ``g``. If ``None``, the surrogate function for generating
        ``g`` will be set as ``surrogate_function1``. Default: ``None``
    :type surrogate_function2: None or spikingjelly.clock_driven.surrogate.SurrogateFunctionBase
    :raises AssertionError: if ``surrogate_function2`` is given and its ``spiking`` attribute differs from
        that of ``surrogate_function1``

    .. admonition:: Note
        :class: note

        All the weights and biases are initialized from :math:`\\mathcal{U}(-\\sqrt{k}, \\sqrt{k})`
        where :math:`k = \\frac{1}{\\text{hidden_size}}`.

    Examples:

    .. code-block:: python

        T = 6
        batch_size = 2
        input_size = 3
        hidden_size = 4
        cell = rnn.SpikingLSTMCell(input_size, hidden_size)
        x_seq = torch.randn(T, batch_size, input_size) * 50
        h = torch.randn(batch_size, hidden_size)
        c = torch.randn(batch_size, hidden_size)

        output = []
        for t in range(T):
            h, c = cell(x_seq[t], (h, c))
            output.append(h)
        print(output)
    '''
    super().__init__(input_size, hidden_size, bias)

    # Both projections are four times as wide as the hidden state, matching
    # the four gates (i, f, g, o) in the equations above.
    fused_width = 4 * hidden_size

    # Registration order is kept as-is: it determines the order of submodules
    # and parameters.
    self.linear_ih = nn.Linear(input_size, fused_width, bias=bias)
    self.linear_hh = nn.Linear(hidden_size, fused_width, bias=bias)
    self.surrogate_function1 = surrogate_function1
    self.surrogate_function2 = surrogate_function2

    if surrogate_function2 is not None:
        # NOTE(review): this check is an ``assert`` and therefore disappears
        # when Python runs with -O.
        assert surrogate_function1.spiking == surrogate_function2.spiking

    # reset_parameters() is defined outside this block.
    self.reset_parameters()