class VarMaskedLSTMCell(VarMaskedRNNCellBase):
    """An LSTM cell with variational dropout: dropout masks are sampled once per
    batch via reset_noise() and reused at every time step.
    """

    def __init__(self, input_size, hidden_size, bias=True, p=(0.5, 0.5), initializer=None):
        super(VarMaskedLSTMCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.weight_ih = Parameter(torch.Tensor(4, input_size, hidden_size))
        self.weight_hh = Parameter(torch.Tensor(4, hidden_size, hidden_size))
        if bias:
            self.bias_ih = Parameter(torch.Tensor(4, hidden_size))
            self.bias_hh = Parameter(torch.Tensor(4, hidden_size))
        else:
            self.register_parameter('bias_ih', None)
            self.register_parameter('bias_hh', None)
        self.initializer = default_initializer(self.hidden_size) if initializer is None else initializer
        self.reset_parameters()
        p_in, p_hidden = p
        if p_in < 0 or p_in > 1:
            raise ValueError("input dropout probability has to be between 0 and 1, "
                             "but got {}".format(p_in))
        if p_hidden < 0 or p_hidden > 1:
            raise ValueError("hidden state dropout probability has to be between 0 and 1, "
                             "but got {}".format(p_hidden))
        self.p_in = p_in
        self.p_hidden = p_hidden
        self.noise_in = None
        self.noise_hidden = None

    def reset_parameters(self):
        for weight in self.parameters():
            if weight.dim() == 2:
                # 2-d parameters are the stacked biases; the 3-d weights go
                # through the initializer below.
                nn.init.constant_(weight, 0.)
            else:
                self.initializer(weight.data)

    def reset_noise(self, batch_size):
        if self.training:
            if self.p_in:
                noise = self.weight_ih.new_empty(4, batch_size, self.input_size)
                self.noise_in = noise.bernoulli_(1.0 - self.p_in) / (1.0 - self.p_in)
            else:
                self.noise_in = None
            if self.p_hidden:
                noise = self.weight_hh.new_empty(4, batch_size, self.hidden_size)
                self.noise_hidden = noise.bernoulli_(1.0 - self.p_hidden) / (1.0 - self.p_hidden)
            else:
                self.noise_hidden = None
        else:
            self.noise_in = None
            self.noise_hidden = None

    def forward(self, input, hx):
        return rnn_F.VarLSTMCell(
            input, hx,
            self.weight_ih, self.weight_hh,
            self.bias_ih, self.bias_hh,
            self.noise_in, self.noise_hidden,
        )
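
# A minimal usage sketch for VarMaskedLSTMCell (illustrative only; it assumes the
# module-level imports above and that rnn_F.VarLSTMCell returns an (h, c) pair,
# mirroring torch.nn.LSTMCell). The key point is that reset_noise() samples the
# dropout masks once per sequence, and the same masks are then reused at every
# time step, which is what makes the dropout "variational".
def _demo_var_masked_lstm_cell():
    batch_size, input_size, hidden_size = 8, 16, 32
    cell = VarMaskedLSTMCell(input_size, hidden_size, p=(0.25, 0.25))
    cell.train()
    cell.reset_noise(batch_size)  # fresh masks for this batch / sequence
    hx = (torch.zeros(batch_size, hidden_size),
          torch.zeros(batch_size, hidden_size))
    for _ in range(5):  # unroll a short sequence under the same masks
        hx = cell(torch.randn(batch_size, input_size), hx)
    return hx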
class SkipConnectGRUCell(VarRNNCellBase):
    r"""A gated recurrent unit (GRU) cell with skip connections and variational dropout.

    .. math::

        \begin{array}{ll}
        r = \mathrm{sigmoid}(W_{ir} x + b_{ir} + W_{hr} h + b_{hr}) \\
        z = \mathrm{sigmoid}(W_{iz} x + b_{iz} + W_{hz} h + b_{hz}) \\
        n = \tanh(W_{in} x + b_{in} + r * (W_{hn} h + b_{hn})) \\
        h' = (1 - z) * n + z * h
        \end{array}

    Args:
        input_size: The number of expected features in the input x
        hidden_size: The number of features in the hidden state h
        bias: If `False`, then the layer does not use bias weights `b_ih` and
            `b_hh`. Default: `True`
        p (tuple, optional): (p_in, p_hidden), the drop probabilities for the
            input and the hidden state. Default: (0.5, 0.5)

    Inputs: input, hidden, h_s
        - **input** (batch, input_size): tensor containing input features
        - **hidden** (batch, hidden_size): tensor containing the initial hidden
          state for each element in the batch.
        - **h_s** (batch, hidden_size): tensor containing the skip connection
          state for each element in the batch.

    Outputs: h'
        - **h'** (batch, hidden_size): tensor containing the next hidden state
          for each element in the batch

    Attributes:
        weight_ih: the learnable input-hidden weights, of shape
            `(3 x input_size x hidden_size)`
        weight_hh: the learnable hidden-hidden weights, of shape
            `(3 x 2*hidden_size x hidden_size)`
        bias_ih: the learnable input-hidden bias, of shape `(3 x hidden_size)`
        bias_hh: the learnable hidden-hidden bias, of shape `(3 x hidden_size)`
    """

    def __init__(self, input_size, hidden_size, bias=True, p=(0.5, 0.5)):
        super(SkipConnectGRUCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.weight_ih = Parameter(torch.Tensor(3, input_size, hidden_size))
        self.weight_hh = Parameter(torch.Tensor(3, hidden_size * 2, hidden_size))
        if bias:
            self.bias_ih = Parameter(torch.Tensor(3, hidden_size))
            self.bias_hh = Parameter(torch.Tensor(3, hidden_size))
        else:
            self.register_parameter('bias_ih', None)
            self.register_parameter('bias_hh', None)
        self.reset_parameters()
        p_in, p_hidden = p
        if p_in < 0 or p_in > 1:
            raise ValueError("input dropout probability has to be between 0 and 1, "
                             "but got {}".format(p_in))
        if p_hidden < 0 or p_hidden > 1:
            raise ValueError("hidden state dropout probability has to be between 0 and 1, "
                             "but got {}".format(p_hidden))
        self.p_in = p_in
        self.p_hidden = p_hidden
        self.noise_in = None
        self.noise_hidden = None

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.weight_hh)
        nn.init.xavier_uniform_(self.weight_ih)
        if self.bias:
            nn.init.constant_(self.bias_hh, 0.)
            nn.init.constant_(self.bias_ih, 0.)

    def reset_noise(self, batch_size):
        if self.training:
            if self.p_in:
                noise = self.weight_ih.new_empty(3, batch_size, self.input_size)
                self.noise_in = noise.bernoulli_(1.0 - self.p_in) / (1.0 - self.p_in)
            else:
                self.noise_in = None
            if self.p_hidden:
                # The hidden noise covers the concatenation of hx and hs,
                # hence 2 * hidden_size.
                noise = self.weight_hh.new_empty(3, batch_size, self.hidden_size * 2)
                self.noise_hidden = noise.bernoulli_(1.0 - self.p_hidden) / (1.0 - self.p_hidden)
            else:
                self.noise_hidden = None
        else:
            self.noise_in = None
            self.noise_hidden = None

    def forward(self, input, hx, hs):
        return rnn_F.SkipConnectGRUCell(
            input, hx, hs,
            self.weight_ih, self.weight_hh,
            self.bias_ih, self.bias_hh,
            self.noise_in, self.noise_hidden,
        )
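
# A minimal usage sketch for SkipConnectGRUCell (illustrative only; it assumes
# the module-level imports above). The extra hs argument is the skip-connection
# state, e.g. a hidden state from an earlier layer or step; it is consumed
# alongside hx, which is why weight_hh has 2*hidden_size input rows.
def _demo_skip_connect_gru_cell():
    batch_size, input_size, hidden_size = 4, 10, 20
    cell = SkipConnectGRUCell(input_size, hidden_size, p=(0.1, 0.1))
    cell.train()
    cell.reset_noise(batch_size)
    x = torch.randn(batch_size, input_size)
    hx = torch.zeros(batch_size, hidden_size)
    hs = torch.zeros(batch_size, hidden_size)  # skip-connection state
    return cell(x, hx, hs)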
class SkipConnectLSTMCell(VarRNNCellBase):
    r"""A long short-term memory (LSTM) cell with skip connections and variational dropout.

    .. math::

        \begin{array}{ll}
        i = \mathrm{sigmoid}(W_{ii} x + b_{ii} + W_{hi} h + b_{hi}) \\
        f = \mathrm{sigmoid}(W_{if} x + b_{if} + W_{hf} h + b_{hf}) \\
        g = \tanh(W_{ig} x + b_{ig} + W_{hg} h + b_{hg}) \\
        o = \mathrm{sigmoid}(W_{io} x + b_{io} + W_{ho} h + b_{ho}) \\
        c' = f * c + i * g \\
        h' = o * \tanh(c')
        \end{array}

    Args:
        input_size: The number of expected features in the input x
        hidden_size: The number of features in the hidden state h
        bias: If `False`, then the layer does not use bias weights `b_ih` and
            `b_hh`. Default: `True`
        p (tuple, optional): (p_in, p_hidden), the drop probabilities for the
            input and the hidden state. Default: (0.5, 0.5)

    Inputs: input, (h_0, c_0), h_s
        - **input** (batch, input_size): tensor containing input features
        - **h_0** (batch, hidden_size): tensor containing the initial hidden
          state for each element in the batch.
        - **c_0** (batch, hidden_size): tensor containing the initial cell
          state for each element in the batch.
        - **h_s** (batch, hidden_size): tensor containing the skip connection
          state for each element in the batch.

    Outputs: h_1, c_1
        - **h_1** (batch, hidden_size): tensor containing the next hidden state
          for each element in the batch
        - **c_1** (batch, hidden_size): tensor containing the next cell state
          for each element in the batch

    Attributes:
        weight_ih: the learnable input-hidden weights, of shape
            `(4 x input_size x hidden_size)`
        weight_hh: the learnable hidden-hidden weights, of shape
            `(4 x 2*hidden_size x hidden_size)`
        bias_ih: the learnable input-hidden bias, of shape `(4 x hidden_size)`
        bias_hh: the learnable hidden-hidden bias, of shape `(4 x hidden_size)`
    """

    def __init__(self, input_size, hidden_size, bias=True, p=(0.5, 0.5)):
        super(SkipConnectLSTMCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.weight_ih = Parameter(torch.Tensor(4, input_size, hidden_size))
        self.weight_hh = Parameter(torch.Tensor(4, 2 * hidden_size, hidden_size))
        if bias:
            self.bias_ih = Parameter(torch.Tensor(4, hidden_size))
            self.bias_hh = Parameter(torch.Tensor(4, hidden_size))
        else:
            self.register_parameter('bias_ih', None)
            self.register_parameter('bias_hh', None)
        self.reset_parameters()
        p_in, p_hidden = p
        if p_in < 0 or p_in > 1:
            raise ValueError("input dropout probability has to be between 0 and 1, "
                             "but got {}".format(p_in))
        if p_hidden < 0 or p_hidden > 1:
            raise ValueError("hidden state dropout probability has to be between 0 and 1, "
                             "but got {}".format(p_hidden))
        self.p_in = p_in
        self.p_hidden = p_hidden
        self.noise_in = None
        self.noise_hidden = None

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.weight_hh)
        nn.init.xavier_uniform_(self.weight_ih)
        if self.bias:
            nn.init.constant_(self.bias_hh, 0.)
            nn.init.constant_(self.bias_ih, 0.)

    def reset_noise(self, batch_size):
        if self.training:
            if self.p_in:
                noise = self.weight_ih.new_empty(4, batch_size, self.input_size)
                self.noise_in = noise.bernoulli_(1.0 - self.p_in) / (1.0 - self.p_in)
            else:
                self.noise_in = None
            if self.p_hidden:
                # The hidden noise covers the concatenation of hx and hs,
                # hence 2 * hidden_size.
                noise = self.weight_hh.new_empty(4, batch_size, self.hidden_size * 2)
                self.noise_hidden = noise.bernoulli_(1.0 - self.p_hidden) / (1.0 - self.p_hidden)
            else:
                self.noise_hidden = None
        else:
            self.noise_in = None
            self.noise_hidden = None

    def forward(self, input, hx, hs):
        return rnn_F.SkipConnectLSTMCell(
            input, hx, hs,
            self.weight_ih, self.weight_hh,
            self.bias_ih, self.bias_hh,
            self.noise_in, self.noise_hidden,
        )
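
# A minimal usage sketch for SkipConnectLSTMCell (illustrative only; it assumes
# the module-level imports above). Per the docstring, the cell takes the state
# pair (h_0, c_0) plus a skip-connection state h_s and returns (h_1, c_1).
def _demo_skip_connect_lstm_cell():
    batch_size, input_size, hidden_size = 4, 10, 20
    cell = SkipConnectLSTMCell(input_size, hidden_size, p=(0.2, 0.2))
    cell.train()
    cell.reset_noise(batch_size)
    x = torch.randn(batch_size, input_size)
    hx = (torch.zeros(batch_size, hidden_size),
          torch.zeros(batch_size, hidden_size))
    hs = torch.zeros(batch_size, hidden_size)  # skip-connection state
    h_1, c_1 = cell(x, hx, hs)
    return h_1, c_1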
class SkipConnectRNNCell(VarRNNCellBase):
    r"""An Elman RNN cell with tanh or ReLU non-linearity, skip connections, and variational dropout.

    .. math::

        h' = \tanh(w_{ih} * x + b_{ih} + w_{hh} * (h * \gamma) + b_{hh})

    Args:
        input_size: The number of expected features in the input x
        hidden_size: The number of features in the hidden state h
        bias: If `False`, then the layer does not use bias weights `b_ih` and
            `b_hh`. Default: `True`
        nonlinearity: The non-linearity to use ['tanh'|'relu']. Default: 'tanh'
        p (tuple, optional): (p_in, p_hidden), the drop probabilities for the
            input and the hidden state. Default: (0.5, 0.5)

    Inputs: input, hidden, h_s
        - **input** (batch, input_size): tensor containing input features
        - **hidden** (batch, hidden_size): tensor containing the initial hidden
          state for each element in the batch.
        - **h_s** (batch, hidden_size): tensor containing the skip connection
          state for each element in the batch.

    Outputs: h'
        - **h'** (batch, hidden_size): tensor containing the next hidden state
          for each element in the batch

    Attributes:
        weight_ih: the learnable input-hidden weights, of shape
            `(hidden_size x input_size)`
        weight_hh: the learnable hidden-hidden weights, of shape
            `(hidden_size x 2*hidden_size)`
        bias_ih: the learnable input-hidden bias, of shape `(hidden_size)`
        bias_hh: the learnable hidden-hidden bias, of shape `(hidden_size)`
    """

    def __init__(self, input_size, hidden_size, bias=True, nonlinearity="tanh", p=(0.5, 0.5)):
        super(SkipConnectRNNCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.nonlinearity = nonlinearity
        self.weight_ih = Parameter(torch.Tensor(hidden_size, input_size))
        self.weight_hh = Parameter(torch.Tensor(hidden_size, hidden_size * 2))
        if bias:
            self.bias_ih = Parameter(torch.Tensor(hidden_size))
            self.bias_hh = Parameter(torch.Tensor(hidden_size))
        else:
            self.register_parameter('bias_ih', None)
            self.register_parameter('bias_hh', None)
        self.reset_parameters()
        p_in, p_hidden = p
        if p_in < 0 or p_in > 1:
            raise ValueError("input dropout probability has to be between 0 and 1, "
                             "but got {}".format(p_in))
        if p_hidden < 0 or p_hidden > 1:
            raise ValueError("hidden state dropout probability has to be between 0 and 1, "
                             "but got {}".format(p_hidden))
        self.p_in = p_in
        self.p_hidden = p_hidden
        self.noise_in = None
        self.noise_hidden = None

    def reset_parameters(self):
        nn.init.xavier_uniform_(self.weight_hh)
        nn.init.xavier_uniform_(self.weight_ih)
        if self.bias:
            nn.init.constant_(self.bias_hh, 0.)
            nn.init.constant_(self.bias_ih, 0.)

    def reset_noise(self, batch_size):
        if self.training:
            if self.p_in:
                noise = self.weight_ih.new_empty(batch_size, self.input_size)
                self.noise_in = noise.bernoulli_(1.0 - self.p_in) / (1.0 - self.p_in)
            else:
                self.noise_in = None
            if self.p_hidden:
                # The hidden noise covers the concatenation of hx and hs,
                # hence 2 * hidden_size.
                noise = self.weight_hh.new_empty(batch_size, self.hidden_size * 2)
                self.noise_hidden = noise.bernoulli_(1.0 - self.p_hidden) / (1.0 - self.p_hidden)
            else:
                self.noise_hidden = None
        else:
            self.noise_in = None
            self.noise_hidden = None

    def forward(self, input, hx, hs):
        if self.nonlinearity == "tanh":
            func = rnn_F.SkipConnectRNNTanhCell
        elif self.nonlinearity == "relu":
            func = rnn_F.SkipConnectRNNReLUCell
        else:
            raise RuntimeError("Unknown nonlinearity: {}".format(self.nonlinearity))
        return func(
            input, hx, hs,
            self.weight_ih, self.weight_hh,
            self.bias_ih, self.bias_hh,
            self.noise_in, self.noise_hidden,
        )
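
# A minimal usage sketch for SkipConnectRNNCell (illustrative only; it assumes
# the module-level imports above). It shows the nonlinearity switch: 'relu'
# dispatches to rnn_F.SkipConnectRNNReLUCell instead of the tanh variant.
def _demo_skip_connect_rnn_cell():
    batch_size, input_size, hidden_size = 4, 10, 20
    cell = SkipConnectRNNCell(input_size, hidden_size, nonlinearity="relu", p=(0.1, 0.1))
    cell.train()
    cell.reset_noise(batch_size)
    x = torch.randn(batch_size, input_size)
    hx = torch.zeros(batch_size, hidden_size)
    hs = torch.zeros(batch_size, hidden_size)  # skip-connection state
    return cell(x, hx, hs)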
class VarRNNCell(VarRNNCellBase):
    """An Elman RNN cell (tanh or ReLU non-linearity) with variational dropout:
    dropout masks are sampled once per batch via reset_noise() and reused at
    every time step.
    """

    def __init__(self, input_size, hidden_size, bias=True, nonlinearity="tanh", p=(0.5, 0.5), initializer=None):
        super(VarRNNCell, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.bias = bias
        self.nonlinearity = nonlinearity
        self.weight_ih = Parameter(torch.Tensor(hidden_size, input_size))
        self.weight_hh = Parameter(torch.Tensor(hidden_size, hidden_size))
        if bias:
            self.bias_ih = Parameter(torch.Tensor(hidden_size))
            self.bias_hh = Parameter(torch.Tensor(hidden_size))
        else:
            self.register_parameter('bias_ih', None)
            self.register_parameter('bias_hh', None)
        self.initializer = default_initializer(self.hidden_size) if initializer is None else initializer
        self.reset_parameters()
        p_in, p_hidden = p
        if p_in < 0 or p_in > 1:
            raise ValueError("input dropout probability has to be between 0 and 1, "
                             "but got {}".format(p_in))
        if p_hidden < 0 or p_hidden > 1:
            raise ValueError("hidden state dropout probability has to be between 0 and 1, "
                             "but got {}".format(p_hidden))
        self.p_in = p_in
        self.p_hidden = p_hidden
        self.noise_in = None
        self.noise_hidden = None

    def reset_parameters(self):
        for weight in self.parameters():
            if weight.dim() == 1:
                # 1-d parameters are the biases; the 2-d weights go through
                # the initializer below.
                nn.init.constant_(weight, 0.)
            else:
                self.initializer(weight.data)

    def reset_noise(self, batch_size):
        if self.training:
            if self.p_in:
                noise = self.weight_ih.new_empty(batch_size, self.input_size)
                self.noise_in = noise.bernoulli_(1.0 - self.p_in) / (1.0 - self.p_in)
            else:
                self.noise_in = None
            if self.p_hidden:
                noise = self.weight_hh.new_empty(batch_size, self.hidden_size)
                self.noise_hidden = noise.bernoulli_(1.0 - self.p_hidden) / (1.0 - self.p_hidden)
            else:
                self.noise_hidden = None
        else:
            self.noise_in = None
            self.noise_hidden = None

    def forward(self, input, hx):
        if self.nonlinearity == "tanh":
            func = rnn_F.VarRNNTanhCell
        elif self.nonlinearity == "relu":
            func = rnn_F.VarRNNReLUCell
        else:
            raise RuntimeError("Unknown nonlinearity: {}".format(self.nonlinearity))
        return func(
            input, hx,
            self.weight_ih, self.weight_hh,
            self.bias_ih, self.bias_hh,
            self.noise_in, self.noise_hidden,
        )
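
# A minimal usage sketch for VarRNNCell (illustrative only; it assumes the
# module-level imports above). In eval mode, reset_noise() clears the masks,
# so the cell runs without dropout at inference time.
def _demo_var_rnn_cell():
    batch_size, input_size, hidden_size = 4, 10, 20
    cell = VarRNNCell(input_size, hidden_size, p=(0.2, 0.2))
    cell.eval()
    cell.reset_noise(batch_size)  # sets noise_in / noise_hidden to None
    x = torch.randn(batch_size, input_size)
    hx = torch.zeros(batch_size, hidden_size)
    return cell(x, hx)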