Example no. 1
import math

import torch
import torch.nn as nn
from torch.nn import Parameter


class LSTM(nn.Module):
    def __init__(self,
                 input_size,
                 hidden_size,
                 bias=True,
                 attention=False,
                 is_cuda=False):
        super(LSTM, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, bias=bias)
        self.attention = attention
        if attention:
            self.attention_linear = nn.Linear(2 * hidden_size,
                                              hidden_size,
                                              bias=bias)
            self.ws = Parameter(torch.randn(hidden_size, hidden_size))
            self.vs = Parameter(torch.randn(hidden_size, 1))
        # Running history of hidden states, used as the attention memory.
        self.previous_hidden = None
        self.device = "cuda:0" if is_cuda else "cpu"

    def detach_attention_params(self):
        self.ws.detach_().requires_grad = True
        self.vs.detach_().requires_grad = True

    def __str__(self):
        return 'LSTM ({}, {} attention:{})'.format(self.input_size,
                                                   self.hidden_size,
                                                   self.attention)

    def reset_parameters(self):
        std = 1.0 / math.sqrt(self.hidden_size)
        for w in self.parameters():
            w.data.uniform_(-std, std)

    def forward(self, input, state):
        batch_size = input.size(0)
        output, new_state = self.lstm(input.unsqueeze(0), state)
        if not self.attention:
            return output.squeeze(0), new_state

        if self.previous_hidden is None:
            self.previous_hidden = torch.zeros(
                (batch_size, 1, self.hidden_size), device=self.device)

        # decoder_function is defined outside this snippet; it combines the
        # current output with the stored history (attention context) via ws and vs.
        combined_hy = decoder_function(output.squeeze(0), self.previous_hidden,
                                       self.ws, self.vs)
        new_hy = self.attention_linear(combined_hy)
        self.previous_hidden = torch.cat(
            [self.previous_hidden,
             output.permute([1, 0, 2])], dim=1)
        return new_hy, new_state
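
A minimal usage sketch for the module above, assuming a single-step input of shape (batch, input_size) and a zero-initialized (h, c) state; the sizes are illustrative, and the attention path is left off because decoder_function is defined outside this snippet.

import torch

model = LSTM(input_size=16, hidden_size=32, attention=False)
x = torch.randn(4, 16)              # one time step for a batch of 4
h0 = torch.zeros(1, 4, 32)          # (num_layers, batch, hidden_size)
c0 = torch.zeros(1, 4, 32)
out, (hn, cn) = model(x, (h0, c0))  # out: (4, 32)
print(out.shape, hn.shape)
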
Example no. 2
import numpy as np
import torch
from torch import nn


def _init_weight(out: nn.Parameter):
    """Identical to the XLM create_sinusoidal_embeddings except features are not interleaved.
    The cos features are in the 2nd half of the vector. [dim // 2:]
    """
    n_pos, dim = out.shape
    position_enc = np.array(
        [[pos / np.power(10000, 2 * (j // 2) / dim) for j in range(dim)] for pos in range(n_pos)]
    )
    out[:, 0 : dim // 2] = torch.FloatTensor(np.sin(position_enc[:, 0::2]))  # This line breaks for odd dim
    out[:, dim // 2 :] = torch.FloatTensor(np.cos(position_enc[:, 1::2]))
    out.detach_()
    out.requires_grad = False
    return out
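
A hedged usage sketch for the version above; pos_emb and the sizes are illustrative, not from the original source. The call is wrapped in torch.no_grad() because recent PyTorch forbids in-place writes to a leaf tensor that still requires grad, which is exactly what the later variants avoid by clearing requires_grad first. With an odd embedding_dim the sin slice has one more column than dim // 2, so the first assignment fails.

import torch
import torch.nn as nn

pos_emb = nn.Embedding(num_embeddings=512, embedding_dim=128)  # even dim: works
with torch.no_grad():
    _init_weight(pos_emb.weight)
# embedding_dim=129 would fail: np.sin(position_enc[:, 0::2]) has 65 columns,
# but out[:, 0:dim // 2] only has 64.
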
Example no. 3
import numpy as np
import torch
from torch import nn


def _init_weight(out: nn.Parameter):
    n_pos, dim = out.shape
    position_enc = np.array(
        [
            [pos / np.power(10000, 2 * (j // 2) / dim) for j in range(dim)]
            for pos in range(n_pos)
        ]
    )
    out.requires_grad = False  # set early to avoid an error in pytorch-1.8+
    sentinel = dim // 2 if dim % 2 == 0 else (dim // 2) + 1
    out[:, 0:sentinel] = torch.FloatTensor(np.sin(position_enc[:, 0::2]))
    out[:, sentinel:] = torch.FloatTensor(np.cos(position_enc[:, 1::2]))
    out.detach_()
    return out
Example no. 4
import numpy as np
import torch
from torch import nn


def _init_weight(out: nn.Parameter):
    """
    Identical to the XLM create_sinusoidal_embeddings except features are not interleaved. The cos features are in
    the 2nd half of the vector. [dim // 2:]
    """
    n_pos, dim = out.shape
    position_enc = np.array(
        [[pos / np.power(10000, 2 * (j // 2) / dim) for j in range(dim)]
         for pos in range(n_pos)])
    out.requires_grad = False  # set early to avoid an error in pytorch-1.8+
    sentinel = dim // 2 if dim % 2 == 0 else (dim // 2) + 1
    out[:, 0:sentinel] = torch.FloatTensor(np.sin(position_enc[:, 0::2]))
    out[:, sentinel:] = torch.FloatTensor(np.cos(position_enc[:, 1::2]))
    out.detach_()
    return out
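
A quick check of the sentinel variant under the same illustrative sizes: with an odd dimension the sin block gets dim // 2 + 1 columns and the cos block the remaining dim // 2, and requires_grad is cleared before the in-place writes, so no no_grad() wrapper is needed.

import torch.nn as nn

odd_emb = nn.Embedding(num_embeddings=512, embedding_dim=129)
w = _init_weight(odd_emb.weight)  # sentinel = 65: sin fills 65 columns, cos fills 64
print(w.shape, w.requires_grad)   # torch.Size([512, 129]) False
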
Example no. 5
from collections import OrderedDict

import torch
import torch.nn as nn
from torch.nn import Parameter


class GatedResidual(nn.Module):
    def __init__(self, layer, gate_init=0.0):
        super().__init__()
        self.layer = layer
        self.alpha = Parameter(torch.tensor([gate_init]))

    def forward(self, x):
        gate = torch.sigmoid(self.alpha)  # learnable gate in (0, 1)
        y = self.layer(x)
        return gate * x + (1 - gate) * y

    def json(self, params=False):
        res = OrderedDict([('type', "GatedResidual"),
                           ('sublayers', self.layer.json(params))])
        if params:
            # detach() (not detach_()) so alpha stays a trainable leaf in the graph.
            res['params'] = OrderedDict([('alpha', float(self.alpha.detach().numpy()[0]))])
        return res
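
A short usage sketch for GatedResidual; the wrapped nn.Linear and the sizes are illustrative choices, and json() is not exercised because it expects the sublayer to expose a json method of its own.

import torch
import torch.nn as nn

block = GatedResidual(nn.Linear(64, 64), gate_init=0.0)
x = torch.randn(8, 64)
y = block(x)      # sigmoid(0) = 0.5, so the output starts as an even mix of x and layer(x)
print(y.shape)    # torch.Size([8, 64])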