Example #1
    def __init__(self,
                 width: int,
                 input_size: int,
                 hidden_size: int,
                 n_layers: int,
                 use_position: bool = False,
                 dropout: float = 0.0):
        super(Bengio03ResNetBiLm, self).__init__()
        self.use_position = use_position
        self.n_layers = n_layers

        self.dropout = torch.nn.Dropout(p=dropout)
        self.activation = torch.nn.ReLU()

        self.width = width
        self.input_size = input_size
        self.context_input_size = input_size * (width + 1)
        self.hidden_size = hidden_size

        forward_paddings, backward_paddings = [], []
        forward_projects, backward_projects = [], []
        for i in range(n_layers):
            forward_paddings.append(
                torch.nn.Parameter(
                    torch.randn(width, hidden_size) / np.sqrt(hidden_size)))
            backward_paddings.append(
                torch.nn.Parameter(
                    torch.randn(width, hidden_size) / np.sqrt(hidden_size)))

            forward_projects.append(
                torch.nn.Linear(self.context_input_size, hidden_size))
            backward_projects.append(
                torch.nn.Linear(self.context_input_size, hidden_size))

        self.forward_projects = torch.nn.ModuleList(forward_projects)
        self.backward_projects = torch.nn.ModuleList(backward_projects)

        self.forward_paddings = torch.nn.ParameterList(forward_paddings)
        self.backward_paddings = torch.nn.ParameterList(backward_paddings)

        self.left_linears = torch.nn.ModuleList([
            PositionwiseFeedForward(hidden_size, hidden_size, dropout)
            for _ in range(n_layers)
        ])
        self.right_linears = torch.nn.ModuleList([
            PositionwiseFeedForward(hidden_size, hidden_size, dropout)
            for _ in range(n_layers)
        ])

        self.left_blocks = torch.nn.ModuleList([
            SublayerConnection(hidden_size, dropout)
            for _ in range(n_layers)
        ])
        self.right_blocks = torch.nn.ModuleList([
            SublayerConnection(hidden_size, dropout)
            for _ in range(n_layers)
        ])
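A minimal instantiation sketch for this constructor, with hypothetical hyper-parameters. It assumes torch and numpy (as np) are imported and that the PositionwiseFeedForward and SublayerConnection helpers referenced above are in scope; the forward pass is not shown in this example.

# Hypothetical sizes, for illustration only.
model = Bengio03ResNetBiLm(width=3, input_size=512, hidden_size=512,
                           n_layers=2, use_position=False, dropout=0.1)
# The constructor only builds the per-layer context projections, the learned
# left/right padding vectors, and the residual feed-forward sublayers.
print(sum(p.numel() for p in model.parameters()))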
Example #2
    def __init__(self,
                 width: int,
                 input_size: int,
                 hidden_size: int,
                 n_layers: int,
                 use_position: bool = False,
                 dropout: float = 0.0):
        super(LBLResNetBiLm, self).__init__()
        self.use_position = use_position

        self.dropout = torch.nn.Dropout(dropout)
        self.activation = torch.nn.ReLU()

        self.width = width
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.n_layers = n_layers

        forward_paddings, backward_paddings = [], []
        forward_weights, backward_weights = [], []
        for _ in range(self.n_layers):
            forward_paddings.append(
                torch.nn.Parameter(
                    torch.randn(width, hidden_size) / np.sqrt(hidden_size)))
            backward_paddings.append(
                torch.nn.Parameter(
                    torch.randn(width, hidden_size) / np.sqrt(hidden_size)))
            forward_weights.append(torch.nn.Parameter(torch.randn(width + 1)))
            backward_weights.append(torch.nn.Parameter(torch.randn(width + 1)))

        self.forward_paddings = torch.nn.ParameterList(forward_paddings)
        self.backward_paddings = torch.nn.ParameterList(backward_paddings)
        self.forward_weights = torch.nn.ParameterList(forward_weights)
        self.backward_weights = torch.nn.ParameterList(backward_weights)

        if self.use_position:
            self.position = PositionalEncoding(hidden_size)

        self.forward_linears = torch.nn.ModuleList([
            PositionwiseFeedForward(hidden_size, hidden_size, dropout)
            for _ in range(n_layers)
        ])
        self.backward_linears = torch.nn.ModuleList([
            PositionwiseFeedForward(hidden_size, hidden_size, dropout)
            for _ in range(n_layers)
        ])

        self.forward_blocks = torch.nn.ModuleList([
            SublayerConnection(hidden_size, dropout) for _ in range(n_layers)
        ])
        self.backward_blocks = torch.nn.ModuleList([
            SublayerConnection(hidden_size, dropout) for _ in range(n_layers)
        ])
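As with the first example, a minimal instantiation sketch with hypothetical hyper-parameters. It assumes the PositionwiseFeedForward, SublayerConnection, and PositionalEncoding classes used above are importable from the same code base.

# Hypothetical sizes, for illustration only.
lbl = LBLResNetBiLm(width=3, input_size=512, hidden_size=512,
                    n_layers=2, use_position=True, dropout=0.1)
# Each layer owns learned padding vectors plus (width + 1) scalar mixing
# weights for the log-bilinear context combination.
print(len(lbl.forward_weights), lbl.forward_weights[0].shape)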
Example #3
    def __init__(self, size: int, self_attn: MultiHeadedAttention,
                 src_attn: MultiHeadedAttention, feed_forward: F,
                 dropout: float) -> None:
        super(DecoderLayer, self).__init__()
        self.size = size
        self.self_attn = self_attn
        self.src_attn = src_attn
        self.feed_forward = feed_forward
        self.sublayer = _clones(SublayerConnection(size, dropout), 3)

    def __init__(
        self,
        size: int,
        self_attn: MultiHeadedAttention,
        src_attn: MultiHeadedAttention,
        feed_forward: F,
        dropout: float,
    ) -> None:
        super().__init__()
        self.size = size
        self.self_attn = self_attn
        self.src_attn = src_attn
        self.feed_forward = feed_forward
        self.sublayer = nn_util.clone(SublayerConnection(size, dropout), 3)
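A minimal instantiation sketch that works for either variant of this constructor. It assumes Annotated-Transformer-style helper classes, i.e. a MultiHeadedAttention(heads, model_size) constructor and a PositionwiseFeedForward(model_size, ff_size, dropout) constructor; the names and sizes below are illustrative only.

# Hypothetical hyper-parameters, for illustration only.
d_model, n_heads, d_ff, dropout = 512, 8, 2048, 0.1
layer = DecoderLayer(
    size=d_model,
    self_attn=MultiHeadedAttention(n_heads, d_model),   # masked self-attention
    src_attn=MultiHeadedAttention(n_heads, d_model),    # encoder-decoder attention
    feed_forward=PositionwiseFeedForward(d_model, d_ff, dropout),
    dropout=dropout,
)
# Three SublayerConnection clones: one each for self-attention,
# source attention, and the feed-forward block.
print(len(layer.sublayer))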