Example 1
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 residual_dropout_prob: float = 0.2,
                 attention_dropout_prob: float = 0.1) -> None:
        super(StackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(
                feedfoward_input_dim,
                activations=[
                    Activation.by_name('relu')(),
                    Activation.by_name('linear')()
                ],
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                num_layers=2,
                dropout=dropout_prob)

            # Note: Please use `ModuleList` in new code. It provides better
            # support for running on multiple GPUs. We've kept `add_module` here
            # solely for backwards compatibility with existing serialized models.
            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}",
                            feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MultiHeadSelfAttention(
                num_heads=num_attention_heads,
                input_dim=hidden_dim,
                attention_dim=projection_dim,
                values_dim=projection_dim,
                attention_dropout_prob=attention_dropout_prob)
            self.add_module(f"self_attention_{i}", self_attention)
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_output_dim())
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(residual_dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
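The note in Example 1 recommends `torch.nn.ModuleList` over `add_module` plus plain Python lists. Below is a minimal, self-contained sketch of that registration pattern; the `StackedBlocks` class and its plain `Linear`/`LayerNorm` layers are illustrative stand-ins, not the AllenNLP modules used above.

import torch

class StackedBlocks(torch.nn.Module):
    # Toy module (hypothetical, not the AllenNLP encoder) showing the
    # ModuleList registration pattern recommended in the note above:
    # sub-modules appended to a ModuleList are registered automatically, so
    # their parameters are visible to .parameters(), .to(device) and
    # data-parallel replication.
    def __init__(self, input_dim: int, hidden_dim: int, num_layers: int) -> None:
        super().__init__()
        self._feedforward_layers = torch.nn.ModuleList()
        self._layer_norm_layers = torch.nn.ModuleList()

        in_dim = input_dim
        for _ in range(num_layers):
            self._feedforward_layers.append(torch.nn.Linear(in_dim, hidden_dim))
            self._layer_norm_layers.append(torch.nn.LayerNorm(hidden_dim))
            in_dim = hidden_dim

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for feedforward, layer_norm in zip(self._feedforward_layers,
                                           self._layer_norm_layers):
            x = layer_norm(torch.relu(feedforward(x)))
        return x

For example, StackedBlocks(64, 128, 2)(torch.randn(8, 20, 64)) returns a (8, 20, 128) tensor, with every layer tracked by the module itself.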
Example 2
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 residual_dropout_prob: float = 0.2,
                 attention_dropout_prob: float = 0.1) -> None:
        super(StackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(
                feedfoward_input_dim,
                activations=[
                    Activation.by_name('relu')(),
                    Activation.by_name('linear')()
                ],
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                num_layers=2,
                dropout=dropout_prob)

            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}",
                            feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MultiHeadSelfAttention(
                num_heads=num_attention_heads,
                input_dim=hidden_dim,
                attention_dim=projection_dim,
                values_dim=projection_dim,
                attention_dropout_prob=attention_dropout_prob)
            self.add_module(f"self_attention_{i}", self_attention)
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_output_dim())
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(residual_dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
Example 3
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 use_positional_encoding: bool = True,
                 dropout_prob: float = 0.1,
                 residual_dropout_prob: float = 0.2,
                 attention_dropout_prob: float = 0.1) -> None:
        super(StackedSelfAttentionEncoder, self).__init__()

        self._use_positional_encoding = use_positional_encoding
        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(feedfoward_input_dim,
                                     activations=[Activation.by_name('relu')(),
                                                  Activation.by_name('linear')()],
                                     hidden_dims=[feedforward_hidden_dim, hidden_dim],
                                     num_layers=2,
                                     dropout=dropout_prob)

            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}", feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MultiHeadSelfAttention(num_heads=num_attention_heads,
                                                    input_dim=hidden_dim,
                                                    attention_dim=projection_dim,
                                                    values_dim=projection_dim,
                                                    attention_dropout_prob=attention_dropout_prob)
            self.add_module(f"self_attention_{i}", self_attention)
            self._attention_layers.append(self_attention)

            layer_norm = LayerNorm(self_attention.get_output_dim())
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(residual_dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
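Examples 1-3 show the same constructor, AllenNLP's StackedSelfAttentionEncoder.__init__. A minimal usage sketch follows, assuming AllenNLP 0.x, where the class is exposed from `allennlp.modules.seq2seq_encoders` and the encoder's forward pass takes `(inputs, mask)`; the dimensions here are arbitrary.

import torch
from allennlp.modules.seq2seq_encoders import StackedSelfAttentionEncoder

encoder = StackedSelfAttentionEncoder(input_dim=64,
                                      hidden_dim=128,
                                      projection_dim=32,
                                      feedforward_hidden_dim=256,
                                      num_layers=2,
                                      num_attention_heads=4)

inputs = torch.randn(8, 20, 64)   # (batch, timesteps, input_dim)
mask = torch.ones(8, 20)          # 1 for real tokens, 0 for padding
outputs = encoder(inputs, mask)   # (batch, timesteps, hidden_dim) -> (8, 20, 128)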
Example 4
    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        attention_projection_dim: int,
        feedforward_hidden_dim: int,
        num_convs: int,
        conv_kernel_size: int,
        num_attention_heads: int,
        use_positional_encoding: bool = True,
        dropout_prob: float = 0.1,
        layer_dropout_undecayed_prob: float = 0.1,
        attention_dropout_prob: float = 0,
    ) -> None:
        super().__init__()

        check_dimensions_match(input_dim, hidden_dim, "input_dim", "hidden_dim")

        self._use_positional_encoding = use_positional_encoding

        self._conv_norm_layers = torch.nn.ModuleList(
            [LayerNorm(hidden_dim) for _ in range(num_convs)]
        )
        self._conv_layers = torch.nn.ModuleList()
        for _ in range(num_convs):
            padding = torch.nn.ConstantPad1d(
                (conv_kernel_size // 2, (conv_kernel_size - 1) // 2), 0
            )
            depthwise_conv = torch.nn.Conv1d(
                hidden_dim, hidden_dim, conv_kernel_size, groups=hidden_dim
            )
            pointwise_conv = torch.nn.Conv1d(hidden_dim, hidden_dim, 1)
            self._conv_layers.append(
                torch.nn.Sequential(
                    padding, depthwise_conv, pointwise_conv, Activation.by_name("relu")()
                )
            )

        self.attention_norm_layer = LayerNorm(hidden_dim)
        self.attention_layer = MultiHeadSelfAttention(
            num_heads=num_attention_heads,
            input_dim=hidden_dim,
            attention_dim=attention_projection_dim,
            values_dim=attention_projection_dim,
            attention_dropout_prob=attention_dropout_prob,
        )
        self.feedforward_norm_layer = LayerNorm(hidden_dim)
        self.feedforward = FeedForward(
            hidden_dim,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            num_layers=2,
            dropout=dropout_prob,
        )

        self.dropout = Dropout(dropout_prob)
        self.residual_with_layer_dropout = ResidualWithLayerDropout(layer_dropout_undecayed_prob)
        self._input_dim = input_dim
        self._output_dim = hidden_dim
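Example 4 pads the depthwise convolution with ConstantPad1d((k // 2, (k - 1) // 2), 0): the total padding is k - 1, split asymmetrically, so the convolution preserves the sequence length for any kernel size. A quick stand-alone check of that arithmetic in plain PyTorch (torch.nn.ReLU stands in for Activation.by_name("relu")(); dimensions are arbitrary):

import torch

hidden_dim, kernel_size, timesteps = 16, 5, 10

padding = torch.nn.ConstantPad1d((kernel_size // 2, (kernel_size - 1) // 2), 0)
depthwise_conv = torch.nn.Conv1d(hidden_dim, hidden_dim, kernel_size, groups=hidden_dim)
pointwise_conv = torch.nn.Conv1d(hidden_dim, hidden_dim, 1)
conv_block = torch.nn.Sequential(padding, depthwise_conv, pointwise_conv, torch.nn.ReLU())

x = torch.randn(2, hidden_dim, timesteps)  # Conv1d expects (batch, channels, timesteps)
assert conv_block(x).shape == x.shape      # length: timesteps + (k - 1) - (k - 1) = timesteps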
Example 5
    def __init__(self, input_dim: int, dropout_prob: float = 0.0) -> None:

        super(MultiHeadAttentionEncoder, self).__init__()

        self._self_attention = MultiHeadSelfAttention(1, input_dim, input_dim,
                                                      input_dim, 1)
        self._dropout = torch.nn.Dropout(dropout_prob)

        self._output_dim = input_dim
        self.input_dim = input_dim
Example 6
    def __init__(self, input_field_name: str, output_dim: int,
                 embeddings: Embeddings, dropout: float, use_cuda: bool):
        super(LstmTokenEmbedder, self).__init__(input_field_name)
        self.embeddings = embeddings
        self.output_dim = output_dim
        self.use_cuda = use_cuda
        self.encoder_ = torch.nn.LSTM(embeddings.get_embed_dim(),
                                      embeddings.get_embed_dim(),
                                      num_layers=1,
                                      bidirectional=False,
                                      batch_first=True,
                                      dropout=dropout)
        self.attention = MultiHeadSelfAttention(
            num_heads=1,
            input_dim=embeddings.get_embed_dim(),
            attention_dim=embeddings.get_embed_dim(),
            values_dim=embeddings.get_embed_dim(),
            attention_dropout_prob=dropout)
Example 7
    @classmethod
    def from_params(
        cls, vocab: Vocabulary, params: Params
    ) -> 'DialogueContextHierarchicalCoherenceAttentionClassifier':
        embedder_params = params.pop("text_field_embedder")
        text_field_embedder = TextFieldEmbedder.from_params(
            vocab, embedder_params)

        utterance_encoder = Seq2VecEncoder.from_params(
            params.pop("utterance_encoder"))

        attend_feedforward = FeedForward.from_params(
            params.pop('attend_feedforward'))
        #similarity_function = SimilarityFunction.from_params(params.pop("similarity_function"))
        compare_feedforward = FeedForward.from_params(
            params.pop('compare_feedforward'))
        classifier_feedforward = FeedForward.from_params(
            params.pop("classifier_feedforward"))
        final_classifier_feedforward = FeedForward.from_params(
            params.pop("final_classifier_feedforward"))

        initializer = InitializerApplicator.from_params(
            params.pop("initializer", []))
        regularizer = RegularizerApplicator.from_params(
            params.pop("regularizer", []))

        #matrix_attention = MatrixAttention().from_params(params.pop("similarity_function"))
        matrix_attention = MultiHeadSelfAttention.from_params(
            params.pop("similarity_function"))

        return cls(vocab=vocab,
                   text_field_embedder=text_field_embedder,
                   attend_feedforward=attend_feedforward,
                   matrix_attention=matrix_attention,
                   compare_feedforward=compare_feedforward,
                   classifier_feedforward=classifier_feedforward,
                   final_classifier_feedforward=final_classifier_feedforward,
                   utterance_encoder=utterance_encoder,
                   initializer=initializer,
                   regularizer=regularizer)
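Example 7 builds the classifier from an AllenNLP Params object. The sketch below is a hypothetical skeleton of the configuration block it consumes: the keys mirror the params.pop(...) calls above, while the concrete sub-configurations (embedder type, encoder type, layer sizes) are project-specific and left as empty placeholders.

from allennlp.common import Params

params = Params({
    "text_field_embedder": {},             # consumed by TextFieldEmbedder.from_params
    "utterance_encoder": {},               # consumed by Seq2VecEncoder.from_params
    "attend_feedforward": {},              # FeedForward configurations
    "compare_feedforward": {},
    "classifier_feedforward": {},
    "final_classifier_feedforward": {},
    "similarity_function": {},             # consumed by MultiHeadSelfAttention.from_params
    "initializer": [],                     # optional: defaults to [] in the code above
    "regularizer": [],                     # optional: defaults to [] in the code above
})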