Example no. 1
    def __init__(
        self,
        input_dim: int,
        hidden_dim: int,
        attention_projection_dim: int,
        feedforward_hidden_dim: int,
        num_convs: int,
        conv_kernel_size: int,
        num_attention_heads: int,
        use_positional_encoding: bool = True,
        dropout_prob: float = 0.1,
        layer_dropout_undecayed_prob: float = 0.1,
        attention_dropout_prob: float = 0.0,
    ) -> None:
        super().__init__()

        check_dimensions_match(input_dim, hidden_dim, "input_dim", "hidden_dim")

        self._use_positional_encoding = use_positional_encoding

        self._conv_norm_layers = torch.nn.ModuleList(
            [LayerNorm(hidden_dim) for _ in range(num_convs)]
        )
        self._conv_layers = torch.nn.ModuleList()
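        # Each convolutional sub-layer is a depthwise separable convolution:
        # "same"-length padding (left k//2, right (k-1)//2), a depthwise Conv1d
        # (groups=hidden_dim), a 1x1 pointwise Conv1d, and a ReLU activation.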
        for _ in range(num_convs):
            padding = torch.nn.ConstantPad1d(
                (conv_kernel_size // 2, (conv_kernel_size - 1) // 2), 0
            )
            depthwise_conv = torch.nn.Conv1d(
                hidden_dim, hidden_dim, conv_kernel_size, groups=hidden_dim
            )
            pointwise_conv = torch.nn.Conv1d(hidden_dim, hidden_dim, 1)
            self._conv_layers.append(
                torch.nn.Sequential(
                    padding, depthwise_conv, pointwise_conv, Activation.by_name("relu")()
                )
            )

        self.attention_norm_layer = LayerNorm(hidden_dim)
        self.attention_layer = MultiHeadSelfAttention(
            num_heads=num_attention_heads,
            input_dim=hidden_dim,
            attention_dim=attention_projection_dim,
            values_dim=attention_projection_dim,
            attention_dropout_prob=attention_dropout_prob,
        )
        self.feedforward_norm_layer = LayerNorm(hidden_dim)
        self.feedforward = FeedForward(
            hidden_dim,
            activations=[Activation.by_name("relu")(), Activation.by_name("linear")()],
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            num_layers=2,
            dropout=dropout_prob,
        )

        self.dropout = Dropout(dropout_prob)
        self.residual_with_layer_dropout = ResidualWithLayerDropout(layer_dropout_undecayed_prob)
        self._input_dim = input_dim
        self._output_dim = hidden_dim
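
A minimal instantiation sketch for the constructor above. The listing shows only the __init__ body, so the class name EncoderBlock below is a placeholder and the hyperparameter values are illustrative; the module names it relies on (LayerNorm, FeedForward, Activation, MultiHeadSelfAttention, ResidualWithLayerDropout, check_dimensions_match, torch) follow AllenNLP conventions, but the exact imports are not shown in the listing. Note that input_dim must equal hidden_dim, as enforced by check_dimensions_match.

# Illustrative only: EncoderBlock is a placeholder name for the class whose
# __init__ is listed above; the hyperparameter values are example settings.
block = EncoderBlock(
    input_dim=128,
    hidden_dim=128,               # must equal input_dim (check_dimensions_match)
    attention_projection_dim=128,
    feedforward_hidden_dim=128,
    num_convs=4,
    conv_kernel_size=7,
    num_attention_heads=8,
    use_positional_encoding=True,
    dropout_prob=0.1,
)
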
Example no. 2
    def __init__(
            self,
            input_dim: int,
            hidden_dim: int,
            attention_projection_dim: int,
            feedforward_hidden_dim: int,
            num_convs: int,
            conv_kernel_size: int,
            num_attention_heads: int,
            num_semantic_labels: int,
            replace_zero_semantic_labels_with_per_head_labels: bool = True,
            use_positional_encoding: bool = True,
            dropout_prob: float = 0.1,
            layer_dropout_undecayed_prob: float = 0.1,
            attention_dropout_prob: float = 0.0,
            semantic_integration_mode: str = "projection",
            semantic_emb_dim: int = 0,
            use_semantic_views: bool = True,
            multi_head_attention_batch_computation: bool = False,
            use_separate_label_embeddings_for_q_and_k: bool = True) -> None:
        super().__init__()

        self.return_output_meta_is_supported = True

        check_dimensions_match(input_dim, hidden_dim, 'input_dim',
                               'hidden_dim')

        self._use_positional_encoding = use_positional_encoding
        self._replace_zero_semantic_labels_with_per_head_labels = replace_zero_semantic_labels_with_per_head_labels
        self._conv_norm_layers = torch.nn.ModuleList(
            [LayerNorm(hidden_dim) for _ in range(num_convs)])
        self._conv_layers = torch.nn.ModuleList()

        if semantic_integration_mode not in semantic_integration_mode_supported:
            raise ValueError(
                "semantic_integration_mode must be in [{0}] but is `{1}`".format(
                    ", ".join(semantic_integration_mode_supported),
                    semantic_integration_mode))
        self._semantic_integration_mode = semantic_integration_mode

        self._use_separate_label_embeddings_for_q_and_k = use_separate_label_embeddings_for_q_and_k
        for _ in range(num_convs):
            padding = torch.nn.ConstantPad1d(
                (conv_kernel_size // 2, (conv_kernel_size - 1) // 2), 0)
            depthwise_conv = torch.nn.Conv1d(hidden_dim,
                                             hidden_dim,
                                             conv_kernel_size,
                                             groups=hidden_dim)
            pointwise_conv = torch.nn.Conv1d(hidden_dim, hidden_dim, 1)
            self._conv_layers.append(
                torch.nn.Sequential(padding, depthwise_conv, pointwise_conv,
                                    Activation.by_name("relu")()))

        self.attention_norm_layer = LayerNorm(hidden_dim)
        self.num_semantic_labels = num_semantic_labels
        self.num_attention_heads = num_attention_heads
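        # Semantic-label-aware multi-head self-attention sub-layer, parameterized
        # by the number of semantic labels and the chosen semantic integration mode.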
        self.attention_layer = MultiHeadSemanticFlatConcatSelfAttention(
            num_heads=num_attention_heads,
            num_semantic_labels=num_semantic_labels,
            input_dim=hidden_dim,
            attention_dim=attention_projection_dim,
            values_dim=attention_projection_dim,
            attention_dropout_prob=attention_dropout_prob,
            semantic_integration_mode=semantic_integration_mode,
            semantic_emb_dim=semantic_emb_dim,
            use_semantic_views=use_semantic_views,
            multi_head_attention_batch_computation=multi_head_attention_batch_computation,
            use_separate_label_embeddings_for_q_and_k=use_separate_label_embeddings_for_q_and_k)

        self.feedforward_norm_layer = LayerNorm(hidden_dim)
        self.feedforward = FeedForward(
            hidden_dim,
            activations=[
                Activation.by_name('relu')(),
                Activation.by_name('linear')()
            ],
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            num_layers=2,
            dropout=dropout_prob)

        self.dropout = Dropout(dropout_prob)
        self.residual_with_layer_dropout = ResidualWithLayerDropout(
            layer_dropout_undecayed_prob)
        self._input_dim = input_dim
        self._output_dim = hidden_dim
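
The same kind of instantiation sketch for the semantic variant. SemanticEncoderBlock is again a placeholder class name, and all values (including the semantic-label vocabulary size) are assumptions for illustration; semantic_integration_mode must be one of the values in semantic_integration_mode_supported, which is defined outside this snippet.

# Illustrative only: SemanticEncoderBlock is a placeholder name for the class
# whose __init__ is listed above; all values are example settings.
semantic_block = SemanticEncoderBlock(
    input_dim=128,
    hidden_dim=128,                          # must equal input_dim
    attention_projection_dim=128,
    feedforward_hidden_dim=128,
    num_convs=4,
    conv_kernel_size=7,
    num_attention_heads=8,
    num_semantic_labels=32,                  # size of the semantic-label vocabulary (assumed)
    semantic_integration_mode="projection",  # must appear in semantic_integration_mode_supported
    use_semantic_views=True,
)
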