Esempio n. 1
0
    def __init__(self,
                 field: str,
                 embed: Union[int, Embedding],
                 num_filters: int,
                 ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),
                 conv_layer_activation: str = 'ReLU',
                 output_dim: Optional[int] = None,
                 vocab_size=None) -> None:
        """Build an embedding layer followed by a `CnnEncoder`, both wrapped in `TimeDistributed`.

        The wrapped `CnnEncoder` is a combination of multiple convolution layers and max pooling
        layers. It has one convolution layer for each ngram filter size, and each convolution
        operation gives out a vector of size `num_filters`. The number of times a convolution layer
        is used is `num_tokens - ngram_size + 1`; the corresponding max pooling layer aggregates all
        of these outputs and keeps the max.

        This is repeated for every ngram size passed, so the dimensionality after max pooling is
        `len(ngram_filter_sizes) * num_filters`. When `output_dim` is given, that vector is then
        projected to `output_dim` by a fully connected layer. For more details, refer to
        "A Sensitivity Analysis of (and Practitioners’ Guide to) Convolutional Neural Networks for
        Sentence Classification", Zhang and Wallace 2016, particularly Figure 1.

        See allennlp.modules.seq2vec_encoders.cnn_encoder.CnnEncoder, Apache 2.0

        After construction, `self.embedding_dim` holds the size of the encoder output:
        `output_dim` if it is set, otherwise `num_filters * len(ngram_filter_sizes)`.

        Args:
            field: The field in samples this encoder will work on.
            embed: Either the feature size (an int) used to create a new ``nn.Embedding`` with
                `vocab_size` rows, or an already constructed ``nn.Embedding`` to use as is.
            num_filters: This is the output dim for each convolutional layer, which is the number of
                "filters" learned by that layer.
            ngram_filter_sizes: This specifies both the number of convolutional layers we will create
                and their sizes. The default of `(2, 3, 4, 5)` will have four convolutional layers,
                corresponding to encoding ngrams of size 2 to 5 with some number of filters.
            conv_layer_activation: Name of the activation to use after the convolution layers
                (default `'ReLU'`); passed through to `CnnEncoder` unchanged.
            output_dim: After doing convolutions and pooling, we'll project the collected features
                into a vector of this size. If this value is `None`, we will just return the result
                of the max pooling, giving an output of size `len(ngram_filter_sizes) * num_filters`.
            vocab_size: The size of character vocab. Only used when `embed` is an int; it is ignored
                when a pre-built embedding is supplied.

        Raises:
            ValueError: If `embed` is neither an int nor an ``nn.Embedding``.
        """
        super().__init__()
        EmbeddingDim.__init__(self)
        # the embedding layer
        if isinstance(embed, int):
            embed = nn.Embedding(num_embeddings=vocab_size,
                                 embedding_dim=embed)
        elif not isinstance(embed, nn.Embedding):
            # A ready-made nn.Embedding is accepted as is (the signature and docs promise it);
            # everything else is still rejected with the original message.
            raise ValueError(f'Unrecognized type for {embed}')
        self.field = field
        self.embed = TimeDistributed(embed)
        self.encoder = TimeDistributed(
            CnnEncoder(embed.embedding_dim, num_filters, ngram_filter_sizes, conv_layer_activation, output_dim))
        self.embedding_dim = output_dim or num_filters * len(ngram_filter_sizes)
Esempio n. 2
0
 def __init__(self,
              field: str,
              embed: Union[int, Embedding],
              num_filters: int,
              ngram_filter_sizes: Tuple[int, ...] = (2, 3, 4, 5),
              conv_layer_activation: str = 'ReLU',
              output_dim: Optional[int] = None,
              vocab_size=None) -> None:
     """Build an embedding layer followed by a `CnnEncoder`, both wrapped in `TimeDistributed`.

     After construction, `self.embedding_dim` holds the size of the encoder output:
     `output_dim` if it is set, otherwise `num_filters * len(ngram_filter_sizes)`.

     Args:
         field: Stored on the instance as `self.field`.
         embed: Either the feature size (an int) used to create a new ``nn.Embedding`` with
             `vocab_size` rows, or an already constructed ``nn.Embedding`` to use as is.
         num_filters: Passed to `CnnEncoder` as its number of filters.
         ngram_filter_sizes: Passed to `CnnEncoder` as its ngram filter sizes.
         conv_layer_activation: Activation name passed through to `CnnEncoder` unchanged.
         output_dim: Optional output size passed to `CnnEncoder`.
         vocab_size: Number of rows of the embedding created when `embed` is an int; ignored
             when a pre-built embedding is supplied.

     Raises:
         ValueError: If `embed` is neither an int nor an ``nn.Embedding``.
     """
     super().__init__()
     EmbeddingDim.__init__(self)
     # the embedding layer
     if isinstance(embed, int):
         embed = nn.Embedding(num_embeddings=vocab_size,
                              embedding_dim=embed)
     elif not isinstance(embed, nn.Embedding):
         # A ready-made nn.Embedding is accepted as is (the signature promises it);
         # everything else is still rejected with the original message.
         raise ValueError(f'Unrecognized type for {embed}')
     self.field = field
     self.embed = TimeDistributed(embed)
     self.encoder = TimeDistributed(
         CnnEncoder(embed.embedding_dim, num_filters, ngram_filter_sizes,
                    conv_layer_activation, output_dim))
     self.embedding_dim = output_dim or num_filters * len(
         ngram_filter_sizes)
Esempio n. 3
0
 def __init__(self, context_layer_output_dim, label_space_size,
              config) -> None:
     """Create the span-width embedding and the unary scorers for arguments, predicates and SRL.

     Args:
         context_layer_output_dim: Feature size of the context layer output. It sizes the
             predicate scorer input and, together with the span width feature size, the span
             embedding (`3 * context_layer_output_dim + span_width_feature_size`).
         label_space_size: Size of the label space; the SRL projection (and the biaffine
             layer, when enabled) is built with `label_space_size - 1` outputs.
         config: Configuration object. The attributes read here are `dropout`,
             `use_gold_predicates`, `max_arg_width`, `span_width_feature_size`, `ffnn_size`,
             `ffnn_depth`, `use_biaffine` and `loss_reduction`.
     """
     super().__init__()
     self.config = config
     self.label_space_size = label_space_size
     self.dropout = float(config.dropout)
     self.use_gold_predicates = config.use_gold_predicates

     hidden = config.ffnn_size
     depth = config.ffnn_depth
     width_feature_size = config.span_width_feature_size
     num_scores = label_space_size - 1
     dropout_rate = self.dropout

     def linear_stack(input_size):
         # First layer maps the input features to the hidden size; the rest are hidden-to-hidden.
         layers = []
         for layer_index in range(depth):
             in_features = input_size if layer_index == 0 else hidden
             layers.append(nn.Linear(in_features, hidden))
         return nn.ModuleList(layers)

     def dropout_stack():
         # One dropout module per feed-forward layer.
         return nn.ModuleList([nn.Dropout(dropout_rate) for _ in range(depth)])

     # NOTE: modules are created and registered in the same order as before, so parameter
     # ordering and random-number consumption during initialisation stay the same.

     # span width feature embedding
     self.span_width_embedding = nn.Embedding(config.max_arg_width,
                                              width_feature_size)

     # argument (span) scores
     self.span_emb_size = 3 * context_layer_output_dim + width_feature_size
     self.arg_unary_score_layers = linear_stack(self.span_emb_size)
     self.arg_dropout_layers = dropout_stack()
     self.arg_unary_score_projection = nn.Linear(hidden, 1)

     # predicate scores
     self.pred_unary_score_layers = linear_stack(context_layer_output_dim)
     self.pred_dropout_layers = dropout_stack()
     self.pred_unary_score_projection = nn.Linear(hidden, 1)

     # srl scores: span embedding plus predicate (context) features
     self.srl_unary_score_input_size = self.span_emb_size + context_layer_output_dim
     self.srl_unary_score_layers = linear_stack(
         self.srl_unary_score_input_size)
     self.srl_dropout_layers = dropout_stack()
     self.srl_unary_score_projection = nn.Linear(hidden, num_scores)

     # optional biaffine scorer
     if config.use_biaffine:
         predicate_ffnn = FeedForward(context_layer_output_dim, 1,
                                      self.span_emb_size, 'ReLU')
         self.predicate_scale = TimeDistributed(predicate_ffnn)
         self.biaffine = Biaffine(self.span_emb_size, num_scores)

     self.loss_reduction = config.loss_reduction
     self.reset_parameters()
Esempio n. 4
0
 def new_mlp():
     """Return a fresh `TimeDistributed` wrapper around a new `nn.Linear(hidden_size, ffnn_size)`.

     `hidden_size` and `ffnn_size` are taken from the enclosing scope, which is not visible here.
     """
     projection = nn.Linear(hidden_size, ffnn_size)
     return TimeDistributed(projection)