Example #1
    def __init__(
        self,
        input_dim: int,  # input embedding dimension
        num_layers: int = 6,
        num_heads: int = 8,
        feedforward_hidden_dim: Optional[int] = None,
        feedforward_dropout: float = 0.1,
        attention_dim: Optional[int] = None,
        value_dim: Optional[int] = None,
        residual_dropout: float = 0.1,
        attention_dropout: float = 0.1,
        use_positional_embedding: bool = True,
    ):
        super(TransformerEncoder, self).__init__()

        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._attention_norm_layers: List[LayerNorm] = []
        self._feedforward_layers: List[FeedForward] = []
        self._feedforward_norm_layers: List[LayerNorm] = []

        hidden_dim = input_dim
        attention_dim = attention_dim or (hidden_dim // num_heads)
        value_dim = value_dim or (hidden_dim // num_heads)
        feedforward_hidden_dim = feedforward_hidden_dim or hidden_dim

        for i in range(num_layers):
            attention = MultiHeadSelfAttention(
                num_heads,
                hidden_dim,
                attention_dim * num_heads,
                value_dim * num_heads,
                attention_dropout=attention_dropout)
            self.add_module(f'attention_{i}', attention)
            self._attention_layers.append(attention)

            attention_norm = LayerNorm(hidden_dim)
            self.add_module(f'attention_norm_{i}', attention_norm)
            self._attention_norm_layers.append(attention_norm)

            feedforward = FeedForward(
                hidden_dim,
                num_layers=2,
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                activations=[
                    Activation.by_name('relu')(),
                    Activation.by_name('linear')()
                ],
                dropout=feedforward_dropout)
            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedforward_layers.append(feedfoward)

            feedforward_norm = LayerNorm(hidden_dim)
            self.add_module(f"feedforward_norm_{i}", feedforward_norm)
            self._feedforward_norm_layers.append(feedforward_norm)

        self._dropout = torch.nn.Dropout(residual_dropout)
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self._use_positional_embedding = use_positional_embedding
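The forward pass that consumes these sub-modules is not shown; pairing each attention/feed-forward layer with a LayerNorm and a shared residual dropout suggests the usual post-norm residual wiring. A minimal illustrative sketch in plain PyTorch, using torch.nn.MultiheadAttention and a two-layer MLP as stand-ins for the project's MultiHeadSelfAttention and FeedForward modules (which are not shown here):

import torch

# Stand-in modules; the project's MultiHeadSelfAttention / FeedForward classes are assumed, not shown.
hidden_dim, num_heads = 512, 8
attn = torch.nn.MultiheadAttention(hidden_dim, num_heads, batch_first=True)
ff = torch.nn.Sequential(
    torch.nn.Linear(hidden_dim, hidden_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_dim, hidden_dim),
)
attn_norm = torch.nn.LayerNorm(hidden_dim)
ff_norm = torch.nn.LayerNorm(hidden_dim)
dropout = torch.nn.Dropout(0.1)

x = torch.randn(2, 10, hidden_dim)       # (batch, seq_len, hidden_dim)
attended, _ = attn(x, x, x)              # self-attention sub-layer
x = attn_norm(x + dropout(attended))     # residual + dropout, then LayerNorm
x = ff_norm(x + dropout(ff(x)))          # feed-forward sub-layer, same wiring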
Example #2
    def __init__(
        self,
        input_dim: int,
        num_heads: int = 8,
        attention_dim: Optional[int] = None,
        value_dim: Optional[int] = None,
        feedforward_hidden_dim: Optional[int] = None,
        residual_dropout: float = 0.1,
        attention_dropout: float = 0.1,
        feedforward_dropout: float = 0.1,
        use_vanilla_wiring: bool = False,
    ):
        super(UTDecBlock, self).__init__()
        hidden_dim = input_dim
        attention_dim = attention_dim or (hidden_dim // num_heads)
        value_dim = value_dim or (hidden_dim // num_heads)
        feedforward_hidden_dim = feedforward_hidden_dim or hidden_dim

        self._masked_attention = MaskedMultiHeadSelfAttention(
            num_heads,
            hidden_dim,
            attention_dim * num_heads,
            value_dim * num_heads,
            attention_dropout=attention_dropout)
        self._masked_attention_norm = LayerNorm(hidden_dim)

        self._attention = MultiHeadAttention(
            num_heads,
            hidden_dim,
            hidden_dim,
            attention_dim * num_heads,
            value_dim * num_heads,
            attention_dropout=attention_dropout)
        self._dropout = torch.nn.Dropout(residual_dropout)
        self._attention_norm = LayerNorm(hidden_dim)

        # use a feed-forward network as the transition function
        self._feedforward = FeedForward(
            hidden_dim,
            num_layers=2,
            hidden_dims=[feedforward_hidden_dim, hidden_dim],
            activations=[
                Activation.by_name('relu')(),
                Activation.by_name('linear')()
            ],
            dropout=feedforward_dropout)
        self._feedforward_norm = LayerNorm(hidden_dim)

        self._use_vanilla_wiring = use_vanilla_wiring
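The `attention_dim * num_heads` and `value_dim * num_heads` arguments rely on the defaults computed just above: when no explicit values are given, each head gets `hidden_dim // num_heads` dimensions, so the concatenated head projections match the model width. A quick check of that arithmetic (the 512/8 values are illustrative):

input_dim, num_heads = 512, 8

hidden_dim = input_dim
attention_dim = hidden_dim // num_heads   # 64 dimensions per head by default
value_dim = hidden_dim // num_heads

# The concatenated head projections recover the model width
# (note: if input_dim is not divisible by num_heads, the product comes out smaller).
assert attention_dim * num_heads == hidden_dim    # 8 * 64 == 512
assert value_dim * num_heads == hidden_dim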
Example #3
    def __init__(self,
                 word_embeddings: TextFieldEmbedder,
                 sentence_encoder: Seq2SeqEncoder,
                 document_encoder: Seq2SeqEncoder,
                 relation_encoder: Seq2SeqEncoder,
                 document_relation_encoder: Seq2SeqEncoder,
                 vocab: Vocabulary,
                 encoder_dropout: float = 0.5,
                 ffn_dropout: float = 0.2) -> None:
        # We have to pass the vocabulary to the constructor.
        super().__init__(vocab)

        self.word_embeddings = word_embeddings

        self.encoder_dropout = torch.nn.Dropout(p=encoder_dropout)

        self.sentence_encoder = sentence_encoder

        self.sentence_attn = LinearAttention(
            input_dim=self.sentence_encoder.get_output_dim())

        self.document_encoder = document_encoder
        self.document_attn = LinearAttention(
            input_dim=self.document_encoder.get_output_dim())

        self.relation_encoder = relation_encoder
        self.relation_attn = LinearAttention(
            input_dim=self.relation_encoder.get_output_dim())

        linear_dim = document_encoder.get_output_dim()
        feedforward_dim = 4 * linear_dim

        self.ffn = torch.nn.Sequential(
            torch.nn.Linear(linear_dim, feedforward_dim),
            torch.nn.ReLU(inplace=True),
            torch.nn.Dropout(ffn_dropout),
            torch.nn.Linear(feedforward_dim, linear_dim),
            torch.nn.Dropout(ffn_dropout))
        self.norm = LayerNorm(linear_dim)

        self.output = torch.nn.Linear(in_features=linear_dim, out_features=1)
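The forward pass is not shown; below is a hedged sketch of how the feed-forward block, residual connection, LayerNorm and scalar output head defined above would conventionally be combined, assuming the encoders and attention layers have already produced a pooled document vector (whether the residual is added exactly this way in the original model is an assumption):

import torch

linear_dim, ffn_dropout = 256, 0.2       # illustrative sizes
ffn = torch.nn.Sequential(
    torch.nn.Linear(linear_dim, 4 * linear_dim),
    torch.nn.ReLU(inplace=True),
    torch.nn.Dropout(ffn_dropout),
    torch.nn.Linear(4 * linear_dim, linear_dim),
    torch.nn.Dropout(ffn_dropout))
norm = torch.nn.LayerNorm(linear_dim)
output = torch.nn.Linear(linear_dim, 1)

doc_vector = torch.randn(8, linear_dim)  # attended document representation (batch, linear_dim)
scores = output(norm(doc_vector + ffn(doc_vector)))   # residual + LayerNorm, one logit per document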
Example #4
    def __init__(
        self,
        hidden_size: int,
        num_layers: int = 7,
    ):
        super(SLSTMEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # h_t updates
        self.h_context_linearity = torch.nn.Linear(2 * hidden_size,
                                                   7 * hidden_size,
                                                   bias=False)
        self.h_current_linearity = torch.nn.Linear(hidden_size,
                                                   7 * hidden_size,
                                                   bias=False)
        self.h_input_linearity = torch.nn.Linear(hidden_size,
                                                 7 * hidden_size,
                                                 bias=True)
        self.h_global_linearity = torch.nn.Linear(hidden_size,
                                                  7 * hidden_size,
                                                  bias=False)

        # global updates
        self.g_input_linearity = torch.nn.Linear(hidden_size,
                                                 3 * hidden_size,
                                                 bias=True)
        self.g_hidden_linearity = torch.nn.Linear(hidden_size,
                                                  hidden_size,
                                                  bias=False)
        self.g_avg_linearity = torch.nn.Linear(hidden_size,
                                               2 * hidden_size,
                                               bias=False)

        # layer normalization layers
        self.layer_norms = torch.nn.ModuleList(
            [LayerNorm(hidden_size) for _ in range(10)])

        self.reset_parameters()
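Each `7 * hidden_size` projection packs the seven word-level S-LSTM gates into a single matrix multiplication; the forward pass then splits the result back into per-gate chunks. A minimal sketch of that split using torch.chunk (the general approach, not this encoder's exact forward code):

import torch

hidden_size, batch, seq_len = 128, 4, 20
h_input_linearity = torch.nn.Linear(hidden_size, 7 * hidden_size, bias=True)

x = torch.randn(batch, seq_len, hidden_size)
projected = h_input_linearity(x)             # (batch, seq_len, 7 * hidden_size)
gates = torch.chunk(projected, 7, dim=-1)    # seven tensors of shape (batch, seq_len, hidden_size),
                                             # one per S-LSTM gate
assert all(g.shape[-1] == hidden_size for g in gates)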
Example #5
    def __init__(
        self,
        input_dim: int,  # input embedding dimension
        num_layers: int = 6,
        num_heads: int = 8,
        feedforward_hidden_dim: Optional[int] = None,
        feedforward_dropout: float = 0.1,
        attention_dim: Optional[int] = None,
        value_dim: Optional[int] = None,
        residual_dropout: float = 0.1,
        attention_dropout: float = 0.1,
        use_positional_embedding: bool = True,
    ):
        """
        Construct a decoder for transformer, which is in charge of modules in the transformer model
        from the Positional Embedding before the final linear projection.
        The embedding and linear projection should be implemented elsewhere.

        :param num_layers: the number of stack layers of the transformer block
        """
        super(TransformerDecoder, self).__init__()

        self._mask_attention_layers: List[MaskedMultiHeadSelfAttention] = []
        self._mask_attention_norm_layers: List[LayerNorm] = []
        self._attention_layers: List[MultiHeadAttention] = []
        self._attention_norm_layers: List[LayerNorm] = []
        self._feedforward_layers: List[FeedForward] = []
        self._feedforward_norm_layers: List[LayerNorm] = []

        hidden_dim = input_dim  # dimension of the hidden states output by the decoder module

        attention_dim = attention_dim or (hidden_dim // num_heads)
        value_dim = value_dim or (hidden_dim // num_heads)
        feedforward_hidden_dim = feedforward_hidden_dim or hidden_dim

        for i in range(num_layers):
            masked_attention = MaskedMultiHeadSelfAttention(
                num_heads,
                hidden_dim,
                attention_dim * num_heads,
                value_dim * num_heads,
                attention_dropout=attention_dropout)
            self.add_module(f'masked_attention_{i}', masked_attention)
            self._mask_attention_layers.append(masked_attention)

            masked_attention_norm = LayerNorm(hidden_dim)
            self.add_module(f'masked_attention_norm_{i}',
                            masked_attention_norm)
            self._mask_attention_norm_layers.append(masked_attention_norm)

            attention = MultiHeadAttention(num_heads,
                                           hidden_dim,
                                           hidden_dim,
                                           attention_dim * num_heads,
                                           value_dim * num_heads,
                                           attention_dropout=attention_dropout)
            self.add_module(f'attention_{i}', attention)
            self._attention_layers.append(attention)

            attention_norm = LayerNorm(hidden_dim)
            self.add_module(f'attention_norm_{i}', attention_norm)
            self._attention_norm_layers.append(attention_norm)

            feedforward = FeedForward(
                hidden_dim,
                num_layers=2,
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                activations=[
                    Activation.by_name('relu')(),
                    Activation.by_name('linear')()
                ],
                dropout=feedforward_dropout)
            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedforward_layers.append(feedfoward)

            feedforward_norm = LayerNorm(hidden_dim)
            self.add_module(f"feedforward_norm_{i}", feedforward_norm)
            self._feedforward_norm_layers.append(feedforward_norm)

        self._dropout = torch.nn.Dropout(residual_dropout)
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self._use_positional_embedding = use_positional_embedding
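Per layer, these modules follow the standard transformer decoder wiring: masked self-attention over the target, attention over the encoder output, then the feed-forward transition, each followed by dropout, a residual connection and LayerNorm. An illustrative plain-PyTorch sketch of that wiring (stand-in modules, not the project's own classes or its actual forward method):

import torch

hidden_dim, num_heads = 512, 8
self_attn = torch.nn.MultiheadAttention(hidden_dim, num_heads, batch_first=True)
cross_attn = torch.nn.MultiheadAttention(hidden_dim, num_heads, batch_first=True)
ff = torch.nn.Sequential(
    torch.nn.Linear(hidden_dim, hidden_dim),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_dim, hidden_dim))
norm1, norm2, norm3 = (torch.nn.LayerNorm(hidden_dim) for _ in range(3))
dropout = torch.nn.Dropout(0.1)

tgt = torch.randn(2, 7, hidden_dim)      # decoder input  (batch, tgt_len, hidden_dim)
memory = torch.randn(2, 11, hidden_dim)  # encoder output (batch, src_len, hidden_dim)
causal_mask = torch.triu(torch.full((7, 7), float('-inf')), diagonal=1)

attended, _ = self_attn(tgt, tgt, tgt, attn_mask=causal_mask)
tgt = norm1(tgt + dropout(attended))     # masked self-attention sub-layer
attended, _ = cross_attn(tgt, memory, memory)
tgt = norm2(tgt + dropout(attended))     # encoder-decoder attention sub-layer
tgt = norm3(tgt + dropout(ff(tgt)))      # feed-forward sub-layer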
Example #6
    def __init__(self,
                 input_dim: int,
                 hidden_dim: int,
                 projection_dim: int,
                 feedforward_hidden_dim: int,
                 num_layers: int,
                 num_attention_heads: int,
                 dropout_prob: float = 0.1,
                 residual_dropout_prob: float = 0.2,
                 attention_dropout_prob: float = 0.1) -> None:
        super().__init__()

        self._attention_layers: List[MultiHeadSelfAttention] = []
        self._feedfoward_layers: List[FeedForward] = []
        self._layer_norm_layers: List[LayerNorm] = []
        self._feed_forward_layer_norm_layers: List[LayerNorm] = []
        self._reset_gate_layers: List[FeedForward] = []

        feedfoward_input_dim = input_dim
        for i in range(num_layers):
            feedfoward = FeedForward(
                feedfoward_input_dim,
                activations=[
                    Activation.by_name('relu')(),
                    Activation.by_name('linear')()
                ],
                hidden_dims=[feedforward_hidden_dim, hidden_dim],
                num_layers=2,
                dropout=dropout_prob)

            # Note: Please use `ModuleList` in new code. It provides better
            # support for running on multiple GPUs. We've kept `add_module` here
            # solely for backwards compatibility with existing serialized models.
            self.add_module(f"feedforward_{i}", feedfoward)
            self._feedfoward_layers.append(feedfoward)

            feedforward_layer_norm = LayerNorm(feedfoward.get_output_dim())
            self.add_module(f"feedforward_layer_norm_{i}",
                            feedforward_layer_norm)
            self._feed_forward_layer_norm_layers.append(feedforward_layer_norm)

            self_attention = MultiHeadSelfAttention(
                num_heads=num_attention_heads,
                input_dim=hidden_dim,
                attention_dim=projection_dim,
                values_dim=projection_dim,
                attention_dropout_prob=attention_dropout_prob)
            self.add_module(f"self_attention_{i}", self_attention)
            self._attention_layers.append(self_attention)

            reset_gate = FeedForward(
                feedforward_hidden_dim,
                activations=Activation.by_name('sigmoid')(),
                hidden_dims=hidden_dim,
                num_layers=1,
                dropout=dropout_prob)
            self.add_module(f"reset_gate_{i}", reset_gate)
            self._reset_gate_layers.append(reset_gate)

            layer_norm = LayerNorm(self_attention.get_output_dim())
            self.add_module(f"layer_norm_{i}", layer_norm)
            self._layer_norm_layers.append(layer_norm)

            feedfoward_input_dim = hidden_dim

        self.dropout = Dropout(residual_dropout_prob)
        self._input_dim = input_dim
        self._output_dim = self._attention_layers[-1].get_output_dim()
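The comment above recommends `ModuleList` for new code; here is a minimal sketch of what the same registration pattern looks like with `torch.nn.ModuleList` instead of `add_module` plus plain Python lists (illustrative only, not a refactor of this class):

import torch

class StackedBlocks(torch.nn.Module):
    # Sub-modules held in a ModuleList are registered automatically, so they are
    # moved by .to(device), saved in state_dict(), and replicated across GPUs.
    def __init__(self, hidden_dim: int, num_layers: int):
        super().__init__()
        self.layer_norms = torch.nn.ModuleList(
            [torch.nn.LayerNorm(hidden_dim) for _ in range(num_layers)])
        self.feedforwards = torch.nn.ModuleList(
            [torch.nn.Linear(hidden_dim, hidden_dim) for _ in range(num_layers)])

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        for norm, ff in zip(self.layer_norms, self.feedforwards):
            x = norm(x + ff(x))
        return x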
Example #7
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = LayerNorm(size)
        self.dropout = nn.Dropout(dropout)
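Only the constructor is shown; in the Annotated-Transformer style this class is completed with a forward method that applies the residual connection around a layer-normalized sub-layer. A self-contained sketch of that conventional completion, using torch.nn.LayerNorm as a stand-in for the LayerNorm class used above:

import torch
import torch.nn as nn

class SublayerConnection(nn.Module):
    # Residual connection with dropout around a pre-normalized sub-layer.
    def __init__(self, size, dropout):
        super(SublayerConnection, self).__init__()
        self.norm = nn.LayerNorm(size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, sublayer):
        # Normalize first, run the sub-layer, then add the residual.
        return x + self.dropout(sublayer(self.norm(x)))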
Example #8
    def __init__(
            self,
            vocab: Vocabulary,
            text_field_embedder: TextFieldEmbedder,
            title_encoder: Seq2VecEncoder,
            abstract_encoder: Seq2VecEncoder,
            venue_encoder: Seq2VecEncoder,
            body_encoder: Seq2VecEncoder = None,
            predict_mode: bool = False,
            author_text_embedder: TextFieldEmbedder = None,
            venue_field_embedder: TextFieldEmbedder = None,
            author_text_encoder: Seq2VecEncoder = None,
            # author_id_embedder: Optional[Embedding] = None,
            author_id_embedder: TextFieldEmbedder = None,
            # author_position_embedder: Optional[Embedding] = None,
            author_position_embedder: TextFieldEmbedder = None,
            feedforward: FeedForward = None,
            author_feedforward: FeedForward = None,
            initializer: InitializerApplicator = InitializerApplicator(),
            regularizer: Optional[RegularizerApplicator] = None,
            max_num_authors: Optional[int] = 5,
            dropout: Optional[float] = None,
            ignore_authors: Optional[bool] = False,
            layer_norm: Optional[bool] = True,
            embedding_layer_norm: Optional[bool] = False,
            loss_distance: Optional[str] = 'l2-norm',
            loss_margin: Optional[float] = 1,
            bert_finetune: Optional[bool] = False,
            include_venue: Optional[bool] = False) -> None:
        super(Specter, self).__init__(vocab, regularizer)

        for lbl in range(max_num_authors):
            vocab.add_token_to_namespace(token=str(lbl),
                                         namespace='author_positions')

        self.text_field_embedder = text_field_embedder
        self.venue_field_embedder = venue_field_embedder
        self.title_encoder = title_encoder
        self.abstract_encoder = abstract_encoder
        self.body_encoder = body_encoder
        self.venue_encoder = venue_encoder

        self.predict_mode = predict_mode

        self.feedforward = feedforward

        if loss_distance == 'l2-norm':
            self.loss = torch.nn.TripletMarginLoss(margin=loss_margin,
                                                   reduction='none')
        elif loss_distance == 'binary':
            self.loss = BinaryLoss(margin=loss_margin)
        else:
            self.loss = TripletLoss(margin=loss_margin,
                                    distance=loss_distance,
                                    reduction='none')

        if layer_norm:
            self.layer_norm = LayerNorm(self.feedforward.get_output_dim())
        self.do_layer_norm = layer_norm

        # self.layer_norm_author_embedding = LayerNorm(author_feedforward.get_output_dim())

        if embedding_layer_norm:
            self.layer_norm_word_embedding = LayerNorm(
                self.title_encoder.get_input_dim())
            self.layer_norm_word_embedding_venue = LayerNorm(
                self.venue_encoder.get_input_dim())
        self.embedding_layer_norm = embedding_layer_norm

        # Use the configured dropout rate when one is given; otherwise fall back to the torch default.
        self.dropout = Dropout(dropout) if dropout is not None else Dropout()

        self.ignore_authors = ignore_authors

        if not ignore_authors:
            self.author_id_embedder = author_id_embedder
            self.author_position_embedder = author_position_embedder
            self.author_text_embedder = author_text_embedder
            self.author_text_encoder = author_text_encoder
            # author representation would be a concatenation of author-id and author-position
            # [batch, num-authors, auth-dim + position-dim]
            # we apply timedistributed mlp on top to make this a:
            # [batch, num-authors, dim]
            self.author_time_dist_ff = TimeDistributed(author_feedforward)

        # Internal flag indicating whether the title/abstract should be encoded with a transformer.
        # Do not change this: it must default to `False` in this class; the inheriting
        # `PaperRepresentationTransoformer` class sets it to `True` in its constructor.
        self.tansformer_encoder = False

        self.bert_finetune = bert_finetune
        self.include_venue = include_venue

        initializer(self)
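The `l2-norm` branch above uses `torch.nn.TripletMarginLoss` with `reduction='none'`, which returns one loss value per (query, positive, negative) triple rather than a scalar. A minimal usage sketch on random embeddings (the batch size and embedding dimension are illustrative):

import torch

loss_fn = torch.nn.TripletMarginLoss(margin=1.0, reduction='none')

query = torch.randn(4, 768)      # source-paper embeddings (batch, dim)
positive = torch.randn(4, 768)   # embeddings of related papers
negative = torch.randn(4, 768)   # embeddings of unrelated papers

per_example_loss = loss_fn(query, positive, negative)   # shape (4,), one value per triple
loss = per_example_loss.mean()                          # reduce manually if a scalar is needed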