Example #1
    def __init__(self, words_field, tags_field, options):
        super().__init__(words_field, tags_field)

        #
        # Embeddings
        #
        word_embeddings = None
        if self.words_field.vocab.vectors is not None:
            word_embeddings = self.words_field.vocab.vectors
            options.word_embeddings_size = word_embeddings.size(1)

        self.word_emb = nn.Embedding(
            num_embeddings=len(self.words_field.vocab),
            embedding_dim=options.word_embeddings_size,
            padding_idx=constants.PAD_ID,
            _weight=word_embeddings,
        )
        self.dropout_emb = nn.Dropout(options.emb_dropout)

        if options.freeze_embeddings:
            self.word_emb.weight.requires_grad = False

        features_size = options.word_embeddings_size

        #
        # CNN 1D
        #
        self.cnn_1d = nn.Conv1d(in_channels=features_size,
                                out_channels=options.conv_size,
                                kernel_size=options.kernel_size,
                                padding=options.kernel_size // 2)
        self.max_pool = nn.MaxPool1d(options.pool_length,
                                     padding=options.pool_length // 2)
        self.dropout_cnn = nn.Dropout(options.cnn_dropout)
        self.relu = torch.nn.ReLU()

        features_size = (options.conv_size // options.pool_length +
                         options.pool_length // 2)

        #
        # Attention
        #

        # they are equal for self-attention
        query_size = key_size = value_size = features_size

        if options.attn_scorer == 'dot_product':
            self.attn_scorer = DotProductScorer(scaled=True)
        elif options.attn_scorer == 'general':
            self.attn_scorer = GeneralScorer(query_size, key_size)
        elif options.attn_scorer == 'add':
            self.attn_scorer = OperationScorer(query_size,
                                               key_size,
                                               options.attn_hidden_size,
                                               op='add')
        elif options.attn_scorer == 'concat':
            self.attn_scorer = OperationScorer(query_size,
                                               key_size,
                                               options.attn_hidden_size,
                                               op='concat')
        elif options.attn_scorer == 'mlp':
            self.attn_scorer = MLPScorer(query_size, key_size)
        else:
            raise Exception('Attention scorer `{}` not available'.format(
                options.attn_scorer))

        if options.attn_type == 'regular':
            self.attn = Attention(self.attn_scorer,
                                  dropout=options.attn_dropout)
        elif options.attn_type == 'multihead':
            self.attn = MultiHeadedAttention(
                self.attn_scorer,
                options.attn_nb_heads,
                query_size,
                key_size,
                value_size,
                options.attn_multihead_hidden_size,
                dropout=options.attn_dropout)
            features_size = options.attn_multihead_hidden_size
        else:
            raise Exception('Attention `{}` not available'.format(
                options.attn_type))

        #
        # Linear
        #
        self.linear_out = nn.Linear(features_size, self.nb_classes)

        self.init_weights()
        self.is_built = True
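
The constructor above only declares the layers. As a rough, self-contained illustration of how a Conv1d + MaxPool1d + self-attention stack of this shape can be wired end to end, here is a plain-PyTorch sketch; every name and size in it is invented for the example, the pooling is applied over the feature axis to match the features_size arithmetic above, and the attention is reduced to a bare scaled dot product rather than the project's Attention/scorer classes.

import torch
import torch.nn as nn

class CNNAttnSketch(nn.Module):
    """Illustrative stand-in only: embeddings -> Conv1d -> MaxPool1d over
    the feature axis -> scaled dot-product self-attention -> linear."""

    def __init__(self, vocab_size=100, emb_size=32, conv_size=64,
                 kernel_size=3, pool_length=2, nb_classes=5, pad_id=0):
        super().__init__()
        self.word_emb = nn.Embedding(vocab_size, emb_size, padding_idx=pad_id)
        self.cnn_1d = nn.Conv1d(emb_size, conv_size, kernel_size,
                                padding=kernel_size // 2)
        self.max_pool = nn.MaxPool1d(pool_length, padding=pool_length // 2)
        # same arithmetic as the constructor above: size after pooling
        features_size = conv_size // pool_length + pool_length // 2
        self.linear_out = nn.Linear(features_size, nb_classes)

    def forward(self, words):                           # (batch, time)
        h = self.word_emb(words)                        # (batch, time, emb)
        h = torch.relu(self.cnn_1d(h.transpose(1, 2)))  # (batch, conv, time)
        h = self.max_pool(h.transpose(1, 2))            # (batch, time, features)
        # scaled dot-product self-attention: query = key = value = h
        scores = h @ h.transpose(1, 2) / h.size(-1) ** 0.5
        h = torch.softmax(scores, dim=-1) @ h           # (batch, time, features)
        return self.linear_out(h)                       # (batch, time, classes)

logits = CNNAttnSketch()(torch.randint(1, 100, (4, 10)))  # -> (4, 10, 5)
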
Example #2
    def __init__(self, words_field, tags_field, options):
        super().__init__(words_field, tags_field)

        word_embeddings = None
        if self.words_field.vocab.vectors is not None:
            word_embeddings = self.words_field.vocab.vectors
            options.word_embeddings_size = word_embeddings.size(1)

        self.word_emb = nn.Embedding(
            num_embeddings=len(self.words_field.vocab),
            embedding_dim=options.word_embeddings_size,
            padding_idx=constants.PAD_ID,
            _weight=word_embeddings)

        features_size = options.word_embeddings_size

        if options.freeze_embeddings:
            self.word_emb.weight.requires_grad = False

        self.is_bidir = options.bidirectional
        self.sum_bidir = options.sum_bidir
        self.rnn_type = options.rnn_type

        rnn_class = nn.RNN
        batch_first = True
        if self.rnn_type == 'gru':
            rnn_class = nn.GRU
        elif self.rnn_type == 'lstm':
            rnn_class = nn.LSTM
        elif self.rnn_type == 'qrnn':
            from torchqrnn import QRNN
            rnn_class = QRNN
            batch_first = False

        hidden_size = options.hidden_size[0]
        self.hidden = None
        self.rnn = rnn_class(features_size,
                             hidden_size,
                             bidirectional=self.is_bidir,
                             batch_first=batch_first)
        features_size = hidden_size

        #
        # Attention
        #

        # they are equal for self-attention
        n = 1 if not self.is_bidir or self.sum_bidir else 2
        query_size = key_size = value_size = n * features_size

        if options.attn_scorer == 'dot_product':
            self.attn_scorer = DotProductScorer(scaled=True)
        elif options.attn_scorer == 'general':
            self.attn_scorer = GeneralScorer(query_size, key_size)
        elif options.attn_scorer == 'add':
            self.attn_scorer = OperationScorer(query_size,
                                               key_size,
                                               options.attn_hidden_size,
                                               op='add')
        elif options.attn_scorer == 'concat':
            self.attn_scorer = OperationScorer(query_size,
                                               key_size,
                                               options.attn_hidden_size,
                                               op='concat')
        elif options.attn_scorer == 'mlp':
            self.attn_scorer = MLPScorer(query_size, key_size)
        else:
            raise Exception('Attention scorer `{}` not available'.format(
                options.attn_scorer))

        if options.attn_type == 'regular':
            self.attn = Attention(self.attn_scorer,
                                  dropout=options.attn_dropout)
        elif options.attn_type == 'multihead':
            self.attn = MultiHeadedAttention(
                self.attn_scorer,
                options.attn_nb_heads,
                query_size,
                key_size,
                value_size,
                options.attn_multihead_hidden_size,
                dropout=options.attn_dropout)
            features_size = options.attn_multihead_hidden_size
        else:
            raise Exception('Attention `{}` not available'.format(
                options.attn_type))

        self.crf = CRF(
            self.nb_classes,
            bos_tag_id=self.tags_field.vocab.stoi['_'],  # hack
            eos_tag_id=self.tags_field.vocab.stoi['.'],  # hack
            pad_tag_id=None,
            batch_first=True,
        )

        #
        # Linear
        #
        self.linear_out = nn.Linear(features_size, self.nb_classes)

        self.selu = torch.nn.SELU()
        self.dropout_emb = nn.Dropout(options.emb_dropout)
        self.dropout_rnn = nn.Dropout(options.rnn_dropout)

        self.init_weights()
        self.is_built = True
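
In the RNN constructor, `n = 1 if not self.is_bidir or self.sum_bidir else 2` reflects that a bidirectional RNN doubles the output feature size unless the two directions are summed. A small plain-PyTorch sketch of the two options (the helper name is hypothetical):

import torch
import torch.nn as nn

def combine_directions(rnn_out, hidden_size, sum_bidir):
    """Hypothetical helper: collapse the output of a bidirectional RNN,
    whose shape is (batch, time, 2 * hidden_size)."""
    if sum_bidir:
        # sum forward and backward states -> (batch, time, hidden_size)
        return rnn_out[..., :hidden_size] + rnn_out[..., hidden_size:]
    # keep them concatenated -> (batch, time, 2 * hidden_size)
    return rnn_out

rnn = nn.LSTM(16, 32, bidirectional=True, batch_first=True)
out, _ = rnn(torch.randn(4, 10, 16))          # (4, 10, 64)
summed = combine_directions(out, 32, True)    # (4, 10, 32) -> n = 1
concat = combine_directions(out, 32, False)   # (4, 10, 64) -> n = 2
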
Example #3
    def __init__(self, words_field, tags_field, options):
        super().__init__(words_field, tags_field)

        #
        # Embeddings
        #
        word_embeddings = None
        if self.words_field.vocab.vectors is not None:
            word_embeddings = self.words_field.vocab.vectors
            options.word_embeddings_size = word_embeddings.size(1)

        self.word_emb = nn.Embedding(
            num_embeddings=len(self.words_field.vocab),
            embedding_dim=options.word_embeddings_size,
            padding_idx=constants.PAD_ID,
            _weight=word_embeddings,
        )
        self.dropout_emb = nn.Dropout(options.emb_dropout)

        if options.freeze_embeddings:
            self.word_emb.weight.requires_grad = False

        features_size = options.word_embeddings_size

        #
        # Attention
        #

        # they are equal for self-attention
        query_size = key_size = value_size = features_size

        if options.attn_scorer == 'dot_product':
            self.attn_scorer = DotProductScorer(scaled=True)
        elif options.attn_scorer == 'general':
            self.attn_scorer = GeneralScorer(query_size, key_size)
        elif options.attn_scorer == 'add':
            self.attn_scorer = OperationScorer(query_size,
                                               key_size,
                                               options.attn_hidden_size,
                                               op='add')
        elif options.attn_scorer == 'concat':
            self.attn_scorer = OperationScorer(query_size,
                                               key_size,
                                               options.attn_hidden_size,
                                               op='concat')
        elif options.attn_scorer == 'mlp':
            self.attn_scorer = MLPScorer(query_size, key_size)
        else:
            raise Exception('Attention scorer `{}` not available'.format(
                options.attn_scorer))

        if options.attn_type == 'regular':
            self.attn = Attention(self.attn_scorer,
                                  dropout=options.attn_dropout)
        elif options.attn_type == 'multihead':
            self.attn = MultiHeadedAttention(
                self.attn_scorer,
                options.attn_nb_heads,
                query_size,
                key_size,
                value_size,
                options.attn_multihead_hidden_size,
                dropout=options.attn_dropout)
            features_size = options.attn_multihead_hidden_size
        else:
            raise Exception('Attention `{}` not available'.format(
                options.attn_type))

        #
        # Linear
        #
        self.linear_out = nn.Linear(features_size, self.nb_classes)

        self.crf = CRF(
            self.nb_classes,
            bos_tag_id=self.tags_field.vocab.stoi['_'],  # hack
            eos_tag_id=self.tags_field.vocab.stoi['.'],  # hack
            pad_tag_id=None,
            batch_first=True,
        )

        self.init_weights()
        self.is_built = True
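
The scorer selection block is repeated verbatim in all three constructors. Assuming the scorer classes are imported as in the examples above, it could be factored into a helper along these lines (a sketch, not code from the project):

# DotProductScorer, GeneralScorer, OperationScorer and MLPScorer are assumed
# to be imported exactly as in the constructors above.
def build_attn_scorer(options, query_size, key_size):
    """Hypothetical helper mirroring the if/elif chain in the constructors."""
    if options.attn_scorer == 'dot_product':
        return DotProductScorer(scaled=True)
    if options.attn_scorer == 'general':
        return GeneralScorer(query_size, key_size)
    if options.attn_scorer in ('add', 'concat'):
        return OperationScorer(query_size, key_size,
                               options.attn_hidden_size,
                               op=options.attn_scorer)
    if options.attn_scorer == 'mlp':
        return MLPScorer(query_size, key_size)
    raise Exception('Attention scorer `{}` not available'.format(
        options.attn_scorer))
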
Example #4
        nb_heads,
        query_size,
        query_size,
        query_size,
        hidden_size,
        dropout=0.1,
    )
    out, probs = attn(qs, kq, kq, mask=source_mask)
    assert list(out.shape) == [batch_size, qs.shape[1], hidden_size]
    assert list(probs.shape) == [batch_size, nb_heads, qs.shape[1], kq.shape[1]]

    '''
    Multi headed with different attentions
    '''
    # multi headed general attention on source (encoder)
    scorer = GeneralScorer(hidden_size // nb_heads, hidden_size // nb_heads)
    attn = MultiHeadedAttention(
        scorer,
        nb_heads,
        query_size,
        key_size,
        value_size,
        hidden_size,
        dropout=0.1,
    )
    out, probs = attn(kq, ks, vs)
    assert list(out.shape) == [batch_size, kq.shape[1], hidden_size]
    assert list(probs.shape) == [batch_size, nb_heads, kq.shape[1], kq.shape[1]]

    # multi headed add attention on source (encoder)
    scorer = OperationScorer(
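
The shape assertions in these tests follow from the usual multi-head bookkeeping: the hidden size is split across heads, so the attention probabilities carry an extra head dimension. A minimal plain-PyTorch sketch (not the project's MultiHeadedAttention) showing where `[batch_size, nb_heads, query_len, key_len]` comes from:

import torch

batch_size, nb_heads, hidden_size = 4, 2, 16
query_len, key_len = 5, 7
head_dim = hidden_size // nb_heads

q = torch.randn(batch_size, query_len, hidden_size)
k = torch.randn(batch_size, key_len, hidden_size)
v = torch.randn(batch_size, key_len, hidden_size)

def split_heads(x):
    # (batch, length, hidden) -> (batch, nb_heads, length, head_dim)
    return x.view(x.size(0), x.size(1), nb_heads, head_dim).transpose(1, 2)

qh, kh, vh = split_heads(q), split_heads(k), split_heads(v)
scores = qh @ kh.transpose(-2, -1) / head_dim ** 0.5   # (b, h, q_len, k_len)
probs = torch.softmax(scores, dim=-1)
out = (probs @ vh).transpose(1, 2).reshape(batch_size, query_len, hidden_size)

assert list(probs.shape) == [batch_size, nb_heads, query_len, key_len]
assert list(out.shape) == [batch_size, query_len, hidden_size]
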
Example #5
    # masked self attention on target (decoder)
    mask = sequence_mask(torch.LongTensor([2, 1, 2, 3]))
    attn = Attention(DotProductScorer(), dropout=0.1)
    out, probs = attn(qs, qs, qs, mask=mask)
    assert list(out.shape) == list(qs.shape)
    assert list(probs.shape) == [batch_size, qs.shape[1], qs.shape[1]]

    # masked self attention on source (encoder)
    mask = sequence_mask(torch.LongTensor([5, 3, 7, 4]))
    attn = Attention(DotProductScorer(), dropout=0.1)
    out, probs = attn(ks, ks, ks, mask=mask)
    assert list(out.shape) == list(ks.shape)
    assert list(probs.shape) == [batch_size, ks.shape[1], ks.shape[1]]

    # decoder attend to encoder - multiplicative attention
    attn = Attention(GeneralScorer(query_size, keys_size), dropout=0.1)
    out, probs = attn(qs, ks, ks)
    assert list(out.shape) == [batch_size, qs.shape[1], ks.shape[-1]]
    assert list(probs.shape) == [batch_size, qs.shape[1], ks.shape[1]]

    # masked encoder attend to decoder - multiplicative attention
    # this is odd but we can do it anyway :-)
    mask = sequence_mask(torch.LongTensor([2, 1, 2, 3]))
    attn = Attention(GeneralScorer(keys_size, query_size), dropout=0.1)
    out, probs = attn(ks, qs, qs, mask=mask)
    assert list(out.shape) == [batch_size, ks.shape[1], qs.shape[-1]]
    assert list(probs.shape) == [batch_size, ks.shape[1], qs.shape[1]]

    # masked decoder attend to encoder - multiplicative attention
    mask = sequence_mask(torch.LongTensor([5, 3, 7, 4]))
    attn = Attention(GeneralScorer(query_size, keys_size), dropout=0.1)
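
The masks above are produced by `sequence_mask`, which maps a tensor of sequence lengths to a boolean padding mask. The project's implementation is not shown here; below is a minimal sketch of an equivalent helper, together with the typical way such a mask is applied to attention scores.

import torch

def sequence_mask(lengths, max_len=None):
    """Hypothetical equivalent: (batch,) lengths -> (batch, max_len) bool mask,
    True for real positions and False for padding."""
    max_len = max_len or int(lengths.max())
    positions = torch.arange(max_len, device=lengths.device)
    return positions.unsqueeze(0) < lengths.unsqueeze(1)

mask = sequence_mask(torch.LongTensor([5, 3, 7, 4]))      # (4, 7) bool
scores = torch.randn(4, 7, 7)                             # (batch, q_len, k_len)
# block attention to padded key positions before the softmax
scores = scores.masked_fill(~mask.unsqueeze(1), float('-inf'))
probs = torch.softmax(scores, dim=-1)
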