def __init__(self, words_field, tags_field, options):
    super().__init__(words_field, tags_field)

    #
    # Embeddings
    #
    word_embeddings = None
    if self.words_field.vocab.vectors is not None:
        # pretrained vectors attached to the vocab override the configured size
        word_embeddings = self.words_field.vocab.vectors
        options.word_embeddings_size = word_embeddings.size(1)

    self.word_emb = nn.Embedding(
        num_embeddings=len(self.words_field.vocab),
        embedding_dim=options.word_embeddings_size,
        padding_idx=constants.PAD_ID,
        _weight=word_embeddings,
    )
    self.dropout_emb = nn.Dropout(options.emb_dropout)

    if options.freeze_embeddings:
        self.word_emb.weight.requires_grad = False

    features_size = options.word_embeddings_size

    #
    # CNN 1D
    #
    # convolution over the time dimension; padding keeps the output length
    # close to the input length
    self.cnn_1d = nn.Conv1d(in_channels=features_size,
                            out_channels=options.conv_size,
                            kernel_size=options.kernel_size,
                            padding=options.kernel_size // 2)
    self.max_pool = nn.MaxPool1d(options.pool_length,
                                 padding=options.pool_length // 2)
    self.dropout_cnn = nn.Dropout(options.cnn_dropout)
    self.relu = torch.nn.ReLU()
    features_size = (options.conv_size // options.pool_length
                     + options.pool_length // 2)

    #
    # Attention
    #
    # they are equal for self-attention
    query_size = key_size = value_size = features_size

    if options.attn_scorer == 'dot_product':
        self.attn_scorer = DotProductScorer(scaled=True)
    elif options.attn_scorer == 'general':
        self.attn_scorer = GeneralScorer(query_size, key_size)
    elif options.attn_scorer == 'add':
        self.attn_scorer = OperationScorer(query_size, key_size,
                                           options.attn_hidden_size,
                                           op='add')
    elif options.attn_scorer == 'concat':
        self.attn_scorer = OperationScorer(query_size, key_size,
                                           options.attn_hidden_size,
                                           op='concat')
    elif options.attn_scorer == 'mlp':
        self.attn_scorer = MLPScorer(query_size, key_size)
    else:
        raise Exception('Attention scorer `{}` not available'.format(
            options.attn_scorer))

    if options.attn_type == 'regular':
        self.attn = Attention(self.attn_scorer,
                              dropout=options.attn_dropout)
    elif options.attn_type == 'multihead':
        self.attn = MultiHeadedAttention(
            self.attn_scorer,
            options.attn_nb_heads,
            query_size,
            key_size,
            value_size,
            options.attn_multihead_hidden_size,
            dropout=options.attn_dropout)
        # multihead attention projects to its own hidden size
        features_size = options.attn_multihead_hidden_size
    else:
        raise Exception('Attention `{}` not available'.format(
            options.attn_type))

    #
    # Linear
    #
    self.linear_out = nn.Linear(features_size, self.nb_classes)

    self.init_weights()
    self.is_built = True
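
# Hypothetical sketch (not part of the original code): the `options` object consumed
# by the constructor above only needs to expose the attributes it reads, so a
# types.SimpleNamespace (or argparse.Namespace) is enough for experimentation.
# The field names below are taken from the constructor; the values are illustrative
# assumptions, not project defaults.
from types import SimpleNamespace

example_options = SimpleNamespace(
    word_embeddings_size=100,        # overridden when pretrained vectors are attached
    emb_dropout=0.5,
    freeze_embeddings=False,
    conv_size=100,                   # number of 1D convolution filters
    kernel_size=7,
    pool_length=3,
    cnn_dropout=0.5,
    attn_scorer='dot_product',       # or 'general', 'add', 'concat', 'mlp'
    attn_type='regular',             # or 'multihead'
    attn_dropout=0.0,
    attn_hidden_size=128,            # used by the 'add'/'concat' scorers
    attn_nb_heads=4,                 # used only by multihead attention
    attn_multihead_hidden_size=128,  # used only by multihead attention
)
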
def __init__(
    self,
    source_vocab_size,
    target_vocab_size,
    nb_layers=6,
    hidden_size=512,
    ff_hidden_size=2048,
    nb_heads=8,
    max_seq_len=5000,
    dropout_encoder=0.1,
    dropout_decoder=0.1,
    dropout_attention=0.1,
    dropout_emb=0.1,
):
    super().__init__()

    # for dot product they should have the same hidden size
    query_size = key_size = value_size = hidden_size

    # encoder layer blocks
    encoder_scorer = DotProductScorer()
    encoder_attn = MultiHeadedAttention(
        encoder_scorer,
        nb_heads,
        query_size,
        key_size,
        value_size,
        hidden_size,
        dropout=dropout_attention,
    )
    encoder_ff = PositionwiseFeedForward(hidden_size, ff_hidden_size)
    encoder_layer = EncoderLayer(encoder_attn,
                                 encoder_ff,
                                 dropout=dropout_encoder)

    # decoder layer blocks
    decoder_self_scorer = DotProductScorer()
    decoder_self_attn = MultiHeadedAttention(
        decoder_self_scorer,
        nb_heads,
        query_size,
        key_size,
        value_size,
        hidden_size,
        dropout=dropout_attention,
    )
    decoder_source_scorer = DotProductScorer()
    decoder_source_attn = MultiHeadedAttention(
        decoder_source_scorer,
        nb_heads,
        query_size,
        key_size,
        value_size,
        hidden_size,
        dropout=dropout_attention,
    )
    decoder_ff = PositionwiseFeedForward(hidden_size, ff_hidden_size)
    decoder_layer = DecoderLayer(
        decoder_self_attn,
        decoder_source_attn,
        decoder_ff,
        dropout=dropout_decoder,
    )

    self.encoder_emb = PositionalEmbedding(
        source_vocab_size,
        hidden_size,
        max_seq_len=max_seq_len,
        dropout=dropout_emb,
    )
    self.decoder_emb = PositionalEmbedding(
        target_vocab_size,
        hidden_size,
        max_seq_len=max_seq_len,
        dropout=dropout_emb,
    )

    self.encoder = TransformerEncoder(encoder_layer, nb_layers=nb_layers)
    self.decoder = TransformerDecoder(decoder_layer, nb_layers=nb_layers)
    self.generator = TransformerGenerator(hidden_size, target_vocab_size)

    self._init_params()
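
# Hypothetical usage sketch (not part of the original code). The enclosing class
# name is not visible in this section, so `Transformer` below is an assumed name.
# With the default arguments the constructor matches the base configuration from
# "Attention Is All You Need" (6 layers, hidden size 512, feed-forward size 2048,
# 8 heads, dropout 0.1); smaller variants only need keyword overrides:
#
#   model = Transformer(source_vocab_size=32000, target_vocab_size=32000)
#   small = Transformer(source_vocab_size=32000, target_vocab_size=32000,
#                       nb_layers=3, hidden_size=256, ff_hidden_size=1024,
#                       nb_heads=4)
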
def __init__(self, words_field, tags_field, options):
    super().__init__(words_field, tags_field)

    #
    # Embeddings
    #
    word_embeddings = None
    if self.words_field.vocab.vectors is not None:
        # pretrained vectors attached to the vocab override the configured size
        word_embeddings = self.words_field.vocab.vectors
        options.word_embeddings_size = word_embeddings.size(1)

    self.word_emb = nn.Embedding(
        num_embeddings=len(self.words_field.vocab),
        embedding_dim=options.word_embeddings_size,
        padding_idx=constants.PAD_ID,
        _weight=word_embeddings)
    features_size = options.word_embeddings_size

    if options.freeze_embeddings:
        self.word_emb.weight.requires_grad = False

    #
    # RNN
    #
    self.is_bidir = options.bidirectional
    self.sum_bidir = options.sum_bidir
    self.rnn_type = options.rnn_type

    rnn_class = nn.RNN
    batch_first = True
    if self.rnn_type == 'gru':
        rnn_class = nn.GRU
    elif self.rnn_type == 'lstm':
        rnn_class = nn.LSTM
    elif self.rnn_type == 'qrnn':
        from torchqrnn import QRNN
        rnn_class = QRNN
        batch_first = False  # QRNN does not take batch-first input

    hidden_size = options.hidden_size[0]
    self.hidden = None
    self.rnn = rnn_class(features_size,
                         hidden_size,
                         bidirectional=self.is_bidir,
                         batch_first=batch_first)
    features_size = hidden_size

    #
    # Attention
    #
    # they are equal for self-attention
    n = 1 if not self.is_bidir or self.sum_bidir else 2
    query_size = key_size = value_size = n * features_size

    if options.attn_scorer == 'dot_product':
        self.attn_scorer = DotProductScorer(scaled=True)
    elif options.attn_scorer == 'general':
        self.attn_scorer = GeneralScorer(query_size, key_size)
    elif options.attn_scorer == 'add':
        self.attn_scorer = OperationScorer(query_size, key_size,
                                           options.attn_hidden_size,
                                           op='add')
    elif options.attn_scorer == 'concat':
        self.attn_scorer = OperationScorer(query_size, key_size,
                                           options.attn_hidden_size,
                                           op='concat')
    elif options.attn_scorer == 'mlp':
        self.attn_scorer = MLPScorer(query_size, key_size)
    else:
        raise Exception('Attention scorer `{}` not available'.format(
            options.attn_scorer))

    if options.attn_type == 'regular':
        self.attn = Attention(self.attn_scorer,
                              dropout=options.attn_dropout)
    elif options.attn_type == 'multihead':
        self.attn = MultiHeadedAttention(
            self.attn_scorer,
            options.attn_nb_heads,
            query_size,
            key_size,
            value_size,
            options.attn_multihead_hidden_size,
            dropout=options.attn_dropout)
        features_size = options.attn_multihead_hidden_size
    else:
        raise Exception('Attention `{}` not available'.format(
            options.attn_type))

    #
    # CRF
    #
    self.crf = CRF(
        self.nb_classes,
        bos_tag_id=self.tags_field.vocab.stoi['_'],  # hack
        eos_tag_id=self.tags_field.vocab.stoi['.'],  # hack
        pad_tag_id=None,
        batch_first=True,
    )

    #
    # Linear
    #
    self.linear_out = nn.Linear(features_size, self.nb_classes)

    self.selu = torch.nn.SELU()
    self.dropout_emb = nn.Dropout(options.emb_dropout)
    self.dropout_rnn = nn.Dropout(options.rnn_dropout)

    self.init_weights()
    self.is_built = True
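
# Hypothetical sketch (not part of the original code): additional fields read by the
# RNN + CRF constructor above, beyond the embedding/attention options illustrated
# earlier. SimpleNamespace stands in for the project's real options object and the
# values are illustrative assumptions. Note that `hidden_size` is read as a list
# (`options.hidden_size[0]`) and that a bidirectional RNN without `sum_bidir` doubles
# the attention query/key/value size. The CRF also expects '_' and '.' to exist in
# the tags vocabulary, since they are reused as BOS/EOS tag ids (flagged as a hack
# in the original code).
from types import SimpleNamespace

example_rnn_options = SimpleNamespace(
    rnn_type='lstm',      # 'rnn' (default), 'gru', 'lstm' or 'qrnn' (needs torchqrnn)
    hidden_size=[200],    # read as options.hidden_size[0]
    bidirectional=True,
    sum_bidir=False,      # True keeps query/key/value size at hidden_size
    emb_dropout=0.5,
    rnn_dropout=0.5,
)
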
def __init__(self, words_field, tags_field, options):
    super().__init__(words_field, tags_field)

    #
    # Embeddings
    #
    word_embeddings = None
    if self.words_field.vocab.vectors is not None:
        # pretrained vectors attached to the vocab override the configured size
        word_embeddings = self.words_field.vocab.vectors
        options.word_embeddings_size = word_embeddings.size(1)

    self.word_emb = nn.Embedding(
        num_embeddings=len(self.words_field.vocab),
        embedding_dim=options.word_embeddings_size,
        padding_idx=constants.PAD_ID,
        _weight=word_embeddings,
    )
    self.dropout_emb = nn.Dropout(options.emb_dropout)

    if options.freeze_embeddings:
        self.word_emb.weight.requires_grad = False

    features_size = options.word_embeddings_size

    #
    # Attention
    #
    # they are equal for self-attention
    query_size = key_size = value_size = features_size

    if options.attn_scorer == 'dot_product':
        self.attn_scorer = DotProductScorer(scaled=True)
    elif options.attn_scorer == 'general':
        self.attn_scorer = GeneralScorer(query_size, key_size)
    elif options.attn_scorer == 'add':
        self.attn_scorer = OperationScorer(query_size, key_size,
                                           options.attn_hidden_size,
                                           op='add')
    elif options.attn_scorer == 'concat':
        self.attn_scorer = OperationScorer(query_size, key_size,
                                           options.attn_hidden_size,
                                           op='concat')
    elif options.attn_scorer == 'mlp':
        self.attn_scorer = MLPScorer(query_size, key_size)
    else:
        raise Exception('Attention scorer `{}` not available'.format(
            options.attn_scorer))

    if options.attn_type == 'regular':
        self.attn = Attention(self.attn_scorer,
                              dropout=options.attn_dropout)
    elif options.attn_type == 'multihead':
        self.attn = MultiHeadedAttention(
            self.attn_scorer,
            options.attn_nb_heads,
            query_size,
            key_size,
            value_size,
            options.attn_multihead_hidden_size,
            dropout=options.attn_dropout)
        features_size = options.attn_multihead_hidden_size
    else:
        raise Exception('Attention `{}` not available'.format(
            options.attn_type))

    #
    # Linear
    #
    self.linear_out = nn.Linear(features_size, self.nb_classes)

    #
    # CRF
    #
    self.crf = CRF(
        self.nb_classes,
        bos_tag_id=self.tags_field.vocab.stoi['_'],  # hack
        eos_tag_id=self.tags_field.vocab.stoi['.'],  # hack
        pad_tag_id=None,
        batch_first=True,
    )

    self.init_weights()
    self.is_built = True