def __init__(
    self,
    scorer,
    nb_heads,
    query_size,
    key_size,
    value_size,
    hidden_size,
    dropout=0.1,
):
    super().__init__()
    # ensure hidden size is divisible by the nb of heads
    assert hidden_size % nb_heads == 0
    self.hidden_size = hidden_size
    self.nb_heads = nb_heads
    self.heads_size = hidden_size // nb_heads
    self.value_size = value_size
    self.proj_queries = nn.Linear(query_size, hidden_size)
    self.proj_keys = nn.Linear(key_size, hidden_size)
    self.proj_values = nn.Linear(value_size, hidden_size)
    self.attention = Attention(scorer, dropout=dropout)
    self.p_attn = None  # useful if you'd like to see attention weights
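# A minimal usage sketch (not from the repo): constructing this module for
# self-attention over 128-dimensional features with 4 heads. `DotProductScorer`,
# `Attention`, and `MultiHeadedAttention` are the classes defined in this
# codebase; the sizes below are arbitrary illustration values.
scorer = DotProductScorer(scaled=True)
multihead_attn = MultiHeadedAttention(
    scorer,
    nb_heads=4,
    query_size=128,
    key_size=128,
    value_size=128,
    hidden_size=256,  # must be divisible by nb_heads (see the assert above)
    dropout=0.1,
)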
def __init__(self, words_field, tags_field, options):
    super().__init__(words_field, tags_field)

    #
    # Embeddings
    #
    word_embeddings = None
    if self.words_field.vocab.vectors is not None:
        word_embeddings = self.words_field.vocab.vectors
        options.word_embeddings_size = word_embeddings.size(1)

    self.word_emb = nn.Embedding(
        num_embeddings=len(self.words_field.vocab),
        embedding_dim=options.word_embeddings_size,
        padding_idx=constants.PAD_ID,
        _weight=word_embeddings,
    )
    self.dropout_emb = nn.Dropout(options.emb_dropout)

    if options.freeze_embeddings:
        self.word_emb.weight.requires_grad = False

    features_size = options.word_embeddings_size

    #
    # CNN 1D
    #
    self.cnn_1d = nn.Conv1d(in_channels=features_size,
                            out_channels=options.conv_size,
                            kernel_size=options.kernel_size,
                            padding=options.kernel_size // 2)
    self.max_pool = nn.MaxPool1d(options.pool_length,
                                 padding=options.pool_length // 2)
    self.dropout_cnn = nn.Dropout(options.cnn_dropout)
    self.relu = torch.nn.ReLU()

    features_size = (options.conv_size // options.pool_length
                     + options.pool_length // 2)

    #
    # Attention
    #
    # they are equal for self-attention
    query_size = key_size = value_size = features_size

    if options.attn_scorer == 'dot_product':
        self.attn_scorer = DotProductScorer(scaled=True)
    elif options.attn_scorer == 'general':
        self.attn_scorer = GeneralScorer(query_size, key_size)
    elif options.attn_scorer == 'add':
        self.attn_scorer = OperationScorer(query_size,
                                           key_size,
                                           options.attn_hidden_size,
                                           op='add')
    elif options.attn_scorer == 'concat':
        self.attn_scorer = OperationScorer(query_size,
                                           key_size,
                                           options.attn_hidden_size,
                                           op='concat')
    elif options.attn_scorer == 'mlp':
        self.attn_scorer = MLPScorer(query_size, key_size)
    else:
        raise Exception('Attention scorer `{}` not available'.format(
            options.attn_scorer))

    if options.attn_type == 'regular':
        self.attn = Attention(self.attn_scorer,
                              dropout=options.attn_dropout)
    elif options.attn_type == 'multihead':
        self.attn = MultiHeadedAttention(
            self.attn_scorer,
            options.attn_nb_heads,
            query_size,
            key_size,
            value_size,
            options.attn_multihead_hidden_size,
            dropout=options.attn_dropout)
        features_size = options.attn_multihead_hidden_size
    else:
        raise Exception('Attention `{}` not available'.format(
            options.attn_type))

    #
    # Linear
    #
    self.linear_out = nn.Linear(features_size, self.nb_classes)

    self.init_weights()
    self.is_built = True
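# Worked example of the feature-size formula above, assuming (as the formula
# suggests) that the max-pooling reduces the feature dimension. The values are
# illustrative, not repo defaults.
conv_size, pool_length = 100, 3  # hypothetical option values
features_size = conv_size // pool_length + pool_length // 2
assert features_size == 34       # 33 + 1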
def __init__(self, words_field, tags_field, options):
    super().__init__(words_field, tags_field)

    #
    # Embeddings
    #
    word_embeddings = None
    if self.words_field.vocab.vectors is not None:
        word_embeddings = self.words_field.vocab.vectors
        options.word_embeddings_size = word_embeddings.size(1)

    self.word_emb = nn.Embedding(
        num_embeddings=len(self.words_field.vocab),
        embedding_dim=options.word_embeddings_size,
        padding_idx=constants.PAD_ID,
        _weight=word_embeddings)
    features_size = options.word_embeddings_size

    if options.freeze_embeddings:
        self.word_emb.weight.requires_grad = False

    #
    # RNN
    #
    self.is_bidir = options.bidirectional
    self.sum_bidir = options.sum_bidir
    self.rnn_type = options.rnn_type

    rnn_class = nn.RNN
    batch_first = True
    if self.rnn_type == 'gru':
        rnn_class = nn.GRU
    elif self.rnn_type == 'lstm':
        rnn_class = nn.LSTM
    elif self.rnn_type == 'qrnn':
        from torchqrnn import QRNN
        rnn_class = QRNN
        # QRNN does not support batch-first inputs
        batch_first = False

    hidden_size = options.hidden_size[0]
    self.hidden = None
    self.rnn = rnn_class(features_size,
                         hidden_size,
                         bidirectional=self.is_bidir,
                         batch_first=batch_first)
    features_size = hidden_size

    #
    # Attention
    #
    # they are equal for self-attention
    # a bidirectional rnn doubles the feature size unless its directions are summed
    n = 1 if not self.is_bidir or self.sum_bidir else 2
    query_size = key_size = value_size = n * features_size

    if options.attn_scorer == 'dot_product':
        self.attn_scorer = DotProductScorer(scaled=True)
    elif options.attn_scorer == 'general':
        self.attn_scorer = GeneralScorer(query_size, key_size)
    elif options.attn_scorer == 'add':
        self.attn_scorer = OperationScorer(query_size,
                                           key_size,
                                           options.attn_hidden_size,
                                           op='add')
    elif options.attn_scorer == 'concat':
        self.attn_scorer = OperationScorer(query_size,
                                           key_size,
                                           options.attn_hidden_size,
                                           op='concat')
    elif options.attn_scorer == 'mlp':
        self.attn_scorer = MLPScorer(query_size, key_size)
    else:
        raise Exception('Attention scorer `{}` not available'.format(
            options.attn_scorer))

    if options.attn_type == 'regular':
        self.attn = Attention(self.attn_scorer,
                              dropout=options.attn_dropout)
    elif options.attn_type == 'multihead':
        self.attn = MultiHeadedAttention(
            self.attn_scorer,
            options.attn_nb_heads,
            query_size,
            key_size,
            value_size,
            options.attn_multihead_hidden_size,
            dropout=options.attn_dropout)
        features_size = options.attn_multihead_hidden_size
    else:
        raise Exception('Attention `{}` not available'.format(
            options.attn_type))

    self.crf = CRF(
        self.nb_classes,
        bos_tag_id=self.tags_field.vocab.stoi['_'],  # hack
        eos_tag_id=self.tags_field.vocab.stoi['.'],  # hack
        pad_tag_id=None,
        batch_first=True,
    )

    #
    # Linear
    #
    self.linear_out = nn.Linear(features_size, self.nb_classes)

    self.selu = torch.nn.SELU()
    self.dropout_emb = nn.Dropout(options.emb_dropout)
    self.dropout_rnn = nn.Dropout(options.rnn_dropout)

    self.init_weights()
    self.is_built = True
def __init__(self, words_field, tags_field, options):
    super().__init__(words_field, tags_field)

    #
    # Embeddings
    #
    word_embeddings = None
    if self.words_field.vocab.vectors is not None:
        word_embeddings = self.words_field.vocab.vectors
        options.word_embeddings_size = word_embeddings.size(1)

    self.word_emb = nn.Embedding(
        num_embeddings=len(self.words_field.vocab),
        embedding_dim=options.word_embeddings_size,
        padding_idx=constants.PAD_ID,
        _weight=word_embeddings,
    )
    self.dropout_emb = nn.Dropout(options.emb_dropout)

    if options.freeze_embeddings:
        self.word_emb.weight.requires_grad = False

    features_size = options.word_embeddings_size

    #
    # Attention
    #
    # they are equal for self-attention
    query_size = key_size = value_size = features_size

    if options.attn_scorer == 'dot_product':
        self.attn_scorer = DotProductScorer(scaled=True)
    elif options.attn_scorer == 'general':
        self.attn_scorer = GeneralScorer(query_size, key_size)
    elif options.attn_scorer == 'add':
        self.attn_scorer = OperationScorer(query_size,
                                           key_size,
                                           options.attn_hidden_size,
                                           op='add')
    elif options.attn_scorer == 'concat':
        self.attn_scorer = OperationScorer(query_size,
                                           key_size,
                                           options.attn_hidden_size,
                                           op='concat')
    elif options.attn_scorer == 'mlp':
        self.attn_scorer = MLPScorer(query_size, key_size)
    else:
        raise Exception('Attention scorer `{}` not available'.format(
            options.attn_scorer))

    if options.attn_type == 'regular':
        self.attn = Attention(self.attn_scorer,
                              dropout=options.attn_dropout)
    elif options.attn_type == 'multihead':
        self.attn = MultiHeadedAttention(
            self.attn_scorer,
            options.attn_nb_heads,
            query_size,
            key_size,
            value_size,
            options.attn_multihead_hidden_size,
            dropout=options.attn_dropout)
        features_size = options.attn_multihead_hidden_size
    else:
        raise Exception('Attention `{}` not available'.format(
            options.attn_type))

    #
    # Linear
    #
    self.linear_out = nn.Linear(features_size, self.nb_classes)

    self.crf = CRF(
        self.nb_classes,
        bos_tag_id=self.tags_field.vocab.stoi['_'],  # hack
        eos_tag_id=self.tags_field.vocab.stoi['.'],  # hack
        pad_tag_id=None,
        batch_first=True,
    )

    self.init_weights()
    self.is_built = True
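# The scorer-selection branch above is duplicated verbatim across the CNN, RNN,
# and attention constructors. A hedged sketch of one way it could be factored
# out; `build_scorer` is a hypothetical helper, not part of the repo, and it
# only reuses the scorer classes and arguments already shown above.
def build_scorer(name, query_size, key_size, attn_hidden_size):
    if name == 'dot_product':
        return DotProductScorer(scaled=True)
    if name == 'general':
        return GeneralScorer(query_size, key_size)
    if name in ('add', 'concat'):
        return OperationScorer(query_size, key_size, attn_hidden_size, op=name)
    if name == 'mlp':
        return MLPScorer(query_size, key_size)
    raise Exception('Attention scorer `{}` not available'.format(name))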