def forward(self, batch):
    """
    Score every token of a batch using two parallel embedding/recurrent branches.

    The same token indices are embedded once through ``embedding_static`` and
    once through ``embedding_dyn``; each embedding is passed through dropout and
    its own recurrent layer, and the two recurrent outputs are concatenated
    along dimension 2 before being projected by ``hidden2tag``.

    :param batch: List of samples, in the form accepted by
        ``data_manager.batch_sequence``.
    :return: A pair ``(tag_scores, labels)``: the log-softmax scores reshaped to
        ``(-1, tagset_size)`` and the labels flattened to one dimension.
    """
    n_samples = len(batch)
    state_static = self.init_hidden(n_samples)
    state_dyn = self.init_hidden(n_samples)

    # the character-level data returned by the batcher is not used by this model
    tokens, labels, _ = data_manager.batch_sequence(batch, self.device)

    # branch 1: static embeddings -> dropout -> recurrent layer
    static_in = self.drop(self.embedding_static(tokens))
    static_out, state_static = self.recurrent_static(static_in, state_static)

    # branch 2: dynamic embeddings -> dropout -> recurrent layer
    dyn_in = self.drop(self.embedding_dyn(tokens))
    dyn_out, state_dyn = self.recurrent_dyn(dyn_in, state_dyn)

    # join both branches on the feature axis
    merged = torch.cat([static_out, dyn_out], dim=2)

    # project to tag space; the extra axis inserted at position 1 makes the
    # projected tensor 4-D, hence the softmax over dim 3
    projected = self.hidden2tag(merged.unsqueeze(1).contiguous())
    tag_scores = F.log_softmax(projected, dim=3)

    flat_scores = tag_scores.view(-1, self.tagset_size)
    flat_labels = labels.view(-1)
    return flat_scores, flat_labels
def forward(self, batch):
    """
    Score every token of a batch from word-level and optional extra features.

    Word indices are optionally embedded, optionally concatenated with the
    ``pos`` and ``ner`` tensors returned by the batcher, and passed through
    dropout. When character-level weights are configured, per-word character
    convolutions are computed and appended to the word features before the
    recurrent layer.

    :param batch: List of samples, in the form accepted by
        ``data_manager.batch_sequence``.
    :return: A pair ``(tag_scores, labels)``: the log-softmax scores reshaped to
        ``(-1, tagset_size)`` and the labels flattened to one dimension.
    """
    # NOTE(review): the nesting of the feature-building statements below was
    # reconstructed as a flat sequence of independent steps -- confirm against
    # the original layout.
    state = self.init_hidden(len(batch))

    features, labels, char_data, pos, ner = data_manager.batch_sequence(
        batch, self.device)

    if self.embedding is not None:
        features = self.embedding(features)

    # with extra features enabled, everything is joined on the feature axis
    if self.more_features:
        features = torch.cat([features, pos, ner], 2)

    features = self.drop(features)

    if self.c2v_weights is not None:
        char_data = self.drop(self.char_embedding(char_data))
        n_samples = char_data.size(0)
        n_words = char_data.size(2)

        convolutions = (self.ngram1, self.ngram2, self.ngram3)
        projections = (self.fc1, self.fc2, self.fc3)

        per_word = []
        for word_idx in range(n_words):
            # characters of the word at position word_idx, for every sample
            chars = char_data[:, 0, word_idx, :, :].unsqueeze(1)

            # run the three convolutions first, then the three fc layers,
            # each output reshaped to (n_samples, 1, 1, -1)
            conv_feats = [
                conv(chars).view(n_samples, 1, 1, -1) for conv in convolutions
            ]
            projected = [
                fc(feat) for fc, feat in zip(projections, conv_feats)
            ]
            per_word.append(torch.cat(projected, dim=3))

        # stack the words along dim 1 and drop the singleton axis at dim 2
        char_features = torch.cat(per_word, dim=1).squeeze(2)
        features = torch.cat([features, char_features], dim=2)

    rec_out, state = self.recurrent(features, state)

    # project to tag space; the extra axis inserted at position 1 makes the
    # projected tensor 4-D, hence the softmax over dim 3
    projected_tags = self.hidden2tag(rec_out.unsqueeze(1).contiguous())
    tag_scores = F.log_softmax(projected_tags, dim=3)
    return tag_scores.view(-1, self.tagset_size), labels.view(-1)
def forward(self, batch):
    """
    Encode a batch and decode one tag-score vector per position.

    The batch is embedded, passed through dropout and the encoder; the final
    encoder hidden state seeds the decoder, which is then run once per position
    of the encoder output, feeding back its own top-1 prediction as the next
    input.

    :param batch: List of samples, in the form accepted by
        ``data_manager.batch_sequence``.
    :return: A pair ``(scores, labels)``: the concatenated decoder outputs
        reshaped to ``(-1, tagset_size)`` and the labels flattened to one
        dimension.
    """
    batch_size = len(batch)

    # initial hidden state of the encoder
    hidden_encoder = self.init_hidden_encoder(batch_size)

    # pack data into a batch, embed and apply dropout
    data, labels, _ = data_manager.batch_sequence(batch, self.device)
    data = self.embedding_encoder(data)
    data = self.drop(data)

    # encode
    encoder_output, hidden_encoder = self.gru_encoder(data, hidden_encoder)

    # The first decoder input is the special start token, whose index is
    # tagset_size (intended to map to the last row of the decoder embedding --
    # TODO confirm the embedding has tagset_size + 1 rows).
    # The previous code called torch.add(zeros, tagset_size, zeros) and
    # discarded the returned tensor, so the start token silently stayed 0.
    decoder_input = torch.full(
        (batch_size, 1), self.tagset_size,
        dtype=torch.long, device=self.device)

    # the encoder passes its hidden state to the decoder
    if self.bidirectional:
        # the two directions are joined on the feature axis and a leading
        # axis is restored so the decoder receives a single state
        hidden_decoder = torch.cat(
            (hidden_encoder[0], hidden_encoder[1]), dim=1).unsqueeze(0)
    else:
        hidden_decoder = hidden_encoder

    # decode one position at a time, for as many positions as the encoder
    # output has along dim 1
    results = []
    for _step in range(encoder_output.size(1)):
        decoder_output, hidden_decoder = self.decoder_forward(
            decoder_input, hidden_decoder)
        results.append(decoder_output)

        # greedy feedback: the top-1 prediction becomes the next input,
        # detached so no gradient flows through the feedback path
        _, top_index = decoder_output.topk(1)
        decoder_input = top_index.squeeze(1).detach().to(self.device)

    results = torch.cat(results, dim=1)
    return results.view(-1, self.tagset_size), labels.view(-1)
def forward(self, batch):
    """
    Score every token of a batch, starting the recurrent layer from a hidden
    state computed from the whole input.

    The output of ``prepare_batch`` is embedded, flattened per sample and passed
    through ``fc`` to obtain the initial hidden state. The batch is then
    embedded again token by token and run through the recurrent layer.

    :param batch: List of samples, in the form accepted by ``prepare_batch`` and
        ``data_manager.batch_sequence``.
    :return: A pair ``(tag_scores, labels)``: the log-softmax scores reshaped to
        ``(-1, tagset_size)`` and the labels flattened to one dimension.
    """
    # --- initial hidden state -------------------------------------------
    whole_input = self.prepare_batch(batch)
    n_samples = whole_input.size(0)
    flattened = self.embedding(whole_input).view(n_samples, -1)
    state = self.fc(flattened).unsqueeze(0)
    if self.bidirectional:
        # split the state in two along the leading axis, one part per direction
        state = state.view(2, state.size(1), -1)

    # --- per-token scoring ----------------------------------------------
    # the character-level data returned by the batcher is not used here
    tokens, labels, _ = data_manager.batch_sequence(batch, self.device)
    tokens = self.drop(self.embedding(tokens))
    rec_out, state = self.recurrent(tokens, state)

    # project to tag space; the extra axis inserted at position 1 makes the
    # projected tensor 4-D, hence the softmax over dim 3
    projected = self.to_tag_space(rec_out.unsqueeze(1).contiguous())
    tag_scores = torch.nn.functional.log_softmax(projected, dim=3)

    flat_scores = tag_scores.view(-1, self.tagset_size)
    flat_labels = labels.view(-1)
    return flat_scores, flat_labels
def forward(self, batch):
    """
    Predict a tag for every token of a batch using Viterbi decoding on the CRF.

    :param batch: List of samples, in the form accepted by
        ``data_manager.batch_sequence``.
    :return: A pair ``(predictions, labels)``, both flattened to one dimension.
        Predictions are right-padded with zeros up to the width of the label
        tensor before flattening.
    """
    tokens, labels, chars = data_manager.batch_sequence(batch, self.device)
    seq_lengths = self.get_lengths(labels)

    # features from the recurrent part, then the decoded tags from the crf
    # (the scores returned alongside the tags are not needed here)
    feats = self.get_features_from_recurrent(tokens, chars, seq_lengths)
    _, predictions = self.crf.viterbi_decode(feats, seq_lengths)

    # right-pad the predictions with zeros to the width of the labels
    n_rows, target_width = labels.size()
    _, decoded_width = predictions.size()
    filler = torch.zeros(n_rows, target_width - decoded_width).long()
    filler = filler.to(self.device)
    predictions = torch.cat([predictions, filler], dim=1)
    predictions = predictions.expand(*labels.size())

    # safety net: tag ids 43 and 44 (start / stop tags) are replaced by 0
    for special_tag in (43, 44):
        predictions[predictions == special_tag] = 0

    return predictions.view(-1), labels.view(-1)
def neg_log_likelihood(self, batch):
    """
    Training loss: the negated mean, over the batch, of the difference between
    the score of the correct labelling and the score returned by the crf.

    :param batch: List of samples, in the form accepted by
        ``data_manager.batch_sequence``.
    :return: The loss, as returned by ``mean()`` on the per-sentence
        differences and then negated.
    """
    tokens, raw_labels, chars = data_manager.batch_sequence(
        batch, self.device)
    seq_lengths = self.get_lengths(raw_labels)
    gold = self.get_labels(raw_labels)

    # per-word, per-label scores coming from the recurrent part
    feats = self.get_features_from_recurrent(tokens, chars, seq_lengths)

    # score from the crf (presumably the normalisation term -- verify against
    # the crf implementation)
    norm_term = self.crf(feats, seq_lengths)

    # score the model assigns to the correct labels
    gold_term = self.score(feats, gold, seq_lengths)

    return -(gold_term - norm_term).mean()
def forward(self, batch):
    """
    Score every token of a batch, starting the recurrent layer from a hidden
    state built by convolving over the whole input.

    The output of ``prepare_batch`` is embedded, passed through dropout and fed
    to three convolution layers; their flattened outputs are concatenated to
    form the initial hidden state. The batch is then embedded token by token
    and run through the recurrent layer stored in ``self.lstm``.

    :param batch: List of samples, in the form accepted by ``prepare_batch`` and
        ``data_manager.batch_sequence``.
    :return: A pair ``(tag_scores, labels)``: the log-softmax scores reshaped to
        ``(-1, tagset_size)`` and the labels flattened to one dimension.
    """
    # --- initial hidden state from the convolutions ---------------------
    sentence_matrix = self.prepare_batch(batch)
    conv_input = self.drop(self.embedding(sentence_matrix))

    conv_layers = (self.ngram1, self.ngram2, self.ngram3)
    conv_outputs = [layer(conv_input) for layer in conv_layers]

    # one flat vector per sample and per convolution, joined on dim 1
    n_samples = conv_input.size(0)
    flat_outputs = [out.view(n_samples, -1) for out in conv_outputs]
    state = torch.cat(flat_outputs, dim=1).unsqueeze(0)
    if self.bidirectional:
        # split the state in two along the leading axis, one part per direction
        state = state.view(2, state.size(1), -1)

    # --- per-token scoring ----------------------------------------------
    tokens, labels, _ = data_manager.batch_sequence(batch, self.device)
    tokens = self.drop(self.embedding(tokens))
    rec_out, state = self.lstm(tokens, state)

    # project to tag space; the extra axis inserted at position 1 makes the
    # projected tensor 4-D, hence the softmax over dim 3
    projected = self.to_tag_space(rec_out.unsqueeze(1).contiguous())
    tag_scores = torch.nn.functional.log_softmax(projected, dim=3)

    flat_scores = tag_scores.view(-1, self.tagset_size)
    flat_labels = labels.view(-1)
    return flat_scores, flat_labels