Example #1
def process_input(data):
    """
    Input is in the form of tuples of (class:int, sent:string)
    """
    return [(prepare_vec_sequence(wordpunct_tokenize(sent),
                                  word_to_vec,
                                  SENTENCE_DIM,
                                  output='tensor'), label)
            for sent, label in data]
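A minimal usage sketch (the sentences and labels below are hypothetical; prepare_vec_sequence, word_to_vec, and SENTENCE_DIM come from the surrounding project):

    # Hypothetical labelled data: (sentence, class) pairs.
    data = [("the service was excellent", 1), ("the food arrived cold", 0)]
    dataset = process_input(data)  # list of (sentence tensor, label) pairs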
Example #2
def predict(model, input_data, k=1):
    with torch.no_grad():
        result = []
        for sentence in input_data:
            tokens_in = wordpunct_tokenize(sentence)
            sentence_in = prepare_vec_sequence(tokens_in, word_to_vec, SENTENCE_DIM, output='variable')
            # Convert the model's raw outputs into probabilities over classes.
            scores = F.softmax(model(sentence_in), dim=-1)
            # torch.topk returns a (values, indices) pair with the k highest scores.
            topk_scores = torch.topk(scores, k)

            result.append(topk_scores)
        return result
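Since torch.topk returns a (values, indices) pair, each entry in the returned list can be unpacked into the top-k probabilities and their class indices. A minimal consumption sketch (the input sentence is hypothetical):

    predictions = predict(model, ["an example sentence to classify"], k=3)
    for values, indices in predictions:
        for prob, class_idx in zip(values.view(-1).tolist(), indices.view(-1).tolist()):
            print(class_idx, prob)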
Example #3
    def neg_log_likelihood(self, sentence, tags):
        word_embeds = prepare_vec_sequence(sentence,
                                           word_to_vec,
                                           output='variable')

        if self.is_cuda:
            word_embeds = word_embeds.cuda()

        # Character-level encoding of each word, reshaped to align with the word embeddings.
        char_embeds = self.word_encoder(sentence).unsqueeze(1)

        # Concatenate word-level and character-level features along the last dimension.
        sentence_in = torch.cat((word_embeds, char_embeds), dim=-1)
        sentence_in = self.dropout(sentence_in)

        feats = self._get_lstm_features(sentence_in)
        # CRF negative log-likelihood: log partition function (forward algorithm)
        # minus the score of the gold tag sequence.
        forward_score = self._forward_alg(feats)
        gold_score = self._score_sentence(feats, tags)
        return forward_score - gold_score
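The returned value is -log p(tags | sentence), so it can be minimized directly as the training loss. A minimal training-loop sketch (the optimizer choice, learning rate, and the structure of training_data are assumptions):

    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
    model.train()
    for sentence, tags in training_data:
        model.zero_grad()
        loss = model.neg_log_likelihood(sentence, tags)
        loss.backward()
        optimizer.step()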
Example #4
    def forward(self, sentence):  # don't confuse this with _forward_alg above.
        word_embeds = prepare_vec_sequence(sentence,
                                           word_to_vec,
                                           output='variable')

        if self.is_cuda:
            word_embeds = word_embeds.cuda()

        char_embeds = self.word_encoder(sentence).unsqueeze(1)

        sentence_in = torch.cat((word_embeds, char_embeds), dim=-1)

        # Get the emission scores from the BiLSTM
        lstm_feats = self._get_lstm_features(sentence_in)

        # Find the best path, given the features.
        score, tag_seq = self._viterbi_decode(lstm_feats)
        return score, tag_seq
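At inference time the forward pass returns the Viterbi score together with the best tag sequence. A minimal sketch (test_sentence is a hypothetical tokenized input):

    model.eval()
    with torch.no_grad():
        score, tag_seq = model(test_sentence)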