def test_trellis_score():
    """Check Viterbi path scores on two hand-scored sentences.

    Builds emission/transition weight variables from the HMM weights and
    verifies that ``viterbi.build_trellis`` returns the expected total
    log-score for "they can can fish" (-17) and for the longer
    "they can can can can fish" (-25).
    """
    global nb_weights, hmm_trans_weights, tag_to_ix, all_tags, vocab, word_to_ix
    # NOTE(review): the original also built an `initial_vec`/`prev_scores`
    # START_TAG score vector, but never passed it to build_trellis and never
    # declared it global — dead code, removed.
    sentence = "they can can fish".split()
    emission_probs, tag_transition_probs = hmm.compute_weights_variables(
        nb_weights, hmm_trans_weights, vocab, word_to_ix, tag_to_ix)
    path_score, best_path = viterbi.build_trellis(
        all_tags, tag_to_ix,
        [emission_probs[word_to_ix[w]] for w in sentence],
        tag_transition_probs)
    eq_(path_score.data.numpy(), -17)

    sentence = "they can can can can fish".split()
    path_score, best_path = viterbi.build_trellis(
        all_tags, tag_to_ix,
        [emission_probs[word_to_ix[w]] for w in sentence],
        tag_transition_probs)
    eq_(path_score.data.numpy(), -25.)
def predict(self, sentence):
    """Predict the best tag sequence for *sentence* via the Viterbi algorithm.

    Inputs:
    - sentence: the input token sequence; it is run through the network
      (``self.forward``) to obtain per-token hidden-state scores, which act
      as the emission potentials for each tag.
    :returns:
    - the best_path, a sequence of tags — the highest-scoring tag sequence
      found by ``viterbi.build_trellis``.
    """
    # Reshape the network output to (num_tokens, num_tags) so each row is
    # the emission potential vector for one token.
    lstm_feats = self.forward(sentence).view(len(sentence), -1)
    # Iterating a dict yields its keys; no need to unpack .items() when the
    # values are unused.
    all_tags = list(self.tag_to_ix)
    score, path = viterbi.build_trellis(all_tags, self.tag_to_ix,
                                        lstm_feats, self.transitions)
    return path
def test_hmm_on_example_sentence():
    """Decode 'they can can fish .' with the HMM and check score and tags.

    NOTE(review): a later definition with this exact name shadows this test
    at import time — only one of the two copies actually runs.
    """
    global nb_weights, hmm_trans_weights, all_tags
    # Assign each tag a unique consecutive index.
    tag_to_ix = {tag: ix for ix, tag in enumerate(all_tags)}
    vocab, word_to_ix = most_common.get_word_to_ix(TRAIN_FILE)
    emission_probs, tag_transition_probs = hmm.compute_weights_variables(
        nb_weights, hmm_trans_weights, vocab, word_to_ix, tag_to_ix)
    tokens = ['they', 'can', 'can', 'fish', '.']
    score, pred_tags = viterbi.build_trellis(
        all_tags, tag_to_ix,
        [emission_probs[word_to_ix[tok]] for tok in tokens],
        tag_transition_probs)
    assert_almost_equal(score.data.numpy()[0], -32.4456, places=2)
    eq_(pred_tags, ['PRON', 'AUX', 'AUX', 'NOUN', 'PUNCT'])
def test_hmm_on_example_sentence():
    """HMM + Viterbi smoke test: score and tag sequence for a fixed sentence."""
    global nb_weights, hmm_trans_weights, all_tags
    # Build the tag -> index map, preserving the iteration order of all_tags.
    tag_to_ix = dict()
    for position, tag in enumerate(list(all_tags)):
        tag_to_ix[tag] = position
    vocab, word_to_ix = most_common.get_word_to_ix(TRAIN_FILE)
    emission_probs, tag_transition_probs = hmm.compute_weights_variables(
        nb_weights, hmm_trans_weights, vocab, word_to_ix, tag_to_ix)
    sentence = ['they', 'can', 'can', 'fish', '.']
    features = [emission_probs[word_to_ix[word]] for word in sentence]
    score, pred_tags = viterbi.build_trellis(all_tags, tag_to_ix, features,
                                             tag_transition_probs)
    assert_almost_equal(score.data.numpy()[0], -32.4456, places=2)
    eq_(pred_tags, ['PRON', 'AUX', 'AUX', 'NOUN', 'PUNCT'])
def test_build_trellis():
    """Check trellis shape, scores and backpointers on a short sentence.

    NOTE(review): a later definition with this exact name shadows this test
    at import time — only one of the two copies actually runs.
    """
    global hand_weights
    words = "they can can fish".split()
    tagset = ['NOUN', 'VERB']
    # modify the hand weights so you can't read off the answer to 3.1 :)
    hand_weights['NOUN', 'they', EMIT] = -2
    hand_weights['VERB', 'fish', EMIT] = -5
    hand_weights['VERB', 'VERB', TRANS] = -2
    trellis = viterbi.build_trellis(words, hmm.hmm_features, hand_weights, tagset)
    eq_(len(trellis), 4)
    eq_(len(trellis[-1]), 2)
    # Each trellis cell maps tag -> (best score, backpointer tag).
    expected_cells = {
        (-1, 'VERB'): (-18, 'VERB'),
        (-2, 'NOUN'): (-11, 'VERB'),
        (1, 'VERB'): (-7, 'NOUN'),
        (1, 'NOUN'): (-11, 'NOUN'),
    }
    for (position, tag), cell in expected_cells.items():
        eq_(trellis[position][tag], cell)
def test_build_trellis():
    """Verify Viterbi trellis scores and backpointers for 'they can can fish'."""
    global hand_weights
    tokens = "they can can fish".split()
    candidate_tags = ['NOUN', 'VERB']
    # modify the hand weights so you can't read off the answer to 3.1 :)
    hand_weights['NOUN', 'they', EMIT] = -2
    hand_weights['VERB', 'fish', EMIT] = -5
    hand_weights['VERB', 'VERB', TRANS] = -2
    trellis = viterbi.build_trellis(tokens, hmm.hmm_features, hand_weights,
                                    candidate_tags)
    # One column per token, one entry per candidate tag in the last column.
    eq_(len(trellis), 4)
    eq_(len(trellis[-1]), 2)
    # Spot-check (score, backpointer) pairs at a few positions.
    eq_(trellis[-1]['VERB'], (-18, 'VERB'))
    eq_(trellis[-2]['NOUN'], (-11, 'VERB'))
    eq_(trellis[1]['VERB'], (-7, 'NOUN'))
    eq_(trellis[1]['NOUN'], (-11, 'NOUN'))