def test_word2idx_return_unknown_tag(vocabulary: dp.Vocabulary) -> None:
    """Check that an unrecognised word gets the same index as ``<UNK>``.

    The probe word is presumed to be absent from the fixture vocabulary.
    """
    # given
    unseen_word = "testnonexistingword"
    unk_index = vocabulary.word2idx("<UNK>")

    # when
    looked_up_index = vocabulary.word2idx(unseen_word)

    # then
    assert looked_up_index == unk_index
def test_idx2word_raises_index_error(vocabulary: dp.Vocabulary) -> None:
    """Check that ``idx2word(-1)`` raises ``IndexError`` with the exact message."""
    # given
    invalid_index = -1
    message = "No word is mapped to -1"

    # when / then
    with pytest.raises(IndexError) as exc_info:
        vocabulary.idx2word(invalid_index)

    # Compare the full message text, not just a substring or pattern.
    raised_text = str(exc_info.value)
    assert raised_text == message
def test_decode_caption(vocabulary: dp.Vocabulary) -> None:
    """Check that encoded indices for "a" and "man" decode back to "a man"."""
    # given
    encoded = [vocabulary.word2idx(token) for token in ("a", "man")]
    sentence = "a man"

    # when
    decoded = dp.TextPipeline.decode_caption(vocabulary, encoded)

    # then
    assert decoded == sentence
def plot_embeddings(
    embeddings: np.array,
    words: List[str],
    vocab: dp.Vocabulary = None,
    out_path: str = None,
) -> None:
    """Plot word embeddings in 2D.

    Each word is drawn as a red point with a blue text label placed slightly
    up and to the right of it. The figure is always shown; it is also written
    to disk when ``out_path`` is given.

    Args:
        embeddings (np.array): Embeddings matrix. It is passed to
            ``reduce_to_k_dim`` and the result is indexed by vocabulary index.
        words (List[str]): List of words to be plotted. Each word is
            lower-cased for the vocabulary lookup; the label keeps the
            original spelling.
        vocab (dp.Vocabulary, optional): Vocabulary used to map words to
            indices. Defaults to None, in which case a new
            ``dp.Vocabulary()`` is created for the call.
        out_path (str, optional): If not None, the figure is saved to this
            path (dpi=100) before being shown. Defaults to None.
    """
    # Build the default lazily: a call in the signature would run once at
    # definition time (i.e. on import) and be shared by every later call.
    if vocab is None:
        vocab = dp.Vocabulary()

    E_reduced = reduce_to_k_dim(embeddings)

    x = []
    y = []
    for word in words:
        # Each row of the reduced matrix is unpacked as an (x, y) pair.
        word_x, word_y = E_reduced[vocab.word2idx(word.lower())]
        x.append(word_x)
        y.append(word_y)
        # Offset the label a little so it does not sit on top of the marker.
        plt.annotate(word, (word_x + 0.01, word_y + 0.01), color="blue", fontsize=12)

    plt.scatter(x, y, c="red")

    # Save before showing, while the figure is still the current one.
    if out_path is not None:
        plt.savefig(out_path, dpi=100)
    plt.show()
def test_word2idx(vocabulary: dp.Vocabulary) -> None:
    """Check that the word "a" is mapped to index 0."""
    # given
    token = "a"
    expected_index = 0

    # when
    actual_index = vocabulary.word2idx(token)

    # then
    assert actual_index == expected_index
def test_idx2word(vocabulary: dp.Vocabulary) -> None:
    """Check that index 0 is mapped back to the word "a"."""
    # given
    position = 0
    expected_word = "a"

    # when
    actual_word = vocabulary.idx2word(position)

    # then
    assert actual_word == expected_word