def test_read_dupped_with_vocab_opened(dupped_vocab, dupped_vectors):
    """Check ``read_with_vocab`` on an already-opened duplicated-entry file.

    One of the ``*_DUPPED`` files is picked at random and opened by the test
    itself (text mode for ``GLOVE_DUPPED``, binary mode otherwise). The
    vocab/vectors passed to the reader come from ``sample(..., 0.5)`` and
    the reader's output must match them.
    """
    data = random.choice([GLOVE_DUPPED, W2V_DUPPED, LEADER_DUPPED])
    mode = "r" if data == GLOVE_DUPPED else "rb"
    gold_vocab, gold_vectors = sample(dupped_vocab, dupped_vectors, 0.5)
    # Use a context manager so the handle is closed even if the reader raises;
    # the original left the file object open for the garbage collector.
    with open(DATA / data, mode) as f:
        w, wv = read_with_vocab(f, gold_vocab)
    assert w == gold_vocab
    np.testing.assert_allclose(wv, gold_vectors)
def test_read_with_vocab_opened(gold_vocab, gold_vectors):
    """Check ``read_with_vocab`` when handed an already-opened file object.

    The fixtures are first reduced with ``sample(..., 0.5)``; then one of the
    embedding files is picked at random and opened by the test (text mode for
    ``GLOVE`` and ``W2V_TEXT``, binary mode otherwise). The reader's output
    must match the sampled vocab and vectors.
    """
    gold_vocab, gold_vectors = sample(gold_vocab, gold_vectors, 0.5)
    data = random.choice([GLOVE, W2V, LEADER, W2V_TEXT])
    mode = "r" if data in (GLOVE, W2V_TEXT) else "rb"
    # Use a context manager so the handle is closed even if the reader raises;
    # the original left the file object open for the garbage collector.
    with open(DATA / data, mode) as f:
        v, wv = read_with_vocab(f, gold_vocab)
    assert v == gold_vocab
    np.testing.assert_allclose(wv, gold_vectors)
def test_read_dupped_with_vocab_with_extra(dupped_vocab, dupped_vectors):
    """Check ``read_with_vocab(..., keep_extra=True)`` on a duplicated-entry file.

    The fixtures are divided by ``split(..., 0.5)`` into a user part and an
    extra part. The expected vocab numbers the user words first (ordered by
    their original index) and then the extra words (same ordering); the
    expected vectors are the user vectors followed by the extra vectors.
    The file is passed to the reader as a ``str`` path.
    """
    filename = random.choice([GLOVE_DUPPED, W2V_DUPPED, LEADER_DUPPED])
    user_vocab, user_vectors, extra_vocab, extra_vectors = split(
        dupped_vocab, dupped_vectors, 0.5
    )

    by_index = itemgetter(1)
    ordered_items = chain(
        sorted(user_vocab.items(), key=by_index),
        sorted(extra_vocab.items(), key=by_index),
    )
    expected_vocab = {
        word: position for position, (word, _) in enumerate(ordered_items)
    }
    expected_vectors = np.concatenate([user_vectors, extra_vectors], axis=0)

    path = str(DATA / filename)
    vocab, vectors = read_with_vocab(path, user_vocab, keep_extra=True)

    assert vocab == expected_vocab
    np.testing.assert_allclose(vectors, expected_vectors)
def test_read_with_vocab_extra_pathlib(gold_vocab, gold_vectors):
    """Check ``read_with_vocab(..., keep_extra=True)`` given a path object.

    The fixtures are divided by ``split(..., 0.5)`` into a user part and an
    extra part. The expected vocab numbers the user words first (ordered by
    their original index) and then the extra words (same ordering); the
    expected vectors are the user vectors followed by the extra vectors.
    The file is passed to the reader as ``DATA / <name>`` without conversion.
    """
    user_vocab, user_vectors, extra_vocab, extra_vectors = split(
        gold_vocab, gold_vectors, 0.5
    )

    by_index = itemgetter(1)
    ordered_items = chain(
        sorted(user_vocab.items(), key=by_index),
        sorted(extra_vocab.items(), key=by_index),
    )
    expected_vocab = {
        word: position for position, (word, _) in enumerate(ordered_items)
    }
    expected_vectors = np.concatenate([user_vectors, extra_vectors], axis=0)

    # Chosen after split(), matching the original call order.
    filename = random.choice([GLOVE, W2V, LEADER, W2V_TEXT])
    vocab, vectors = read_with_vocab(DATA / filename, user_vocab, keep_extra=True)

    assert vocab == expected_vocab
    np.testing.assert_allclose(vectors, expected_vectors)
def test_read_dupped_with_vocab_pathlib(dupped_vocab, dupped_vectors):
    """Check ``read_with_vocab`` on a duplicated-entry file given a path object.

    One of the ``*_DUPPED`` files is picked at random, the fixtures are
    reduced with ``sample(..., 0.5)``, and the reader's output must match the
    sampled vocab and vectors.
    """
    filename = random.choice([GLOVE_DUPPED, W2V_DUPPED, LEADER_DUPPED])
    expected_vocab, expected_vectors = sample(dupped_vocab, dupped_vectors, 0.5)

    path = DATA / filename
    vocab, vectors = read_with_vocab(path, expected_vocab)

    assert vocab == expected_vocab
    np.testing.assert_allclose(vectors, expected_vectors)
def test_read_with_vocab_pathlib(gold_vocab, gold_vectors):
    """Check ``read_with_vocab`` given a path object.

    The fixtures are reduced with ``sample(..., 0.5)``, one of the embedding
    files is picked at random, and the reader's output must match the sampled
    vocab and vectors.
    """
    expected_vocab, expected_vectors = sample(gold_vocab, gold_vectors, 0.5)
    filename = random.choice([GLOVE, W2V, LEADER, W2V_TEXT])

    path = DATA / filename
    vocab, vectors = read_with_vocab(path, expected_vocab)

    assert vocab == expected_vocab
    np.testing.assert_allclose(vectors, expected_vectors)