Exemple #1
0
def test_read_dupped_with_vocab_opened(dupped_vocab, dupped_vectors):
    """Check ``read_with_vocab`` on an already-open ``*_DUPPED`` file.

    One of the ``*_DUPPED`` files is chosen at random, the fixture
    vocabulary and vectors are reduced via ``sample(..., 0.5)``, and the
    reader is handed an open file object instead of a path.  The returned
    vocabulary must equal the sampled one and the returned vectors must be
    numerically close to the sampled vectors.
    """
    data = random.choice([GLOVE_DUPPED, W2V_DUPPED, LEADER_DUPPED])
    # Only the GloVe file is opened in text mode; the others are opened as binary.
    mode = "r" if data == GLOVE_DUPPED else "rb"
    gold_vocab, gold_vectors = sample(dupped_vocab, dupped_vectors, 0.5)
    # Use a context manager so the handle is closed even when the read or
    # the assertions below fail (the bare open() call leaked it).
    with open(DATA / data, mode) as f:
        w, wv = read_with_vocab(f, gold_vocab)
    assert w == gold_vocab
    np.testing.assert_allclose(wv, gold_vectors)
Exemple #2
0
def test_read_with_vocab_opened(gold_vocab, gold_vectors):
    """Check ``read_with_vocab`` when given an already-open file object.

    The fixture vocabulary and vectors are reduced via ``sample(..., 0.5)``,
    one of the embedding files is chosen at random, and the reader is handed
    an open file object instead of a path.  The returned vocabulary must
    equal the sampled one and the returned vectors must be numerically close
    to the sampled vectors.
    """
    gold_vocab, gold_vectors = sample(gold_vocab, gold_vectors, 0.5)
    data = random.choice([GLOVE, W2V, LEADER, W2V_TEXT])
    # GLOVE and W2V_TEXT are opened in text mode; the others as binary.
    mode = "r" if data in (GLOVE, W2V_TEXT) else "rb"
    # Use a context manager so the handle is closed even when the read or
    # the assertions below fail (the bare open() call leaked it).
    with open(DATA / data, mode) as f:
        v, wv = read_with_vocab(f, gold_vocab)
    assert v == gold_vocab
    np.testing.assert_allclose(wv, gold_vectors)
Exemple #3
0
def test_read_dupped_with_vocab_with_extra(dupped_vocab, dupped_vectors):
    """Check ``read_with_vocab(..., keep_extra=True)`` on a ``*_DUPPED`` file.

    The fixture data is divided by ``split(..., 0.5)`` into a user part and
    an extra part.  The expected vocabulary numbers the user words first
    (ordered by their index), then the extra words (ordered by their index);
    the expected vectors are the user vectors stacked on the extra vectors.
    The file is passed to the reader as a ``str`` path.
    """
    filename = random.choice([GLOVE_DUPPED, W2V_DUPPED, LEADER_DUPPED])
    user_vocab, user_vectors, extra_vocab, extra_vectors = split(
        dupped_vocab, dupped_vectors, 0.5)

    # Words in expected order: user entries by index, then extra entries.
    user_items = sorted(user_vocab.items(), key=itemgetter(1))
    extra_items = sorted(extra_vocab.items(), key=itemgetter(1))
    expected_vocab = {
        word: position
        for position, (word, _) in enumerate(chain(user_items, extra_items))
    }
    expected_vectors = np.concatenate([user_vectors, extra_vectors], axis=0)

    path = str(DATA / filename)
    vocab, vectors = read_with_vocab(path, user_vocab, keep_extra=True)
    assert vocab == expected_vocab
    np.testing.assert_allclose(vectors, expected_vectors)
Exemple #4
0
def test_read_with_vocab_extra_pathlib(gold_vocab, gold_vectors):
    """Check ``read_with_vocab(..., keep_extra=True)`` with a ``DATA / name`` path.

    The fixture data is divided by ``split(..., 0.5)`` into a user part and
    an extra part.  The expected vocabulary numbers the user words first
    (ordered by their index), then the extra words (ordered by their index);
    the expected vectors are the user vectors stacked on the extra vectors.
    The path is passed as built by ``DATA / name``, without ``str()``.
    """
    user_vocab, user_vectors, extra_vocab, extra_vectors = split(
        gold_vocab, gold_vectors, 0.5)

    # Words in expected order: user entries by index, then extra entries.
    user_items = sorted(user_vocab.items(), key=itemgetter(1))
    extra_items = sorted(extra_vocab.items(), key=itemgetter(1))
    expected_vocab = {
        word: position
        for position, (word, _) in enumerate(chain(user_items, extra_items))
    }
    expected_vectors = np.concatenate([user_vectors, extra_vectors], axis=0)

    filename = random.choice([GLOVE, W2V, LEADER, W2V_TEXT])
    path = DATA / filename
    vocab, vectors = read_with_vocab(path, user_vocab, keep_extra=True)
    assert vocab == expected_vocab
    np.testing.assert_allclose(vectors, expected_vectors)
Exemple #5
0
def test_read_dupped_with_vocab_pathlib(dupped_vocab, dupped_vectors):
    """Check ``read_with_vocab`` on a ``*_DUPPED`` file given as ``DATA / name``.

    A ``*_DUPPED`` file is chosen at random and the fixture data is reduced
    via ``sample(..., 0.5)``.  The reader must return exactly the sampled
    vocabulary and vectors numerically close to the sampled ones.
    """
    filename = random.choice([GLOVE_DUPPED, W2V_DUPPED, LEADER_DUPPED])
    expected_vocab, expected_vectors = sample(
        dupped_vocab, dupped_vectors, 0.5)

    # Path is handed over as built by ``DATA / filename`` (no str() conversion).
    path = DATA / filename
    vocab, vectors = read_with_vocab(path, expected_vocab)

    assert vocab == expected_vocab
    np.testing.assert_allclose(vectors, expected_vectors)
Exemple #6
0
def test_read_with_vocab_pathlib(gold_vocab, gold_vectors):
    """Check ``read_with_vocab`` on a file given as ``DATA / name``.

    The fixture data is reduced via ``sample(..., 0.5)`` and one of the
    embedding files is chosen at random.  The reader must return exactly the
    sampled vocabulary and vectors numerically close to the sampled ones.
    """
    expected_vocab, expected_vectors = sample(gold_vocab, gold_vectors, 0.5)
    filename = random.choice([GLOVE, W2V, LEADER, W2V_TEXT])

    # Path is handed over as built by ``DATA / filename`` (no str() conversion).
    path = DATA / filename
    vocab, vectors = read_with_vocab(path, expected_vocab)

    assert vocab == expected_vocab
    np.testing.assert_allclose(vectors, expected_vectors)