Esempio n. 1
0
def test_vectorize_docs():
    """Check ``vectorize_docs`` against the fixture corpus in data/corpus.txt.

    Verifies the first two vocabulary entries, the vocabulary size, the
    number of encoded documents, and that mapping each encoded document's
    indices back through the vocabulary reproduces the original tokens.
    """
    docs = load_line_corpus(CURDIR + "/data/corpus.txt")
    mat, vocab = vectorize_docs(docs)

    # Diagnostic output only. Written in call form so the statement parses
    # on both Python 2 and Python 3 (the bare ``print vocab`` statement is a
    # SyntaxError on Python 3); with a single argument the output is the same.
    print(vocab)

    assert_equal(vocab[0], u"product")
    assert_equal(vocab[1], u"right")
    assert_equal(len(vocab), 14)

    assert_equal(len(mat), 2)

    # Round trip: decoding each encoded document via the vocabulary must
    # give back the token list it was built from.
    for doc, doc_m in zip(docs, mat):
        assert_equal([vocab[w] for w in doc_m], doc)
Esempio n. 2
0
def test_load_line_corpus():
    """Check that ``load_line_corpus`` yields the expected fixture content.

    The fixture must load as two documents, and the first one must equal
    the token list spelled out below.
    """
    corpus_path = CURDIR + "/data/corpus.txt"
    expected_first_doc = [
        u"product",
        u"defin",
        u"number",
        u"column",
        u"figur",
        u"right",
        u"illustr",
        u"diagrammat",
        u"product",
        u"two",
        u"matric",
    ]

    loaded = load_line_corpus(corpus_path)

    assert_equal(len(loaded), 2)
    assert_equal(loaded[0], expected_first_doc)
Esempio n. 3
0
def test_doc2term_matrix():
    """Check the shape of the matrix ``doc2term_matrix`` builds for the fixture.

    The result's ``shape`` attribute must equal ``(2, 14)`` -- presumably
    (documents, vocabulary terms); confirm against ``doc2term_matrix``.
    """
    corpus_path = CURDIR + "/data/corpus.txt"
    term_matrix = doc2term_matrix(load_line_corpus(corpus_path))
    expected_shape = (2, 14)
    assert_equal(term_matrix.shape, expected_shape)