Esempio n. 1
0
def test_build_index():
    """Compare the file written by ``index.build_index`` with the in-memory index.

    The corpus is a list of ``(doc_id, tokens)`` pairs, including one document
    with no tokens. The index is written into a private temporary directory
    that is removed when the test finishes, so runs neither leave files behind
    in the system temp directory nor pick up output from an earlier or
    concurrent run.
    """
    test_list = [
        (1, ["я", "любить", "инфопоиск"]),
        (2, ["ты", "любить", "алгебра"]),
        (3, ["я", "ходить", "ты", "я"]),
        (4, []),
        (5, ["ты", "ненавидеть", "матанализ"]),
        (6, ["мы", "ходить", "матмех"]),
        (7, ["они", "любить", "ты", "я"]),
        (8, ["они"]),
        (9, ["я", "тащить", "каток"]),
        (10, ["ты", "сдать", "механика"]),
        (11, ["не", "любить", "не", "ты"]),
        (12, ["кусать"]),
    ]

    with tempfile.TemporaryDirectory() as tempdir:
        index_file = os.path.join(tempdir, "final_file.txt")
        # NOTE(review): the third argument (3) is presumably a block size that
        # forces several intermediate blocks to be merged -- confirm against
        # index.build_index.
        index.build_index(test_list, index_file, 3)

        # Reference result: each entry converted to a list so it compares
        # equal to the lists produced by json.loads below.
        true_index = [list(t) for t in index.sub_block_indexation(test_list)]

        with open(index_file) as file:
            # One JSON document per line. The final line is excluded from the
            # comparison -- presumably a trailer written by build_index;
            # verify against the writer.
            merged_index = [json.loads(line) for line in file][:-1]

    assert merged_index == true_index
Esempio n. 2
0
def test_search_token(tmpdir):
    """Look up single terms with ``search.find_term`` in a freshly built index.

    Three small documents are indexed under their 0-based positions; each
    case below pairs a query term with the document ids expected back.
    """
    corpus = ["1 2 3 4", "2 3 4 5", "6 7"]
    index_path = os.path.join(tmpdir, "index.json")
    build_index(enumerate(corpus), index_path)

    cases = (
        ("3", [0, 1]),  # term present in the first two documents
        ("8", []),      # term absent from every document
        ("6", [2]),     # term present only in the last document
    )
    for term, expected in cases:
        query = sympify(term)
        assert search.find_term(query, index_path) == expected
Esempio n. 3
0
def test_search_and_token(tmpdir):
    """Run conjunction-only queries through ``search.search``.

    Three small documents are indexed under their 0-based positions; each
    case below pairs an AND expression with the document ids expected back.
    """
    corpus = ["a b c d", "b c d e", "f g"]
    all_ids = [0, 1, 2]

    index_path = os.path.join(tmpdir, "index.json")
    build_index(enumerate(corpus), index_path)

    cases = (
        ("A & B", [0]),
        ("F & B", []),
        ("B & C & D", [0, 1]),
    )
    for expression, expected in cases:
        query = sympify(expression)
        assert search.search(query, index_path, all_ids) == expected
Esempio n. 4
0
def test_search(tmpdir):
    """Run mixed AND / OR / NOT queries through ``search.search``.

    Three small documents are indexed under their 0-based positions; each
    case below pairs a boolean expression with the document ids expected back.
    """
    corpus = ["a b c d", "b c d f", "a f g"]
    doc_ids = [0, 1, 2]

    index_file = os.path.join(tmpdir, "index.json")
    build_index(enumerate(corpus), index_file)

    cases = (
        ("(A | B) & F", [1, 2]),
        ("(F | B) & ~ G", [0, 1]),
        # "&" binds tighter than "|", so this reads as (C & D & ~F) | G.
        ("C & D & ~ F | G", [0, 2]),
        ("P & D & F", []),
    )
    for expression, expected in cases:
        q = sympify(expression)
        found_docs = search.search(q, index_file, doc_ids)
        assert found_docs == expected