Code example #1
def test_tokenize_text_with_calculate_idf(dummy_collection_config, trec_index, tmpdir):
    """build_stoi with calculate_idf enabled fills both the stoi vocab map and idf."""
    tokens = [["to", "be", "or", "not", "to", "be"]]
    extractor = EmbedText(tmpdir, tmpdir, {}, index=trec_index)
    extractor.build_stoi(tokens, True, True)

    # Vocabulary is assigned ids in first-seen order, after the <pad> sentinel.
    assert extractor.stoi == {"<pad>": 0, "to": 1, "be": 2, "or": 3, "not": 4}

    # Every term appears in the single document, so all idf values are equal.
    expected_idf = 1.791759469228055
    assert extractor.idf == {term: expected_idf for term in ("be", "not", "or", "to")}
Code example #2
def test_tokenize_text(trec_index, tmpdir):
    """build_stoi with calculate_idf disabled builds the vocab but leaves idf empty."""
    tokens = [["to", "be", "or", "not", "to", "be"]]
    extractor = EmbedText(tmpdir, tmpdir, {}, index=trec_index)
    extractor.build_stoi(tokens, True, False)

    # Ids follow first-seen order after the <pad> sentinel.
    assert extractor.stoi == {"<pad>": 0, "to": 1, "be": 2, "or": 3, "not": 4}

    # idf must not be populated when calculate_idf is False.
    assert extractor.idf == {}