Example #1
0
def test_similarity():
    """Exercise TFIDF.similarity on two small hand-written samples."""
    # Each sample is a list of [term, weight] pairs over the same four terms.
    base = [['Apple', 1], ['Orange', 2], ['Banana', 1], ['Kiwi', 0]]
    other = [['Apple', 1], ['Orange', 0], ['Banana', 2], ['Kiwi', 1]]

    cases = (
        # The TF-IDF cosine of the two different samples must be 0.5.
        (other, 0.5),
        # Comparing a sample with itself must give a TF-IDF cosine of 1.0.
        (base, 1),
    )
    for candidate, expected in cases:
        score = TFIDF.similarity(base, candidate)
        # Compare to seven decimal places to tolerate float rounding.
        assert round(score - expected, 7) == 0
Example #2
0
def test_tfidf():
    """Build TF-IDF data for two Wikipedia pages and print the results.

    Calls TFIDF.gen_web on each URL, prints every (key, value) pair of both
    results separated by a divider line, then prints the value returned by
    TFIDF.similarity for the pair. The function makes no assertions; it is
    a manual smoke check whose output is meant to be read by a person.

    Printing uses single-argument ``print(...)`` calls so the module parses
    on Python 3 while still producing the same output on Python 2.
    """
    # Tokyo Yakult Swallows article (that is what the percent-encoded title
    # decodes to).
    # NOTE(review): the original comment labelled this page "Chunichi";
    # confirm which team's article was actually intended.
    url = "https://ja.wikipedia.org/wiki/%E6%9D%B1%E4%BA%AC%E3%83%A4%E3%82%AF%E3%83%AB%E3%83%88%E3%82%B9%E3%83%AF%E3%83%AD%E3%83%BC%E3%82%BA"
    tfidf1 = TFIDF.gen_web(url)

    # Masa Yamamoto article.
    url = "https://ja.wikipedia.org/wiki/%E5%B1%B1%E6%9C%AC%E6%98%8C"
    tfidf2 = TFIDF.gen_web(url)

    # Dump the first result, one "key value" pair per line.
    for k, v in tfidf1:
        print("%s %s" % (k, v))

    print("~~~~~~~~~~~~~~~")

    # Dump the second result in the same format.
    for k, v in tfidf2:
        print("%s %s" % (k, v))

    print(TFIDF.similarity(tfidf1, tfidf2))