def test_compare_score_should_return_highest_score_list():
    """compare_score should map each word to its highest-scoring document."""
    scores = [
        ('word1', 'doc1', 5),
        ('word2', 'doc1', 3),
        ('word1', 'doc2', 6),
        ('word2', 'doc2', 8),
    ]
    # doc2 outscores doc1 for both words.
    best_by_word = {'word1': ('doc2', 6), 'word2': ('doc2', 8)}

    assert DocReporter().compare_score(scores) == best_by_word
def test_compare_score_should_return_first_document_for_same_score():
    """On tied scores, compare_score should keep the first document listed."""
    scores = [
        ('word1', 'doc1', 5),
        ('word2', 'doc1', 5),
        ('word1', 'doc2', 5),
        ('word2', 'doc2', 5),
    ]
    # Every score is identical, so the earlier entries (doc1) must win.
    best_by_word = {'word1': ('doc1', 5), 'word2': ('doc1', 5)}

    assert DocReporter().compare_score(scores) == best_by_word
def test_calculate_tf_score_should_return_the_correct_score_list_of_one_word():
    """A single search word should yield one (word, doc, score) tuple.

    'time' occurs once in a seven-word text, so the expected score is 1/7.
    """
    doc_name = 'doc1'
    text = 'Once upon a time, far, far away'  # 7 words
    search_words = ['time']

    result = DocReporter().calculate_tf_score(doc_name, text, search_words)

    assert result == [('time', 'doc1', 1.0 / 7.0)]
def test_calculate_tf_score_should_return_the_correct_score_for_list_of_words():
    """Each search word should be scored as occurrences / total words.

    The text has seven words: 'time' occurs once (1/7) and 'far' twice (2/7).
    Results are matched by word, so the order of the returned tuples does
    not matter.
    """
    reporter = DocReporter()
    doc = 'doc1'
    text = 'Once upon a time, far, far away'  # length = 7
    words = ['time', 'far']
    expected_outputs = [
        ('time', 'doc1', 1.0 / 7.0),
        ('far', 'doc1', 2.0 / 7.0),
    ]

    outputs = reporter.calculate_tf_score(doc, text, words)

    matched = 0
    for expected in expected_outputs:
        for output in outputs:
            if output[0] == expected[0]:
                assert output[2] == expected[2]
                matched += 1

    # The score assertion above only runs for words that appear in the
    # output; without this check an empty or partial result would pass.
    assert matched == len(words)
def test_compare_score_should_return_highest_score_when_score_is_close_to_zero():
    """A tiny non-zero score should beat 0.0; ties at 0.0 keep the first doc."""
    tiny = 0.000001
    scores = [
        ('word1', 'doc1', 0.0),
        ('word2', 'doc1', tiny),
        ('word3', 'doc1', 0.0),
        ('word1', 'doc2', 0.0),
        ('word2', 'doc2', 0.0),
        ('word3', 'doc2', tiny),
    ]
    best_by_word = {
        'word1': ('doc1', 0.0),   # tie at zero: first document listed
        'word2': ('doc1', tiny),  # doc1 is marginally higher
        'word3': ('doc2', tiny),  # doc2 is marginally higher
    }

    assert DocReporter().compare_score(scores) == best_by_word
def test_calculate_tf_score_should_handle_uppercase_search_words():
    """Upper-case search words should be reported in lower case.

    'I' and 'NASA' each occur once in the twelve-word text, so both are
    expected back as lower-case words with a score of 1/12.
    """
    doc_name = 'doc1'
    text = 'Once upon a time, far, far away, I found myself at NASA'  # 12 words
    search_words = ['I', 'NASA']
    wanted = [('i', 'doc1', 1.0 / 12.0), ('nasa', 'doc1', 1.0 / 12.0)]

    results = DocReporter().calculate_tf_score(doc_name, text, search_words)

    matched = 0
    for want in wanted:
        for got in results:
            if got[0] != want[0]:
                continue
            assert got[2] == want[2]
            matched += 1

    # Every search word must have been found exactly once in the results.
    assert matched == len(search_words)
def test_calculate_tf_score_should_score_all_words():
    """Every requested word should appear in the calculate_tf_score output.

    Only the presence of each word is checked here; the score values in the
    expected tuples are not compared.
    """
    doc_name = 'doc1'
    text = 'Once upon a time, far, far away'  # 7 words
    search_words = ['once', 'upon', 'a', 'time', 'far', 'away']
    wanted = [
        ('once', 'doc1', 1.0 / 7.0),
        ('upon', 'doc1', 1.0 / 7.0),
        ('a', 'doc1', 1.0 / 7.0),
        ('time', 'doc1', 1.0 / 7.0),
        ('far', 'doc1', 2.0 / 7.0),
        ('away', 'doc1', 1.0 / 7.0),
    ]

    results = DocReporter().calculate_tf_score(doc_name, text, search_words)

    # Count (expected, actual) pairs that refer to the same word.
    matched = sum(
        1 for want in wanted for got in results if got[0] == want[0]
    )
    assert matched == len(search_words)
from doc_reporter import DocReporter

# Search terms to report on.
words = ['queequeg', 'whale', 'sea']

# Documents to scan: the first five chapters of Moby Dick.
# NOTE(review): presumably resolved relative to the working directory --
# confirm against DocReporter.create_report_from_local_documents.
docs = [
    'mobydick-chapter1.txt',
    'mobydick-chapter2.txt',
    'mobydick-chapter3.txt',
    'mobydick-chapter4.txt',
    'mobydick-chapter5.txt',
]

reporter = DocReporter()
reporter.create_report_from_local_documents(docs, words)