Esempio n. 1
0
def test_top_words():
    """Check Parser.get_top_words(20) against a fixed, seeded ranking.

    Every lowercase ASCII letter is given a pseudo-random count between 1
    and 20 (inclusive) using a fixed seed, then the 20 top entries returned
    by ``get_top_words`` are compared with a hard-coded list. That list is
    ordered by descending count, with equal counts in alphabetical order.
    """
    expected = [
        ("p", 20),
        ("b", 19),
        ("u", 19),
        ("f", 16),
        ("h", 16),
        ("l", 16),
        ("g", 15),
        ("r", 15),
        ("o", 14),
        ("i", 13),
        ("n", 13),
        ("w", 11),
        ("d", 9),
        ("s", 9),
        ("t", 8),
        ("j", 7),
        ("a", 5),
        ("e", 4),
        ("k", 4),
        ("v", 4),
    ]

    # A fixed seed makes the generated counts reproducible between runs.
    random.seed(1)

    # NOTE(review): presumably the constructor does not open these paths,
    # since "fake.txt" is only a placeholder here -- confirm against Parser.
    parser = Parser(input_file="fake.txt", rules_file="fake.txt")

    # Counts are drawn in alphabetical order; the draw order determines
    # which letter receives which value.
    for char in string.ascii_lowercase:
        parser.final_words[char] = random.randint(1, 20)

    actual = parser.get_top_words(20)

    # Print both rankings so a failing run shows them side by side.
    print(parser.final_words.most_common(20))
    print(actual)

    assert actual == expected
Esempio n. 2
0
def parse_file(input_file, rules_file, number_of_results,
               use_collections_counter):
    """Parse a file and print its most frequent words as a text table.

    A ``Parser`` is built from the two paths and its ``parse_file`` method
    is run. The resulting ranking is then printed to stdout as a two-column
    table ("Word" / "Occurrences").

    Args:
        input_file: Forwarded to ``Parser`` as ``input_file``.
        rules_file: Forwarded to ``Parser`` as ``rules_file``.
        number_of_results: How many rows to request from the ranking call.
        use_collections_counter: When truthy, the ranking comes from
            ``parser.final_words.most_common`` (and an INFO line is
            printed first); otherwise from ``parser.get_top_words``.
    """
    parser = Parser(input_file=input_file, rules_file=rules_file)
    parser.parse_file()

    if use_collections_counter:
        print("INFO: Using collections counter")
        ranking = parser.final_words.most_common(number_of_results)
    else:
        ranking = parser.get_top_words(number_of_results)

    # Separator widths equal the cell widths including padding spaces:
    # 1 + 12 + 1 for the word column and 1 + 11 + 1 for the count column.
    word_rule = "-" * 14
    count_rule = "-" * 13

    print(f'| {"Word":<12} | Occurrences |')
    print(f"|{word_rule}|{count_rule}|")
    for entry in ranking:
        print(f"| {entry[0]:<12} | {entry[1]:>11} |")