def check_by_input_file(input_name):
    """Score one YAML fixture and compare the result with its expected scores.

    The fixture is read from ``test/data/scorer/<input_name>``. Its ``input``
    entry is passed to ``Scorer`` and the value returned by
    ``calculate_scores()`` must equal its ``scores`` entry.

    Args:
        input_name: File name of the fixture inside ``test/data/scorer``.

    Raises:
        AssertionError: If the calculated scores differ from the expected ones.
    """
    input_file = os.path.join("test/data/scorer", input_name)

    # Use a context manager so the handle is closed even when parsing fails.
    with open(input_file) as fixture:
        # NOTE(review): yaml.load without an explicit Loader can construct
        # arbitrary Python objects and is rejected by newer PyYAML releases.
        # If the fixtures only hold plain data, consider yaml.safe_load.
        test_data = yaml.load(fixture.read())

    scorer = Scorer(test_data['input'])
    scores = scorer.calculate_scores()
    expected_scores = test_data['scores']

    assert scores == expected_scores, \
        "Incorrect scores for '{0}'.".format(input_name)
def test_invalid_data():
    """Check that an error raised by the team validator reaches the caller.

    ``scorer.validate_team`` is patched with a stub that verifies it received
    the expected data object and then raises a known exception. Building a
    ``Scorer`` and calculating scores must surface that very exception.
    """
    sentinel = object()
    teams = {"TLA1": sentinel, "TLA2": sentinel}
    expected_error = Exception('bacon')

    def failing_validator(tla, data):
        # Every team entry was given the same sentinel object.
        assert data is sentinel, "Wrong data passed to validator"
        raise expected_error

    with mock.patch('scorer.validate_team', create=True) as mock_validate:
        mock_validate.side_effect = failing_validator

        raised = False
        try:
            Scorer(teams).calculate_scores()
        except Exception as exc:
            raised = True
            # Must be the identical exception object, not merely an equal one.
            assert exc is expected_error

    assert raised, "Should have experienced an error from the validator"
# Terms handed to the Indexer as stop words: the tokens 'd01'..'d08'
# (presumably the crawled page names -- confirm) and common English filler
# words.
STOP_WORDS = [
    'd01', 'd02', 'd03', 'd04', 'd05', 'd06', 'd07', 'd08',
    'a', 'also', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'do',
    'for', 'have', 'is', 'in', 'it', 'of', 'or', 'see', 'so', 'that',
    'the', 'this', 'to', 'we',
]

# Crawl starting from the seed pages, resolved against the seed URL.
crawler = Crawler([urljoin(SEED_URL, page) for page in SEED_PAGES])

# Build the page-rank graph from the crawler's incoming/outgoing link maps.
page_rank = PageRank(crawler.webgraph_in, crawler.webgraph_out)
page_rank.build_graph()

# Index the crawled contents, passing the stop words to the Indexer.
index = Indexer(crawler.contents, STOP_WORDS)
index.build_index()

scorer = Scorer(index)

print("> SIMPLE SEARCH ENGINE (by Tammo, Tim & Flo)")

# Interactive loop: read a query, score it, and print the hits ordered by
# score weighted with page rank (highest first).
while True:
    scores = scorer.calculate_scores(input("\n> query: "))
    if not scores:
        print("your search term does not occur on any page")
        continue

    # Look each rank up once and reuse it for both the rank column and the
    # weighted score, instead of calling get_rank twice per URL.
    ranked_scores = []
    for url, score in scores.items():
        rank = page_rank.get_rank(url)
        ranked_scores.append((url, score, rank, score * rank))

    print("\n url | score | rank | rank * score\n" + "-" * 54)
    for url, score, rank, ranked_score in sorted(
            ranked_scores, key=lambda element: element[3], reverse=True):
        # Only the last 15 characters of the URL are shown to keep rows short.
        print(" ..{} | {:.4f} | {:.4f} | {:.4f}".format(
            url[-15:], round(score, 6), round(rank, 6),
            round(ranked_score, 6)))
# Document-length checks: each stored length, rounded to six decimals, is
# compared with a fixed reference value and reported as OK or WRONG.
d08_len = index.documents_length['http://mysql12.f4.htw-berlin.de/crawl/d08.html']
print(" d08 length: " + ("WRONG" if round(d08_len, 6) != 2.727447 else "OK"))

d06_len = index.documents_length['http://mysql12.f4.htw-berlin.de/crawl/d06.html']
print(" d06 length: " + ("WRONG" if round(d06_len, 6) != 1.974093 else "OK"))

d04_len = index.documents_length['http://mysql12.f4.htw-berlin.de/crawl/d04.html']
print(" d04 length: " + ("WRONG" if round(d04_len, 6) != 4.312757 else "OK"))

print("\n# Scorer TEST")
scorer = Scorer(index)

# Reference scores for the query 'tokens', as (url, expected score) pairs.
_expected_tokens_scores = (
    ('http://mysql12.f4.htw-berlin.de/crawl/d08.html', 0.119897),
    ('http://mysql12.f4.htw-berlin.de/crawl/d02.html', 0.093106),
    ('http://mysql12.f4.htw-berlin.de/crawl/d04.html', 0.061577),
    ('http://mysql12.f4.htw-berlin.de/crawl/d01.html', 0.051784),
    ('http://mysql12.f4.htw-berlin.de/crawl/d03.html', 0.045677),
)
tokens_scores = scorer.calculate_scores('tokens')
# A list (not a generator) is used so that every URL is looked up even after
# an earlier comparison has failed.
tokens_scores_check = all([
    round(tokens_scores[page_url], 6) == expected
    for page_url, expected in _expected_tokens_scores
])
print(" 'tokens' score: " + ("OK" if tokens_scores_check else "WRONG"))

# Reference scores for the query 'index', as (url, expected score) pairs.
_expected_index_scores = (
    ('http://mysql12.f4.htw-berlin.de/crawl/d08.html', 0.250207),
    ('http://mysql12.f4.htw-berlin.de/crawl/d05.html', 0.233073),
    ('http://mysql12.f4.htw-berlin.de/crawl/d04.html', 0.098769),
)
index_scores = scorer.calculate_scores('index')
index_scores_check = all([
    round(index_scores[page_url], 6) == expected
    for page_url, expected in _expected_index_scores
])
print(" 'index' score: " + ("OK" if index_scores_check else "WRONG"))