def test_sample_pagerank(self):
    """Check that the ranks computed for each sample corpus sum to about 1.

    For "corpus0", "corpus1" and "corpus2" the corpus is crawled, ranked
    with ``pr.iterate_pagerank`` using ``pr.DAMPING``, and the rank values
    are summed.

    NOTE(review): despite the method name, this exercises
    ``pr.iterate_pagerank`` rather than a sampling routine -- confirm
    that this is intended.
    """
    for corpus_dir in ("corpus0", "corpus1", "corpus2"):
        corpus = pr.crawl(corpus_dir)
        ranks = pr.iterate_pagerank(corpus, pr.DAMPING)
        # places=1 only requires the total to match 1 after rounding the
        # difference to one decimal place; the message names the corpus
        # so a failure can be traced without re-running by hand.
        self.assertAlmostEqual(
            sum(ranks.values()),
            1,
            places=1,
            msg=f"ranks for {corpus_dir} do not sum to 1",
        )
def test_iterate_pagerank():
    """Compare iterate_pagerank output for corpus0-2 with the stored ranks.

    Each corpus is crawled, ranked with ``DAMPING`` and compared for
    equality against its ``ITERATION_PAGE_RANKS_<n>`` constant.
    """
    expected_by_corpus = (
        ("corpus0", ITERATION_PAGE_RANKS_0),
        ("corpus1", ITERATION_PAGE_RANKS_1),
        ("corpus2", ITERATION_PAGE_RANKS_2),
    )
    for corpus_dir, expected in expected_by_corpus:
        corpus = pagerank.crawl(corpus_dir)
        assert pagerank.iterate_pagerank(corpus, DAMPING) == expected
def test_iterate_pagerank(self):
    """Ranks of a three-page corpus sum to 1 when rounded to 4 decimals."""
    links = {
        "1.html": {"2.html", "3.html"},
        "2.html": {"3.html"},
        "3.html": {"2.html"},
    }
    ranks = iterate_pagerank(links, 0.85)
    total = sum(ranks.values())
    self.assertEqual(1, round(total, 4))
def test_iterate_pagerank_corpus0(self):
    """Check the total and the per-page ranks of a four-page corpus.

    All comparisons are made on values rounded to four decimal places.
    """
    links = {
        "1.html": {"2.html"},
        "2.html": {"1.html", "3.html"},
        "3.html": {"2.html", "4.html"},
        "4.html": {"2.html"},
    }
    ranks = iterate_pagerank(links, 0.85)

    # The ranks as a whole must add up to 1.
    self.assertEqual(1, round(sum(ranks.values()), 4))

    # Expected rank of each page, checked in page order.
    expected = {
        "1.html": 0.2202,
        "2.html": 0.4289,
        "3.html": 0.2202,
        "4.html": 0.1307,
    }
    for page, want in expected.items():
        self.assertEqual(want, round(ranks[page], 4))
import os
import random
import re
import sys

import pagerank

# Exactly one command-line argument (the corpus to crawl) is required;
# anything else exits with the usage message.
if len(sys.argv) != 2:
    sys.exit("Usage: python pagerank.py corpus")

corpus_dir = sys.argv[1]
corpus = pagerank.crawl(corpus_dir)

# Sampling check, currently disabled:
# print(pagerank.sample_pagerank(corpus, 0.8, 100))

# Iteration check: print the ranks computed with a damping factor of 0.8.
ranks = pagerank.iterate_pagerank(corpus, 0.8)
print(ranks)
import pagerank as pr

# Hand-written three-page corpus: each page maps to the set of pages it
# links to.
my_dicy = {
    "1.html": {"2.html", "3.html"},
    "2.html": {"3.html"},
    "3.html": {"2.html"},
}

# Run the iterative ranking with a damping factor of 0.85; the return value
# is not used here.
pr.iterate_pagerank(my_dicy, 0.85)
from pagerank import (
    transition_model,
    sample_pagerank,
    iterate_pagerank,
)

# Hand-written three-page corpus: each page maps to the set of pages it
# links to.
corpus = {
    "1.html": {"2.html", "3.html"},
    "2.html": {"3.html"},
    "3.html": {"2.html"},
}
page = "1.html"
damp = 0.85

# Exercise the transition model for the starting page; its return value is
# not used here.
transition_model(corpus, page, damp)

# Print the sampled ranks (5000 samples), then the iterative ranks, both
# with the same damping factor.
sampled_ranks = sample_pagerank(corpus, damp, 5000)
print(sampled_ranks)
iterated_ranks = iterate_pagerank(corpus, damp)
print(iterated_ranks)
def testIteratePagerank(self):
    """Check iterate_pagerank on ``self.corpus_3`` against known ranks.

    The result of ``pr.iterate_pagerank(self.corpus_3, DAMPING)`` must
    contain exactly the expected pages, and each rank must match its
    expected value to unittest's default 7 decimal places.

    Ranks are compared with ``assertAlmostEqual`` rather than exact dict
    equality because exact equality on floats is brittle: a difference in
    the last bits (for example from a different order of additions) would
    fail the test even though the ranks are effectively the same.
    """
    expected = {
        '1.html': 0.232539658203125,
        '2.html': 0.43412700846354163,
        '3.html': 0.3333333333333333,
    }
    pagerank_3 = pr.iterate_pagerank(self.corpus_3, DAMPING)

    # Compare the page sets first so a missing or extra page is reported
    # as such instead of surfacing as a KeyError below.
    self.assertEqual(set(pagerank_3), set(expected))
    for page, want in expected.items():
        self.assertAlmostEqual(
            pagerank_3[page],
            want,
            msg=f"unexpected rank for {page}",
        )