def test_iterate_pagerank():
    """Compare iterate_pagerank output with the expected ranks of each corpus.

    Each sample corpus is crawled, ranked with the module-level DAMPING
    factor, and the resulting mapping is compared for exact equality with
    the matching ITERATION_PAGE_RANKS_* constant.
    """
    expected_by_corpus = (
        ("corpus0", ITERATION_PAGE_RANKS_0),
        ("corpus1", ITERATION_PAGE_RANKS_1),
        ("corpus2", ITERATION_PAGE_RANKS_2),
    )
    for directory, expected_ranks in expected_by_corpus:
        link_graph = pagerank.crawl(directory)
        computed_ranks = pagerank.iterate_pagerank(link_graph, DAMPING)
        assert computed_ranks == expected_ranks
def test_sample_pagerank(self):
    """Check that the ranks computed for each sample corpus sum to about 1.

    Every corpus runs inside its own ``subTest`` so that a failure for one
    corpus is reported without hiding the outcome for the remaining ones.

    NOTE(review): despite the method name, this exercises
    ``pr.iterate_pagerank`` rather than ``pr.sample_pagerank`` -- confirm
    which function was meant to be covered here.
    """
    for directory in ("corpus0", "corpus1", "corpus2"):
        with self.subTest(corpus=directory):
            corpus = pr.crawl(directory)
            ranks = pr.iterate_pagerank(corpus, pr.DAMPING)
            # places=1: the difference between the total and 1 only has to
            # round to zero at one decimal place, so this is a coarse check.
            self.assertAlmostEqual(sum(ranks.values()), 1, 1)
def test_sample_pagerank():
    """Check sample_pagerank against the expected ranks of each corpus.

    The random generator is seeded once so the sampled ranks are
    reproducible and can be compared with the PAGE_RANKS_* constants.
    For corpus2 the test additionally checks that the ranks are normalized
    and that a second run, which continues from the generator state left by
    the first, produces a different result.
    """
    # Imported locally so this test carries its own dependency.
    import math

    random.seed(1)
    assert pagerank.sample_pagerank(pagerank.crawl("corpus0"), DAMPING, SAMPLES) == PAGE_RANKS_0
    assert pagerank.sample_pagerank(pagerank.crawl("corpus1"), DAMPING, SAMPLES) == PAGE_RANKS_1

    result2 = pagerank.sample_pagerank(pagerank.crawl("corpus2"), DAMPING, SAMPLES)
    assert result2 == PAGE_RANKS_2

    # Confirm the result is properly normalized. The ranks are floats, so the
    # total is compared with a tolerance instead of exact equality: rounding
    # in the summation can leave it a few ULPs away from 1.
    assert math.isclose(sum(result2.values()), 1.0)

    # Confirm that the process is pseudo-random: without re-seeding, a second
    # sampling run must not reproduce the first one.
    assert pagerank.sample_pagerank(pagerank.crawl("corpus2"), DAMPING, SAMPLES) != result2
import os
import random
import re
import sys

import pagerank

# Manual driver: crawl the corpus directory named on the command line and
# print the ranks produced by the iterative algorithm.

# Exactly one argument (the corpus directory) must follow the script name.
argument_count = len(sys.argv) - 1
if argument_count != 1:
    sys.exit("Usage: python pagerank.py corpus")

corpus_directory = sys.argv[1]
corpus = pagerank.crawl(corpus_directory)

# Test sampling
# print(pagerank.sample_pagerank(corpus, 0.8, 100))

# Test iteration
iteration_ranks = pagerank.iterate_pagerank(corpus, 0.8)
print(iteration_ranks)
def test_crawl():
    """Check the link structure returned by crawl for the sample corpora.

    Calling crawl with no argument is expected to give the same result as
    crawling "corpus0"; both are compared with CRAWL0, and "corpus1" is
    compared with CRAWL1.
    """
    default_graph = pagerank.crawl()
    assert default_graph == CRAWL0

    corpus0_graph = pagerank.crawl("corpus0")
    assert corpus0_graph == CRAWL0

    corpus1_graph = pagerank.crawl("corpus1")
    assert corpus1_graph == CRAWL1
from pagerank import crawl, transition_model

# Manual driver: crawl the corpus directory named on the command line, print
# it, then print the transition model for page "1.html" with damping 0.85.

# Exactly one argument (the corpus directory) must follow the script name.
argument_count = len(sys.argv) - 1
if argument_count != 1:
    sys.exit("Usage: python pagerank.py corpus")

corpus_directory = sys.argv[1]
corpus = crawl(corpus_directory)
print(corpus)

trans = transition_model(corpus, "1.html", 0.85)
print(trans)