Exemple #1
0
    def test_sample_pagerank(self):
        """Check that the ranks computed for each corpus sum to roughly 1.

        For "corpus0", "corpus1" and "corpus2" in turn, the corpus is loaded
        with ``pr.crawl`` and ranked with ``pr.iterate_pagerank`` using
        ``pr.DAMPING``. The returned rank values must add up to 1 when
        compared at one decimal place.

        NOTE(review): the method name mentions sampling, but the body
        exercises ``pr.iterate_pagerank`` -- confirm which one is intended.
        """
        for corpus_name in ("corpus0", "corpus1", "corpus2"):
            # subTest labels a failure with the corpus that triggered it and
            # lets the remaining corpora still be checked.
            with self.subTest(corpus=corpus_name):
                corpus = pr.crawl(corpus_name)
                ranks = pr.iterate_pagerank(corpus, pr.DAMPING)
                self.assertAlmostEqual(sum(ranks.values()), 1, places=1)
Exemple #2
0
def test_iterate_pagerank():
    """Compare iterate_pagerank output for each corpus with its stored ranks.

    Each corpus directory is crawled, ranked with ``DAMPING``, and the
    resulting mapping must be equal to the matching
    ``ITERATION_PAGE_RANKS_<n>`` constant.
    """
    expectations = (
        ("corpus0", ITERATION_PAGE_RANKS_0),
        ("corpus1", ITERATION_PAGE_RANKS_1),
        ("corpus2", ITERATION_PAGE_RANKS_2),
    )
    for directory, expected_ranks in expectations:
        crawled = pagerank.crawl(directory)
        assert pagerank.iterate_pagerank(crawled, DAMPING) == expected_ranks
Exemple #3
0
    def test_iterate_pagerank(self):
        """Ranks for a small three-page corpus must sum to 1 (4 decimals)."""
        links = {
            "1.html": {"2.html", "3.html"},
            "2.html": {"3.html"},
            "3.html": {"2.html"}
        }

        ranks = iterate_pagerank(links, 0.85)

        # Round the total before comparing so tiny float drift is ignored.
        total = round(sum(ranks.values()), 4)
        self.assertEqual(1, total)
Exemple #4
0
    def test_iterate_pagerank_corpus0(self):
        """Check the total and the per-page ranks for a four-page corpus.

        All comparisons are made after rounding to four decimal places.
        """
        links = {
            "1.html": {"2.html"},
            "2.html": {"1.html", "3.html"},
            "3.html": {"2.html", "4.html"},
            "4.html": {"2.html"}
        }
        # Expected rank of every page, rounded to four decimals.
        expected_ranks = (
            ("1.html", 0.2202),
            ("2.html", 0.4289),
            ("3.html", 0.2202),
            ("4.html", 0.1307),
        )

        ranks = iterate_pagerank(links, 0.85)

        self.assertEqual(1, round(sum(ranks.values()), 4))
        for page, expected in expected_ranks:
            self.assertEqual(expected, round(ranks[page], 4))
Exemple #5
0
import os
import random
import re
import sys
import pagerank

# Exactly one command-line argument is expected; it is handed to
# pagerank.crawl. Anything else exits with the usage message.
if len(sys.argv) == 2:
    corpus = pagerank.crawl(sys.argv[1])
else:
    sys.exit("Usage: python pagerank.py corpus")

# Sampling check, currently disabled:
# print(pagerank.sample_pagerank(corpus, 0.8, 100))

# Iteration check: print the ranks computed with 0.8 as the second argument.
print(pagerank.iterate_pagerank(corpus, 0.8))
Exemple #6
0
import pagerank as pr
# Hand-written three-page corpus: each key maps to a set of page names
# (presumably the pages it links to -- confirm against pagerank.crawl).
my_dicy = {
    "1.html": {"2.html", "3.html"},
    "2.html": {"3.html"},
    "3.html": {"2.html"},
}

# Run the iterative ranking with 0.85 as the second argument; the return
# value is not used here.
pr.iterate_pagerank(my_dicy, 0.85)
Exemple #7
0
from pagerank import transition_model, sample_pagerank, iterate_pagerank

# Shared inputs for the three calls below.
damp = 0.85
page = "1.html"
corpus = {
    "1.html": {"2.html", "3.html"},
    "2.html": {"3.html"},
    "3.html": {"2.html"},
}

# Call the transition model for the start page; its result is not used here.
transition_model(corpus, page, damp)

# Print the sampled ranks (5000 as the third argument), then the iterated ranks.
print(sample_pagerank(corpus, damp, 5000))
print(iterate_pagerank(corpus, damp))
Exemple #8
0
 def testIteratePagerank(self):
     """iterate_pagerank on ``self.corpus_3`` must return the pinned ranks."""
     # Exact values the ranking of corpus_3 is required to reproduce.
     expected_ranks = {
         '1.html': 0.232539658203125,
         '2.html': 0.43412700846354163,
         '3.html': 0.3333333333333333,
     }
     self.assertEqual(pr.iterate_pagerank(self.corpus_3, DAMPING), expected_ranks)