Ejemplo n.º 1
0
def test_iterate_pagerank():
    """Check iterate_pagerank against the stored ranks of each sample corpus.

    Each corpus is crawled, ranked using the DAMPING constant, and the result
    is compared for exact equality with its expected constant.
    """
    expectations = (
        ("corpus0", ITERATION_PAGE_RANKS_0),
        ("corpus1", ITERATION_PAGE_RANKS_1),
        ("corpus2", ITERATION_PAGE_RANKS_2),
    )
    for directory, expected_ranks in expectations:
        corpus = pagerank.crawl(directory)
        assert pagerank.iterate_pagerank(corpus, DAMPING) == expected_ranks
Ejemplo n.º 2
0
    def test_sample_pagerank(self):
        """Check that the iterated ranks of each sample corpus sum to about 1.

        NOTE(review): despite its name, this test exercises
        ``pr.iterate_pagerank`` rather than ``pr.sample_pagerank`` -- confirm
        whether the name or the call is the intended one.
        """
        for directory in ("corpus0", "corpus1", "corpus2"):
            corpus = pr.crawl(directory)
            ranks = pr.iterate_pagerank(corpus, pr.DAMPING)
            # places=1 keeps the original tolerance; msg identifies which
            # corpus failed, since all three share this one assertion.
            self.assertAlmostEqual(
                sum(ranks.values()), 1, places=1, msg=directory
            )
Ejemplo n.º 3
0
def test_sample_pagerank():
    """Check sample_pagerank against the stored ranks of each sample corpus.

    The random generator is seeded once at the start and never reseeded, so
    the calls below are kept in their original order (presumably the expected
    constants depend on the seeded random sequence -- confirm against
    ``pagerank.sample_pagerank``).
    """
    import math  # local import: only the normalization check needs it

    random.seed(1)
    assert pagerank.sample_pagerank(pagerank.crawl("corpus0"), DAMPING,
                                    SAMPLES) == PAGE_RANKS_0
    assert pagerank.sample_pagerank(pagerank.crawl("corpus1"), DAMPING,
                                    SAMPLES) == PAGE_RANKS_1

    result2 = pagerank.sample_pagerank(pagerank.crawl("corpus2"), DAMPING,
                                       SAMPLES)
    assert result2 == PAGE_RANKS_2

    # Confirm the result is properly normalized. The ranks are compared with
    # a tolerance because a sum of floats is not guaranteed to be exactly 1.
    assert math.isclose(sum(result2.values()), 1)

    # Confirm that the process is pseudo-random: a second run on the same
    # corpus, without reseeding, must not reproduce the first result.
    assert pagerank.sample_pagerank(pagerank.crawl("corpus2"), DAMPING,
                                    SAMPLES) != result2
Ejemplo n.º 4
0
import os
import random
import re
import sys
import pagerank

# The script takes exactly one command-line argument: the corpus to crawl.
if len(sys.argv) == 2:
    corpus = pagerank.crawl(sys.argv[1])
else:
    sys.exit("Usage: python pagerank.py corpus")

# Sampling check (disabled; uncomment to run it instead):
# print(pagerank.sample_pagerank(corpus, 0.8, 100))

# Iteration check: run the iterative computation and print its result.
print(pagerank.iterate_pagerank(corpus, 0.8))
Ejemplo n.º 5
0
def test_crawl():
    """Check crawl() against the stored link maps.

    Calling crawl() with no argument is expected to give the same result as
    crawling "corpus0" explicitly; "corpus1" is checked against CRAWL1.
    """
    default_links = pagerank.crawl()
    assert default_links == CRAWL0

    corpus0_links = pagerank.crawl("corpus0")
    assert corpus0_links == CRAWL0

    corpus1_links = pagerank.crawl("corpus1")
    assert corpus1_links == CRAWL1
Ejemplo n.º 6
0
import sys

from pagerank import transition_model, crawl

# Exactly one command-line argument is expected: the corpus to crawl.
if len(sys.argv) != 2:
    sys.exit("Usage: python pagerank.py corpus")

# Crawl the corpus and show what was found.
corpus = crawl(sys.argv[1])
print(corpus)

# Show the transition model starting from page "1.html"
# (0.85 is presumably the damping factor -- confirm against transition_model).
trans = transition_model(corpus, "1.html", 0.85)
print(trans)