def test_outgoing_links_to_pagerank_format(self):
    """Check the conversion from outgoing-link format to pagerank format.

    The input maps each url to the list of urls it links to. The expected
    output maps each url to a record holding the urls that link to it
    ('incoming links'), the number of links on that url's own page
    ('number of outgoing links'), and a starting 'pagerank' of 1.
    """
    dictionary_of_outgoing_links = {
        'site1': ['site2', 'site3'],
        'site2': ['site3'],
        'site3': ['site1', 'site2'],
    }
    expected_output = {
        'site1': {
            'incoming links': ['site3'],
            'number of outgoing links': 2,
            'pagerank': 1,
        },
        'site2': {
            'incoming links': ['site3', 'site1'],
            'number of outgoing links': 1,
            'pagerank': 1,
        },
        'site3': {
            'incoming links': ['site2', 'site1'],
            'number of outgoing links': 2,
            'pagerank': 1,
        },
    }
    # NOTE(review): list equality is order-sensitive, so this also pins the
    # order in which the converter emits 'incoming links' -- confirm that
    # order is part of the contract and not an iteration-order accident.
    #
    # The message must be passed as ``msg``; written after the closing
    # parenthesis it only forms a discarded tuple and is never reported.
    self.assertEqual(
        spider.outgoing_links_to_pagerank(dictionary_of_outgoing_links),
        expected_output,
        msg="Conversion from outgoing link format to incoming link format failed.",
    )
def test_pagerank_with_an_unscanned_site(self):
    """Check that unscanned-but-referenced pages do not change pagerank.

    To deal with webpages that are referenced but were never scanned, the
    program is expected to add an entry for them and assume 1 incoming
    link. The first input references 'site4' without listing it as a key;
    the second input lists it explicitly with no outgoing links. Both are
    converted and ranked with the same iteration count, and the results
    must be equal.
    """
    input3 = {
        'site1': ['site2', 'site3', 'site4'],
        'site2': ['site3', 'site4'],
        'site3': ['site1', 'site2'],
    }
    # NOTE(review): 'site5' is not referenced anywhere in input3; confirm
    # that the pipeline is really meant to produce identical output for it.
    input4 = {
        'site1': ['site2', 'site3', 'site4'],
        'site2': ['site3', 'site4'],
        'site3': ['site1', 'site2'],
        'site4': [],
        'site5': [],
    }
    # Second argument handed to page_rank for both inputs; drawn at random
    # so different values get exercised across runs.
    a_random_number = randint(0, 10)

    ranked_without_entry = spider.page_rank(
        spider.outgoing_links_to_pagerank(input3), a_random_number)
    ranked_with_entry = spider.page_rank(
        spider.outgoing_links_to_pagerank(input4), a_random_number)

    # The message must be passed as ``msg``; written after the closing
    # parenthesis it only forms a discarded tuple and is never reported.
    # The random value is included so a failing run can be reproduced.
    self.assertEqual(
        ranked_without_entry,
        ranked_with_entry,
        msg="Unscanned site pagerank is incorrect (a_random_number=%d)"
            % a_random_number,
    )