Ejemplo n.º 1
0
    def test_outgoing_links_to_pagerank_format(self):
        """
        Check conversion of outgoing-link data into the pagerank input format.

        ``spider.outgoing_links_to_pagerank`` is given a dictionary mapping
        each url to the list of urls it links to. The expected result maps
        each url to a dictionary holding its 'incoming links', its
        'number of outgoing links' and a 'pagerank' value of 1.
        """
        # Columns are aligned so each position corresponds to one target site.
        dictionary_of_outgoing_links = {'site1':[         'site2', 'site3'],
                                        'site2':[                  'site3'],
                                        'site3':['site1', 'site2'         ]}

        # NOTE: assertEqual compares lists element by element, so the order of
        # the 'incoming links' entries below is part of the expectation.
        expected_output = {'site1':{'incoming links':[                  'site3'], 'number of outgoing links': 2, 'pagerank': 1},
                           'site2':{'incoming links':['site3', 'site1'         ], 'number of outgoing links': 1, 'pagerank': 1},
                           'site3':{'incoming links':['site2', 'site1'         ], 'number of outgoing links': 2, 'pagerank': 1}}

        actual_output = spider.outgoing_links_to_pagerank(dictionary_of_outgoing_links)

        # The failure message must be passed as the ``msg`` argument; written
        # after the call it only forms a discarded tuple and is never shown.
        self.assertEqual(
            actual_output,
            expected_output,
            "Conversion from outgoing link format to incoming link format failed.",
        )
Ejemplo n.º 2
0
    def test_pagerank_with_an_unscanned_site(self):
        """
        Test the results coming out of the pagerank algorithm.

        To deal with unscanned but referenced webpages the
        program adds an entry and assumes 1 incoming link.

        The two inputs below should have identical output: ``input3``
        references 'site4' without listing it as a key, while ``input4``
        lists 'site4' and 'site5' explicitly with no outgoing links.
        """
        input3 = {'site1':[         'site2', 'site3', 'site4'],
                  'site2':[                  'site3', 'site4'],
                  'site3':['site1', 'site2'                  ]}

        input4 = {'site1':[         'site2', 'site3', 'site4'],
                  'site2':[                  'site3', 'site4'],
                  'site3':['site1', 'site2'                  ],
                  'site4':[                                  ],
                  'site5':[                                  ]}

        # The same randomly chosen second argument is used for both calls so
        # the results are comparable. NOTE(review): presumably an iteration
        # count -- confirm against spider.page_rank.
        a_random_number = randint(0,10)

        rank_with_unscanned_site = spider.page_rank(
            spider.outgoing_links_to_pagerank(input3), a_random_number)
        rank_with_explicit_sites = spider.page_rank(
            spider.outgoing_links_to_pagerank(input4), a_random_number)

        # The failure message must be passed as the ``msg`` argument; written
        # after the call it only forms a discarded tuple and is never shown.
        self.assertEqual(
            rank_with_unscanned_site,
            rank_with_explicit_sites,
            "Unscanned site pagerank is incorrect",
        )