Beispiel #1
0
    def testNotFound(self):
        """Tests when a URL in the crawl is not found.

        Serves a root page that links to '/missing', which answers with a
        404, runs one crawl into self.output_dir, then checks that the
        artifacts for the root page exist and that url_paths.txt contains
        only '/'.
        """
        @webserver
        def test(path):
            if path == '/':
                return 200, 'text/html', (
                    'Hello world! <a href="/missing">x</a>')
            elif path == '/missing':
                return 404, 'text/plain', 'Nope'

        try:
            # NOTE(review): no page served by this test links under
            # '/ignore', so this prefix looks unused here -- confirm.
            site_diff.real_main(
                start_url='http://%s:%d/' % test.server_address,
                ignore_prefixes=['/ignore'],
                output_dir=self.output_dir,
                coordinator=self.coordinator)
        finally:
            # Always stop the test server, even when the crawl raises, so a
            # failing run does not leave it behind for later tests.
            test.shutdown()

        self.assertTrue(exists(join(self.output_dir, '__run.log')))
        self.assertTrue(exists(join(self.output_dir, '__run.png')))
        self.assertTrue(exists(join(self.output_dir, '__config.js')))
        self.assertTrue(exists(join(self.output_dir, 'url_paths.txt')))

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            ['/'],
            self.output_readlines('url_paths.txt'))

        # NOTE(review): this unconditionally fails the test. It looks like a
        # placeholder marking the test as unfinished -- confirm what the
        # remaining expectations for the 404 page should be before removing.
        self.fail()
Beispiel #2
0
    def testNoDifferences(self):
        """Tests crawling the site end-to-end.

        Crawls the same single-page site twice against one test server:
        first into self.reference_dir, then into self.output_dir with the
        first run passed as reference_dir. Because the page content is
        identical, the second run is expected to produce a diff log but no
        diff image.
        """
        @webserver
        def test(path):
            if path == '/':
                return 200, 'text/html', 'Hello world!'

        try:
            # First run: produce the reference snapshot.
            site_diff.real_main(
                start_url='http://%s:%d/' % test.server_address,
                output_dir=self.reference_dir,
                coordinator=self.coordinator)

            # Second run uses a freshly obtained coordinator.
            # NOTE(review): presumably the coordinator from the first run
            # cannot be reused once real_main returns -- confirm.
            self.coordinator = workers.get_coordinator()
            site_diff.real_main(
                start_url='http://%s:%d/' % test.server_address,
                output_dir=self.output_dir,
                reference_dir=self.reference_dir,
                coordinator=self.coordinator)
        finally:
            # Always stop the test server, even when either crawl raises.
            test.shutdown()

        # Artifacts of the reference run.
        self.assertTrue(exists(join(self.reference_dir, '__run.log')))
        self.assertTrue(exists(join(self.reference_dir, '__run.png')))
        self.assertTrue(exists(join(self.reference_dir, '__config.js')))
        self.assertTrue(exists(join(self.reference_dir, 'url_paths.txt')))

        # Artifacts of the comparison run.
        self.assertTrue(exists(join(self.output_dir, '__run.log')))
        self.assertTrue(exists(join(self.output_dir, '__run.png')))
        self.assertTrue(exists(join(self.output_dir, '__ref.log')))
        self.assertTrue(exists(join(self.output_dir, '__ref.png')))
        self.assertFalse(exists(join(self.output_dir, '__diff.png'))) # No diff
        self.assertTrue(exists(join(self.output_dir, '__diff.log')))
        self.assertTrue(exists(join(self.output_dir, '__config.js')))
        self.assertTrue(exists(join(self.output_dir, 'url_paths.txt')))
Beispiel #3
0
    def testFirstSnapshot(self):
        """Tests taking the very first snapshot.

        Crawls a single-page site into self.output_dir with no reference
        directory, then checks that the run artifacts exist and that
        url_paths.txt contains only '/'.
        """
        @webserver
        def test(path):
            if path == '/':
                return 200, 'text/html', 'Hello world!'

        try:
            site_diff.real_main(
                start_url='http://%s:%d/' % test.server_address,
                output_dir=self.output_dir,
                coordinator=self.coordinator)
        finally:
            # Always stop the test server, even when the crawl raises.
            test.shutdown()

        self.assertTrue(exists(join(self.output_dir, '__run.log')))
        self.assertTrue(exists(join(self.output_dir, '__run.png')))
        self.assertTrue(exists(join(self.output_dir, '__config.js')))
        self.assertTrue(exists(join(self.output_dir, 'url_paths.txt')))

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            ['/'],
            self.output_readlines('url_paths.txt'))
Beispiel #4
0
    def testOneDifference(self):
        """Tests when there is one found difference.

        Crawls a single-page site into self.reference_dir, then starts a
        second server whose root page has different text and crawls it into
        self.output_dir with the first run as reference_dir. The second run
        is expected to produce a diff image in addition to the diff log.
        """
        @webserver
        def test(path):
            if path == '/':
                return 200, 'text/html', 'Hello world!'

        try:
            # First run: produce the reference snapshot.
            site_diff.real_main(
                start_url='http://%s:%d/' % test.server_address,
                output_dir=self.reference_dir,
                coordinator=self.coordinator)
        finally:
            # Stop the first server before `test` is rebound below, even
            # when the crawl raises.
            test.shutdown()

        # Rebinds `test` to a second server serving changed content.
        @webserver
        def test(path):
            if path == '/':
                return 200, 'text/html', 'Hello world a little different!'

        try:
            # Second run uses a freshly obtained coordinator.
            # NOTE(review): presumably the coordinator from the first run
            # cannot be reused once real_main returns -- confirm.
            self.coordinator = workers.get_coordinator()
            site_diff.real_main(
                start_url='http://%s:%d/' % test.server_address,
                output_dir=self.output_dir,
                reference_dir=self.reference_dir,
                coordinator=self.coordinator)
        finally:
            # Always stop the second server, even when the crawl raises.
            test.shutdown()

        # Artifacts of the reference run.
        self.assertTrue(exists(join(self.reference_dir, '__run.log')))
        self.assertTrue(exists(join(self.reference_dir, '__run.png')))
        self.assertTrue(exists(join(self.reference_dir, '__config.js')))
        self.assertTrue(exists(join(self.reference_dir, 'url_paths.txt')))

        # Artifacts of the comparison run.
        self.assertTrue(exists(join(self.output_dir, '__run.log')))
        self.assertTrue(exists(join(self.output_dir, '__run.png')))
        self.assertTrue(exists(join(self.output_dir, '__ref.log')))
        self.assertTrue(exists(join(self.output_dir, '__ref.png')))
        self.assertTrue(exists(join(self.output_dir, '__diff.png'))) # Diff!!
        self.assertTrue(exists(join(self.output_dir, '__diff.log')))
        self.assertTrue(exists(join(self.output_dir, '__config.js')))
        self.assertTrue(exists(join(self.output_dir, 'url_paths.txt')))
Beispiel #5
0
    def testCrawler(self):
        """Tests that the crawler behaves well.

        Specifically:
            - Finds new links in HTML data
            - Avoids non-HTML pages
            - Respects ignore patterns specified on flags

        The served site links '/' to '/stuff' and '/ignore', and '/stuff'
        to '/avoid' (served as text/plain). The crawl runs with
        ignore_prefixes=['/ignore'] and url_paths.txt is expected to list
        only '/' and '/stuff'.
        """
        @webserver
        def test(path):
            if path == '/':
                return 200, 'text/html', (
                    'Hello world! <a href="/stuff">x</a> '
                    '<a href="/ignore">y</a>')
            elif path == '/stuff':
                return 200, 'text/html', 'Stuff page <a href="/avoid">x</a>'
            elif path == '/avoid':
                return 200, 'text/plain', 'Ignore me!'

        try:
            site_diff.real_main(
                start_url='http://%s:%d/' % test.server_address,
                ignore_prefixes=['/ignore'],
                output_dir=self.output_dir,
                coordinator=self.coordinator)
        finally:
            # Always stop the test server, even when the crawl raises.
            test.shutdown()

        self.assertTrue(exists(join(self.output_dir, '__run.log')))
        self.assertTrue(exists(join(self.output_dir, '__run.png')))
        self.assertTrue(exists(join(self.output_dir, '__config.js')))
        self.assertTrue(exists(join(self.output_dir, 'url_paths.txt')))
        # No artifacts may exist for the ignored '/ignore' path.
        self.assertFalse(exists(join(self.output_dir, '_ignore_run.log')))
        self.assertFalse(exists(join(self.output_dir, '_ignore_run.png')))
        self.assertFalse(exists(join(self.output_dir, '_ignore_config.js')))

        # assertEqual replaces the deprecated assertEquals alias.
        self.assertEqual(
            ['/', '/stuff'],
            self.output_readlines('url_paths.txt'))