def testNotFound(self):
    """Tests when a URL in the crawl is not found.

    Serves a root page that links to '/missing', which responds with a 404,
    runs site_diff.real_main against it, and then checks that the run
    artifacts exist in output_dir and that url_paths.txt lists only '/'.
    """
    @webserver
    def test(path):
        if path == '/':
            return 200, 'text/html', (
                'Hello world! <a href="/missing">x</a>')
        elif path == '/missing':
            return 404, 'text/plain', 'Nope'

    try:
        site_diff.real_main(
            start_url='http://%s:%d/' % test.server_address,
            ignore_prefixes=['/ignore'],
            output_dir=self.output_dir,
            coordinator=self.coordinator)
    finally:
        # Shut the server down even when the crawl raises, so a failing
        # run does not leave it behind for the following tests.
        test.shutdown()

    for name in ('__run.log', '__run.png', '__config.js', 'url_paths.txt'):
        self.assertTrue(
            exists(join(self.output_dir, name)),
            'expected %s in output_dir' % name)

    self.assertEqual(
        ['/'],
        self.output_readlines('url_paths.txt'))

    # NOTE(review): this unconditional failure is preserved from the
    # original; presumably it marks the test as unfinished -- confirm
    # whether it should be removed or replaced with real 404 assertions.
    self.fail()
def testNoDifferences(self):
    """Tests crawling the site end-to-end.

    Runs site_diff.real_main twice against the same unchanged page: first
    writing into reference_dir, then writing into output_dir with
    reference_dir passed as the reference. Afterwards checks that the run
    and reference artifacts and the diff log exist, and that no diff image
    was written.
    """
    @webserver
    def test(path):
        if path == '/':
            return 200, 'text/html', 'Hello world!'

    try:
        start_url = 'http://%s:%d/' % test.server_address

        site_diff.real_main(
            start_url=start_url,
            output_dir=self.reference_dir,
            coordinator=self.coordinator)

        # The second run uses a new coordinator from workers.
        self.coordinator = workers.get_coordinator()
        site_diff.real_main(
            start_url=start_url,
            output_dir=self.output_dir,
            reference_dir=self.reference_dir,
            coordinator=self.coordinator)
    finally:
        # Shut the server down even when one of the runs raises.
        test.shutdown()

    for name in ('__run.log', '__run.png', '__config.js', 'url_paths.txt'):
        self.assertTrue(
            exists(join(self.reference_dir, name)),
            'expected %s in reference_dir' % name)

    for name in ('__run.log', '__run.png', '__ref.log', '__ref.png'):
        self.assertTrue(
            exists(join(self.output_dir, name)),
            'expected %s in output_dir' % name)

    # No diff
    self.assertFalse(
        exists(join(self.output_dir, '__diff.png')),
        'unexpected __diff.png in output_dir')

    for name in ('__diff.log', '__config.js', 'url_paths.txt'):
        self.assertTrue(
            exists(join(self.output_dir, name)),
            'expected %s in output_dir' % name)
def testFirstSnapshot(self):
    """Tests taking the very first snapshot.

    Runs site_diff.real_main once, with no reference directory, against a
    single-page site and checks that the run artifacts exist in output_dir
    and that url_paths.txt lists only '/'.
    """
    @webserver
    def test(path):
        if path == '/':
            return 200, 'text/html', 'Hello world!'

    try:
        site_diff.real_main(
            start_url='http://%s:%d/' % test.server_address,
            output_dir=self.output_dir,
            coordinator=self.coordinator)
    finally:
        # Shut the server down even when the run raises.
        test.shutdown()

    for name in ('__run.log', '__run.png', '__config.js', 'url_paths.txt'):
        self.assertTrue(
            exists(join(self.output_dir, name)),
            'expected %s in output_dir' % name)

    self.assertEqual(
        ['/'],
        self.output_readlines('url_paths.txt'))
def testOneDifference(self):
    """Tests when there is one found difference.

    First runs site_diff.real_main into reference_dir against a page, then
    runs it again into output_dir (with reference_dir as the reference)
    against a second server whose page body differs. Afterwards checks that
    the run, reference and diff artifacts, including the diff image, exist.
    """
    @webserver
    def test(path):
        if path == '/':
            return 200, 'text/html', 'Hello world!'

    try:
        site_diff.real_main(
            start_url='http://%s:%d/' % test.server_address,
            output_dir=self.reference_dir,
            coordinator=self.coordinator)
    finally:
        # Shut the first server down even when the reference run raises.
        test.shutdown()

    @webserver
    def test(path):
        if path == '/':
            return 200, 'text/html', 'Hello world a little different!'

    try:
        # The second run uses a new coordinator from workers.
        self.coordinator = workers.get_coordinator()
        site_diff.real_main(
            start_url='http://%s:%d/' % test.server_address,
            output_dir=self.output_dir,
            reference_dir=self.reference_dir,
            coordinator=self.coordinator)
    finally:
        # Shut the second server down even when the diff run raises.
        test.shutdown()

    for name in ('__run.log', '__run.png', '__config.js', 'url_paths.txt'):
        self.assertTrue(
            exists(join(self.reference_dir, name)),
            'expected %s in reference_dir' % name)

    output_names = (
        '__run.log',
        '__run.png',
        '__ref.log',
        '__ref.png',
        '__diff.png',  # Diff!!
        '__diff.log',
        '__config.js',
        'url_paths.txt',
    )
    for name in output_names:
        self.assertTrue(
            exists(join(self.output_dir, name)),
            'expected %s in output_dir' % name)
def testCrawler(self):
    """Tests that the crawler behaves well.

    Specifically:
        - Finds new links in HTML data
        - Avoids non-HTML pages
        - Respects ignore patterns specified on flags

    The served site links '/' to '/stuff' and '/ignore', and '/stuff' to
    the text/plain page '/avoid'; the run is given ignore_prefixes of
    ['/ignore'] and is expected to record only '/' and '/stuff'.
    """
    @webserver
    def test(path):
        if path == '/':
            return 200, 'text/html', (
                'Hello world! <a href="/stuff">x</a> '
                '<a href="/ignore">y</a>')
        elif path == '/stuff':
            return 200, 'text/html', 'Stuff page <a href="/avoid">x</a>'
        elif path == '/avoid':
            return 200, 'text/plain', 'Ignore me!'

    try:
        site_diff.real_main(
            start_url='http://%s:%d/' % test.server_address,
            ignore_prefixes=['/ignore'],
            output_dir=self.output_dir,
            coordinator=self.coordinator)
    finally:
        # Shut the server down even when the crawl raises.
        test.shutdown()

    for name in ('__run.log', '__run.png', '__config.js', 'url_paths.txt'):
        self.assertTrue(
            exists(join(self.output_dir, name)),
            'expected %s in output_dir' % name)

    # Nothing should have been written for the ignored '/ignore' path.
    for name in ('_ignore_run.log', '_ignore_run.png', '_ignore_config.js'):
        self.assertFalse(
            exists(join(self.output_dir, name)),
            'unexpected %s in output_dir' % name)

    self.assertEqual(
        ['/', '/stuff'],
        self.output_readlines('url_paths.txt'))