def test_invalid_crawl_timeout_configuration(self):
    """A crawl_timeout of 0 must be rejected with ConfigurationException."""
    bad_config = '[spider]\ncrawl_timeout: 0\n'
    self.write_configuration_file(bad_config)
    with self.assertRaises(mini_spider.ConfigurationException):
        mini_spider.parse_configuration(self.configuration_file_path)
def test_invalid_thread_count_configuration(self):
    """A thread_count of 0 must be rejected with ConfigurationException."""
    bad_config = '[spider]\nthread_count: 0\n'
    self.write_configuration_file(bad_config)
    with self.assertRaises(mini_spider.ConfigurationException):
        mini_spider.parse_configuration(self.configuration_file_path)
def test_invalid_max_depth_configuration(self):
    """A negative max_depth must be rejected with ConfigurationException."""
    bad_config = '[spider]\nmax_depth: -1\n'
    self.write_configuration_file(bad_config)
    with self.assertRaises(mini_spider.ConfigurationException):
        mini_spider.parse_configuration(self.configuration_file_path)
def test_fully_default_configuration(self):
    """Parsing with no explicit settings yields all documented defaults.

    No configuration is written here; the test reads whatever file exists
    at self.configuration_file_path (presumably created by setUp — confirm).
    """
    configuration = mini_spider.parse_configuration(self.configuration_file_path)
    self.assertEqual(configuration.get('spider', 'url_list_file'), './urls')
    self.assertEqual(configuration.get('spider', 'output_directory'), './output')
    self.assertEqual(configuration.getint('spider', 'max_depth'), 1)
    self.assertEqual(configuration.getint('spider', 'crawl_interval'), 1)
    self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 1)
    self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
    # Raw string: '\.' is an invalid escape sequence in a plain literal
    # (SyntaxWarning in modern CPython); the value is unchanged.
    self.assertEqual(configuration.get('spider', 'target_url'),
                     r'.*\.(gif|png|jpg|bmp)$')
def test_fully_default_configuration(self):
    """Parsing with no explicit settings yields all documented defaults.

    No configuration is written here; the test reads whatever file exists
    at self.configuration_file_path (presumably created by setUp — confirm).
    """
    configuration = mini_spider.parse_configuration(
        self.configuration_file_path)
    self.assertEqual(configuration.get('spider', 'url_list_file'), './urls')
    self.assertEqual(configuration.get('spider', 'output_directory'), './output')
    self.assertEqual(configuration.getint('spider', 'max_depth'), 1)
    self.assertEqual(configuration.getint('spider', 'crawl_interval'), 1)
    self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 1)
    self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
    # Raw string: '\.' is an invalid escape sequence in a plain literal
    # (SyntaxWarning in modern CPython); the value is unchanged.
    self.assertEqual(configuration.get('spider', 'target_url'),
                     r'.*\.(gif|png|jpg|bmp)$')
def test_partly_default_configuration(self):
    """Explicit settings override defaults; unspecified keys keep defaults."""
    # '\\.' instead of '\.' — the plain literal contained an invalid escape
    # sequence (SyntaxWarning in modern CPython); the written bytes are
    # identical. The '\n' escapes must stay live, so no raw string here.
    self.write_configuration_file(
        '[spider]\n'
        'max_depth: 10\n'
        'crawl_interval: 2\n'
        'crawl_timeout: 10\n'
        'target_url: .*\\.(com|cn|net)$\n'
    )
    configuration = mini_spider.parse_configuration(self.configuration_file_path)
    # Unset keys fall back to defaults.
    self.assertEqual(configuration.get('spider', 'url_list_file'), './urls')
    self.assertEqual(configuration.get('spider', 'output_directory'), './output')
    self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
    # Explicitly configured keys take the written values.
    self.assertEqual(configuration.getint('spider', 'max_depth'), 10)
    self.assertEqual(configuration.getint('spider', 'crawl_interval'), 2)
    self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 10)
    self.assertEqual(configuration.get('spider', 'target_url'),
                     r'.*\.(com|cn|net)$')
def test_partly_default_configuration(self):
    """Explicit settings override defaults; unspecified keys keep defaults."""
    # '\\.' instead of '\.' — the plain literal contained an invalid escape
    # sequence (SyntaxWarning in modern CPython); the written bytes are
    # identical. The '\n' escapes must stay live, so no raw string here.
    self.write_configuration_file('[spider]\n'
                                  'max_depth: 10\n'
                                  'crawl_interval: 2\n'
                                  'crawl_timeout: 10\n'
                                  'target_url: .*\\.(com|cn|net)$\n')
    configuration = mini_spider.parse_configuration(
        self.configuration_file_path)
    # Unset keys fall back to defaults.
    self.assertEqual(configuration.get('spider', 'url_list_file'), './urls')
    self.assertEqual(configuration.get('spider', 'output_directory'), './output')
    self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
    # Explicitly configured keys take the written values.
    self.assertEqual(configuration.getint('spider', 'max_depth'), 10)
    self.assertEqual(configuration.getint('spider', 'crawl_interval'), 2)
    self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 10)
    self.assertEqual(configuration.get('spider', 'target_url'),
                     r'.*\.(com|cn|net)$')
def test_normal_configuration(self):
    """A fully-specified configuration file parses back verbatim."""
    # '\\.' instead of '\.' — the plain literal contained an invalid escape
    # sequence (SyntaxWarning in modern CPython); the written bytes are
    # identical. The '\n' escapes must stay live, so no raw string here.
    self.write_configuration_file(
        '[spider]\n'
        'url_list_file: ./urls\n'
        'output_directory: ./output\n'
        'max_depth: 6\n'
        'crawl_interval: 1\n'
        'crawl_timeout: 5\n'
        'target_url: .*\\.(gif|png|jpg|bmp)$\n'
        'thread_count: 8\n'
    )
    configuration = mini_spider.parse_configuration(self.configuration_file_path)
    self.assertEqual(configuration.get('spider', 'url_list_file'), './urls')
    self.assertEqual(configuration.get('spider', 'output_directory'), './output')
    self.assertEqual(configuration.getint('spider', 'max_depth'), 6)
    self.assertEqual(configuration.getint('spider', 'crawl_interval'), 1)
    self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 5)
    self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
    self.assertEqual(configuration.get('spider', 'target_url'),
                     r'.*\.(gif|png|jpg|bmp)$')
def test_normal_configuration(self):
    """A fully-specified configuration file parses back verbatim."""
    # '\\.' instead of '\.' — the plain literal contained an invalid escape
    # sequence (SyntaxWarning in modern CPython); the written bytes are
    # identical. The '\n' escapes must stay live, so no raw string here.
    self.write_configuration_file('[spider]\n'
                                  'url_list_file: ./urls\n'
                                  'output_directory: ./output\n'
                                  'max_depth: 6\n'
                                  'crawl_interval: 1\n'
                                  'crawl_timeout: 5\n'
                                  'target_url: .*\\.(gif|png|jpg|bmp)$\n'
                                  'thread_count: 8\n')
    configuration = mini_spider.parse_configuration(
        self.configuration_file_path)
    self.assertEqual(configuration.get('spider', 'url_list_file'), './urls')
    self.assertEqual(configuration.get('spider', 'output_directory'), './output')
    self.assertEqual(configuration.getint('spider', 'max_depth'), 6)
    self.assertEqual(configuration.getint('spider', 'crawl_interval'), 1)
    self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 5)
    self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
    self.assertEqual(configuration.get('spider', 'target_url'),
                     r'.*\.(gif|png|jpg|bmp)$')
def test_invalid_thread_count_configuration(self):
    """Parsing must raise ConfigurationException when thread_count is 0."""
    self.write_configuration_file(
        '[spider]\n'
        'thread_count: 0\n'
    )
    with self.assertRaises(mini_spider.ConfigurationException):
        mini_spider.parse_configuration(self.configuration_file_path)
def test_invalid_crawl_timeout_configuration(self):
    """Parsing must raise ConfigurationException when crawl_timeout is 0."""
    self.write_configuration_file(
        '[spider]\n'
        'crawl_timeout: 0\n'
    )
    with self.assertRaises(mini_spider.ConfigurationException):
        mini_spider.parse_configuration(self.configuration_file_path)
def test_invalid_max_depth_configuration(self):
    """Parsing must raise ConfigurationException when max_depth is negative."""
    self.write_configuration_file(
        '[spider]\n'
        'max_depth: -1\n'
    )
    with self.assertRaises(mini_spider.ConfigurationException):
        mini_spider.parse_configuration(self.configuration_file_path)