def test_invalid_crawl_timeout_configuration(self):
     """ Test that a non-positive crawl_timeout is rejected.

     A crawl_timeout of 0 cannot express a usable socket timeout, so
     parse_configuration must raise ConfigurationException.
     """
     self.write_configuration_file('[spider]\ncrawl_timeout: 0\n')
     # Callable form of assertRaises: parsing this config must fail.
     self.assertRaises(mini_spider.ConfigurationException,
                       mini_spider.parse_configuration,
                       self.configuration_file_path)
 def test_invalid_thread_count_configuration(self):
     """ Test that a non-positive thread_count is rejected.

     thread_count: 0 would leave the spider with no worker threads, so
     parse_configuration must raise ConfigurationException.
     """
     self.write_configuration_file('[spider]\nthread_count: 0\n')
     # Callable form of assertRaises: parsing this config must fail.
     self.assertRaises(mini_spider.ConfigurationException,
                       mini_spider.parse_configuration,
                       self.configuration_file_path)
 def test_invalid_max_depth_configuration(self):
     """ Test that a negative max_depth is rejected.

     A crawl depth of -1 is meaningless, so parse_configuration must
     raise ConfigurationException.
     """
     self.write_configuration_file('[spider]\nmax_depth: -1\n')
     # Callable form of assertRaises: parsing this config must fail.
     self.assertRaises(mini_spider.ConfigurationException,
                       mini_spider.parse_configuration,
                       self.configuration_file_path)
 def test_fully_default_configuration(self):
     """ Test parsing a configuration file that supplies no values.

     Every option in the [spider] section must fall back to its
     documented default.

     NOTE(review): a method with this exact name is re-defined later in
     the class; Python keeps only the last definition, so this copy is
     shadowed — one of the two should be removed.
     """
     configuration = mini_spider.parse_configuration(self.configuration_file_path)
     self.assertEqual(configuration.get('spider', 'url_list_file'), './urls')
     self.assertEqual(configuration.get('spider', 'output_directory'), './output')
     self.assertEqual(configuration.getint('spider', 'max_depth'), 1)
     self.assertEqual(configuration.getint('spider', 'crawl_interval'), 1)
     self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 1)
     self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
     # Raw string: '\.' in a plain literal is an invalid escape sequence
     # (SyntaxWarning since Python 3.12); the runtime value is unchanged.
     self.assertEqual(configuration.get('spider', 'target_url'),
                      r'.*\.(gif|png|jpg|bmp)$')
 def test_fully_default_configuration(self):
     """ Test parsing a configuration file that supplies no values.

     Every option in the [spider] section must fall back to its
     documented default.

     NOTE(review): this duplicates an earlier method of the same name;
     Python keeps only the last definition, so the earlier copy never
     runs — one of the two should be removed.
     """
     configuration = mini_spider.parse_configuration(
         self.configuration_file_path)
     self.assertEqual(configuration.get('spider', 'url_list_file'),
                      './urls')
     self.assertEqual(configuration.get('spider', 'output_directory'),
                      './output')
     self.assertEqual(configuration.getint('spider', 'max_depth'), 1)
     self.assertEqual(configuration.getint('spider', 'crawl_interval'), 1)
     self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 1)
     self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
     # Raw string: '\.' in a plain literal is an invalid escape sequence
     # (SyntaxWarning since Python 3.12); the runtime value is unchanged.
     self.assertEqual(configuration.get('spider', 'target_url'),
                      r'.*\.(gif|png|jpg|bmp)$')
 def test_partly_default_configuration(self):
     """ Test parsing a configuration file that supplies some values.

     Options written to the file must be returned as-is; omitted
     options (url_list_file, output_directory, thread_count) must fall
     back to their defaults.

     NOTE(review): a method with this exact name is re-defined later in
     the class; Python keeps only the last definition, so this copy is
     shadowed — one of the two should be removed.
     """
     # '\\.' (escaped backslash) avoids the invalid '\.' escape sequence
     # while producing the identical runtime bytes.
     self.write_configuration_file(
         '[spider]\n'
         'max_depth: 10\n'
         'crawl_interval: 2\n'
         'crawl_timeout: 10\n'
         'target_url: .*\\.(com|cn|net)$\n'
     )
     configuration = mini_spider.parse_configuration(self.configuration_file_path)
     self.assertEqual(configuration.get('spider', 'url_list_file'), './urls')
     self.assertEqual(configuration.get('spider', 'output_directory'), './output')
     self.assertEqual(configuration.getint('spider', 'max_depth'), 10)
     self.assertEqual(configuration.getint('spider', 'crawl_interval'), 2)
     self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 10)
     self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
     self.assertEqual(configuration.get('spider', 'target_url'), r'.*\.(com|cn|net)$')
 def test_partly_default_configuration(self):
     """ Test parsing a configuration file that supplies some values.

     Options written to the file must be returned as-is; omitted
     options (url_list_file, output_directory, thread_count) must fall
     back to their defaults.

     NOTE(review): this duplicates an earlier method of the same name;
     Python keeps only the last definition, so the earlier copy never
     runs — one of the two should be removed.
     """
     # '\\.' (escaped backslash) avoids the invalid '\.' escape sequence
     # while producing the identical runtime bytes.
     self.write_configuration_file('[spider]\n'
                                   'max_depth: 10\n'
                                   'crawl_interval: 2\n'
                                   'crawl_timeout: 10\n'
                                   'target_url: .*\\.(com|cn|net)$\n')
     configuration = mini_spider.parse_configuration(
         self.configuration_file_path)
     self.assertEqual(configuration.get('spider', 'url_list_file'),
                      './urls')
     self.assertEqual(configuration.get('spider', 'output_directory'),
                      './output')
     self.assertEqual(configuration.getint('spider', 'max_depth'), 10)
     self.assertEqual(configuration.getint('spider', 'crawl_interval'), 2)
     self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 10)
     self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
     self.assertEqual(configuration.get('spider', 'target_url'),
                      r'.*\.(com|cn|net)$')
    def test_normal_configuration(self):
        """ Test parsing a configuration file that supplies every option.

        All values written to the [spider] section must be read back
        unchanged; no defaults should be involved.

        NOTE(review): a method with this exact name is re-defined later
        in the class; Python keeps only the last definition, so this
        copy is shadowed — one of the two should be removed.
        """
        # '\\.' (escaped backslash) avoids the invalid '\.' escape
        # sequence while producing the identical runtime bytes.
        self.write_configuration_file(
            '[spider]\n'
            'url_list_file: ./urls\n'
            'output_directory: ./output\n'
            'max_depth: 6\n'
            'crawl_interval: 1\n'
            'crawl_timeout: 5\n'
            'target_url: .*\\.(gif|png|jpg|bmp)$\n'
            'thread_count: 8\n'
        )

        configuration = mini_spider.parse_configuration(self.configuration_file_path)
        self.assertEqual(configuration.get('spider', 'url_list_file'), './urls')
        self.assertEqual(configuration.get('spider', 'output_directory'), './output')
        self.assertEqual(configuration.getint('spider', 'max_depth'), 6)
        self.assertEqual(configuration.getint('spider', 'crawl_interval'), 1)
        self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 5)
        self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
        self.assertEqual(configuration.get('spider', 'target_url'), r'.*\.(gif|png|jpg|bmp)$')
    def test_normal_configuration(self):
        """ Test parsing a configuration file that supplies every option.

        All values written to the [spider] section must be read back
        unchanged; no defaults should be involved.

        NOTE(review): this duplicates an earlier method of the same
        name; Python keeps only the last definition, so the earlier
        copy never runs — one of the two should be removed.
        """
        # '\\.' (escaped backslash) avoids the invalid '\.' escape
        # sequence while producing the identical runtime bytes.
        self.write_configuration_file('[spider]\n'
                                      'url_list_file: ./urls\n'
                                      'output_directory: ./output\n'
                                      'max_depth: 6\n'
                                      'crawl_interval: 1\n'
                                      'crawl_timeout: 5\n'
                                      'target_url: .*\\.(gif|png|jpg|bmp)$\n'
                                      'thread_count: 8\n')

        configuration = mini_spider.parse_configuration(
            self.configuration_file_path)
        self.assertEqual(configuration.get('spider', 'url_list_file'),
                         './urls')
        self.assertEqual(configuration.get('spider', 'output_directory'),
                         './output')
        self.assertEqual(configuration.getint('spider', 'max_depth'), 6)
        self.assertEqual(configuration.getint('spider', 'crawl_interval'), 1)
        self.assertEqual(configuration.getint('spider', 'crawl_timeout'), 5)
        self.assertEqual(configuration.getint('spider', 'thread_count'), 8)
        self.assertEqual(configuration.get('spider', 'target_url'),
                         r'.*\.(gif|png|jpg|bmp)$')
 def test_invalid_thread_count_configuration(self):
     """ Test that thread_count: 0 is reported as a configuration error.

     NOTE(review): duplicates an earlier test of the same name; Python
     keeps only the last definition, so one copy should be removed.
     """
     config_text = '[spider]\n' 'thread_count: 0\n'
     self.write_configuration_file(config_text)
     with self.assertRaises(mini_spider.ConfigurationException):
         mini_spider.parse_configuration(self.configuration_file_path)
 def test_invalid_crawl_timeout_configuration(self):
     """ Test that crawl_timeout: 0 is reported as a configuration error.

     NOTE(review): duplicates an earlier test of the same name; Python
     keeps only the last definition, so one copy should be removed.
     """
     config_text = '[spider]\n' 'crawl_timeout: 0\n'
     self.write_configuration_file(config_text)
     with self.assertRaises(mini_spider.ConfigurationException):
         mini_spider.parse_configuration(self.configuration_file_path)
 def test_invalid_max_depth_configuration(self):
     """ Test that max_depth: -1 is reported as a configuration error.

     NOTE(review): duplicates an earlier test of the same name; Python
     keeps only the last definition, so one copy should be removed.
     """
     config_text = '[spider]\n' 'max_depth: -1\n'
     self.write_configuration_file(config_text)
     with self.assertRaises(mini_spider.ConfigurationException):
         mini_spider.parse_configuration(self.configuration_file_path)