class GeneralTest(unittest.TestCase):
    """Exercise GeneralHandler's mapping from URL scheme to handler.

    The fixture registers handlers for 'file', 'http' and 'https'; the
    tests expect the 'https' entry to end up in ``_not_configured``.
    """

    def setUp(self):
        """Create the settings and the GeneralHandler under test."""
        # Scheme -> dotted path string naming the handler class.
        handler_paths = {
            'file': 'crawlmi.core.handlers.FileDownloadHandler',
            'http': 'crawlmi.core.handlers.HttpDownloadHandler',
            'https': 'crawlmi.tests.test_downloader_handlers.NonConfiguredHandler',
        }
        self.settings = Settings({'DOWNLOAD_HANDLERS': handler_paths})
        self.handler = GeneralHandler(self.settings)

    def test_init(self):
        """'file' and 'http' are instantiated; 'https' is not configured."""
        loaded = self.handler._handlers
        self.assertIsInstance(loaded['file'], FileDownloadHandler)
        self.assertIsInstance(loaded['http'], HttpDownloadHandler)
        self.assertIn('https', self.handler._not_configured)

    def test_get_handler(self):
        """_get_handler picks by scheme and raises NotSupported for https."""
        file_handler = self.handler._get_handler(Request('file:///etc/fstab'))
        self.assertIsInstance(file_handler, FileDownloadHandler)

        http_handler = self.handler._get_handler(
            Request('http://www.github.com/'))
        self.assertIsInstance(http_handler, HttpDownloadHandler)

        # Build the request outside the assertion so that only the lookup
        # itself is checked for NotSupported.
        https_request = Request('https://www.githib.com/')
        with self.assertRaises(NotSupported):
            self.handler._get_handler(https_request)
 def setUp(self):
     """Create the settings and the GeneralHandler used by the tests."""
     # Scheme -> dotted path string naming the handler class.
     scheme_handlers = {
         'file': 'crawlmi.core.handlers.FileDownloadHandler',
         'http': 'crawlmi.core.handlers.HttpDownloadHandler',
         'https': 'crawlmi.tests.test_downloader_handlers.NonConfiguredHandler',
     }
     self.settings = Settings({'DOWNLOAD_HANDLERS': scheme_handlers})
     self.handler = GeneralHandler(self.settings)
예제 #3
0
    def __init__(self, settings, request_queue, response_queue,
                 download_handler=None, clock=None):
        """Store collaborators, schedule processing and read throttle settings.

        Args:
            settings: object queried through ``get_float`` / ``get_int``.
            request_queue: kept as ``self.request_queue``.
            response_queue: kept as ``self.response_queue``.
            download_handler: optional handler; when falsy a
                ``GeneralHandler(settings)`` is created instead.
            clock: optional clock; when falsy the module-level ``reactor``
                is used.
        """
        self.request_queue = request_queue
        self.response_queue = response_queue  # queue of responses
        if not download_handler:
            download_handler = GeneralHandler(settings)
        self.download_handler = download_handler
        self.slots = {}
        self.num_in_progress = 0
        if not clock:
            clock = reactor
        self.clock = clock

        # Wrap self.process in a LoopingCall on the chosen clock and
        # schedule it with QUEUE_CHECK_FREQUENCY.
        self.processing = LoopingCall(self.process, clock=self.clock)
        self.processing.schedule(self.QUEUE_CHECK_FREQUENCY, now=True)
        self.running = True

        delay = settings.get_float('DOWNLOAD_DELAY')
        self.download_delay = delay
        self.randomize_delay = settings.get_int('RANDOMIZE_DOWNLOAD_DELAY')

        if not delay:
            total = settings.get_int('CONCURRENT_REQUESTS')
            per_domain = settings.get_int('CONCURRENT_REQUESTS_PER_DOMAIN')
            # A per-domain limit is only used when it is set and strictly
            # below the total limit; otherwise it collapses to the total.
            domain_specific = bool(per_domain) and per_domain < total
            if not domain_specific:
                per_domain = total
        else:
            # With a download delay, both limits are forced to 1.
            total = per_domain = 1
            domain_specific = False

        self.total_concurrency = total
        self.domain_concurrency = per_domain
        self.use_domain_specific = domain_specific
 def setUp(self):
     """Create the settings and the GeneralHandler used by the tests."""
     # One dotted handler path string per URL scheme.
     download_handlers = dict(
         file='crawlmi.core.handlers.FileDownloadHandler',
         http='crawlmi.core.handlers.HttpDownloadHandler',
         https='crawlmi.tests.test_downloader_handlers.NonConfiguredHandler',
     )
     self.settings = Settings({'DOWNLOAD_HANDLERS': download_handlers})
     self.handler = GeneralHandler(self.settings)
class GeneralTest(unittest.TestCase):
    """Tests for how GeneralHandler resolves a handler per URL scheme."""

    def setUp(self):
        """Prepare settings with three scheme entries and build the handler."""
        overrides = {
            'DOWNLOAD_HANDLERS': {
                'file':
                'crawlmi.core.handlers.FileDownloadHandler',
                'http':
                'crawlmi.core.handlers.HttpDownloadHandler',
                'https':
                'crawlmi.tests.test_downloader_handlers.NonConfiguredHandler',
            },
        }
        self.settings = Settings(overrides)
        self.handler = GeneralHandler(self.settings)

    def test_init(self):
        """Configured schemes hold handler instances; 'https' is set aside."""
        expected_types = (
            ('file', FileDownloadHandler),
            ('http', HttpDownloadHandler),
        )
        for scheme, handler_cls in expected_types:
            self.assertIsInstance(self.handler._handlers[scheme], handler_cls)
        self.assertIn('https', self.handler._not_configured)

    def test_get_handler(self):
        """Lookup returns the matching handler or raises NotSupported."""
        resolved = self.handler._get_handler(Request('file:///etc/fstab'))
        self.assertIsInstance(resolved, FileDownloadHandler)
        resolved = self.handler._get_handler(Request('http://www.github.com/'))
        self.assertIsInstance(resolved, HttpDownloadHandler)

        # The request is created before the assertion so that only the
        # handler lookup is checked for NotSupported.
        unsupported = Request('https://www.githib.com/')
        with self.assertRaises(NotSupported):
            self.handler._get_handler(unsupported)