def test_calls__process_completed_download_err_list(self, _process_completed_download):
        """
        Check that download() forwards a transfer which pycurl reported through its error list to
        _process_completed_download(), including the error details.
        """
        # Limiting the downloader to a single concurrent transfer makes it simple to locate the
        # mocked curl objects deterministically afterwards.
        downloader = HTTPCurlDownloader(DownloaderConfig(max_concurrent=1))
        single_request = self._download_requests()[:1]

        downloader.download(single_request)

        expected_easy_handle = pycurl.Curl.mock_objs[0]
        expected_multi_handle = pycurl.CurlMulti.mock_objs[0]
        self.assertEqual(_process_completed_download.call_count, 1)
        positional_args = _process_completed_download.mock_calls[0][1]
        # An error was reported, so a fourth positional argument is expected
        self.assertEqual(len(positional_args), 4)
        easy_handle, multi_handle, free_handles, error = positional_args
        self.assertEqual(easy_handle, expected_easy_handle)
        self.assertEqual(multi_handle, expected_multi_handle)
        # The only handle is the one being reported on, so none of them are free
        self.assertEqual(free_handles, [])
        # The error condition has to be handed over unchanged
        self.assertEqual(error, {'code': 999, 'message': 'ERROR!'})
 def test_file_scheme(self):
     """
     In this test, we're making sure that a file:// URL works and is reported as succeeded
     when the path is valid.

     One file request is downloaded with an AggregatingEventListener attached. Exactly one
     success and no failure must be reported, the destination must exist, and its content
     must equal the content of the file the URL points at.
     """
     # Test
     config = DownloaderConfig(max_concurrent=1)
     downloader = HTTPCurlDownloader(config)
     request_list = self._file_download_requests()[:1]
     listener = AggregatingEventListener()
     downloader.event_listener = listener
     downloader.download(request_list)
     # Verify
     self.assertEqual(len(listener.succeeded_reports), 1)
     self.assertEqual(len(listener.failed_reports), 0)
     self.assertTrue(os.path.exists(request_list[0].destination))
     # Verify the downloaded file matches the original. The context managers guarantee both
     # files are closed even when reading raises.
     path_in = urlparse.urlparse(request_list[0].url).path
     with open(path_in) as source_file:
         original_content = source_file.read()
     with open(request_list[0].destination) as destination_file:
         destination_content = destination_file.read()
     self.assertEqual(original_content, destination_content)
    def test_download_successful(self, fire_download_succeeded, fire_download_failed,
                                 _clear_easy_handle_download):
        """
        Assert correct behavior for a successful download.

        _process_completed_download() is invoked without an error argument. Afterwards the easy
        handle must have been removed from the multi handle, passed to
        _clear_easy_handle_download(), and appended to the free handles list. Only the success
        event may have fired, and its report must be in the succeeded state.

        The three parameters are mocks; presumably they are injected by mock.patch decorators
        that are not part of this excerpt.

        :param fire_download_succeeded:     mock recording calls to the success event
        :param fire_download_failed:        mock recording calls to the failure event
        :param _clear_easy_handle_download: mock recording calls that clear an easy handle
        """
        config = DownloaderConfig(max_concurrent=1)
        curl_downloader = HTTPCurlDownloader(config)
        multi_handle = curl_downloader._build_multi_handle()
        easy_handle = multi_handle.handles[0]
        easy_handle.report = DownloadReport('http://fake.com/doesntmatter.html', '/dev/null')
        multi_handle._curls = [easy_handle]
        free_handles = []
        start_time = datetime.now()

        curl_downloader._process_completed_download(easy_handle, multi_handle, free_handles)

        # The easy_handle should have been removed from the multi_handle
        multi_handle.remove_handle.assert_called_once_with(easy_handle)
        # _clear_easy_handle_download should have been handed the easy_handle. A misspelled
        # assertion name on a Mock verifies nothing, so the spelling matters here.
        _clear_easy_handle_download.assert_called_once_with(easy_handle)
        # The free_handles list should have the easy_handle
        self.assertEqual(free_handles, [easy_handle])
        # fire_download_failed should not have been called
        self.assertEqual(fire_download_failed.call_count, 0)

        # fire_download_succeeded should have been called once with the report. Let's assert that, and assert
        # that the report looks good
        self.assertEqual(fire_download_succeeded.call_count, 1)
        report = fire_download_succeeded.mock_calls[0][1][0]
        self.assertTrue(isinstance(report, DownloadReport))
        # assertEqual is required: assertTrue(a, b) treats b as the failure message and passes
        # for any truthy state.
        self.assertEqual(report.state, DOWNLOAD_SUCCEEDED)
        # It's difficult to know what the finish_time on the report will be exactly, so we'll just assert that
        # it's after the start_time we recorded earlier
        self.assertTrue(report.finish_time > start_time)
    def test__build_easy_handle_calls__add_proxy_configuration(self,
                                                               _add_proxy_configuration):
        """
        Check that the easy handle produced by _build_easy_handle() is the one handed to
        _add_proxy_configuration().
        """
        downloader = HTTPCurlDownloader(DownloaderConfig())

        built_handle = downloader._build_easy_handle()

        _add_proxy_configuration.assert_called_with(built_handle)
    def test_max_speed_unset(self):
        """
        Check that no max speed option reaches pycurl when the config does not define one.
        """
        # max_speed is deliberately absent from this config
        downloader = HTTPCurlDownloader(DownloaderConfig())

        built_handle = downloader._build_easy_handle()

        # Collect the option constant (first positional argument) of every setopt() call and
        # make sure MAX_RECV_SPEED_LARGE is not one of them
        options_set = [recorded[0][0] for recorded in built_handle.setopt.call_args_list]
        self.assertFalse(pycurl.MAX_RECV_SPEED_LARGE in options_set)
Example #6
0
def main():
    """
    For every test name, build download requests for its URLs and download them into a freshly
    created temporary directory, printing a one line summary first.
    """
    for name in _get_test_names():
        url_list = TESTS[name]()

        # The summary shows the first URL with its last path component stripped off
        summary = '%s: download %d files from %s' % (name.upper(), len(url_list), url_list[0].rsplit('/', 1)[0])
        print(summary)

        config = DownloaderConfig()
        download_dir = tempfile.mkdtemp(prefix=name+'-')
        request_list = []
        for url in url_list:
            destination = os.path.join(download_dir, _filename_from_url(url))
            request_list.append(DownloadRequest(url, destination))

        downloader = HTTPCurlDownloader(config, TestDownloadEventListener())
        downloader.download(request_list)
    def test_max_speed_set(self):
        """
        Check that a configured max speed is handed to pycurl.
        """
        # The value is given as a string holding a valid integer so that the cast to int is
        # exercised as well.
        configured_speed = '57'
        downloader = HTTPCurlDownloader(DownloaderConfig(max_speed=configured_speed))
        handle = mock.MagicMock()

        downloader._add_connection_configuration(handle)

        # The easy handle must have received the setting as an integer
        handle.setopt.assert_any_call(pycurl.MAX_RECV_SPEED_LARGE, int(configured_speed))
    def test_is_canceled_false(self):
        """
        Leave the downloader's cancellation flag untouched and verify the main loop runs exactly
        once. The pycurl mocks "download" the whole file in a single pass, so counting the
        select() calls on the multi handle is enough to establish that.
        """
        downloader = HTTPCurlDownloader(DownloaderConfig())
        single_request = self._download_requests()[:1]

        downloader.download(single_request)

        multi_handle = pycurl.CurlMulti.mock_objs[0]
        # One pass through the loop means exactly one select() call
        self.assertEqual(multi_handle.select.call_count, 1)
    def test_no_proxy_settings(self):
        """
        Exercise HTTPCurlDownloader._add_proxy_configuration when the config carries no proxy
        settings at all. No proxy related option may be set; given how the method works, that
        means setopt() must not be called a single time, which is what gets asserted.
        """
        downloader = HTTPCurlDownloader(DownloaderConfig())
        handle = mock.MagicMock()

        downloader._add_proxy_configuration(handle)

        # Zero setopt() calls implies that no proxy option was applied to the handle
        self.assertEqual(handle.setopt.call_count, 0)
    def test_is_canceled_true(self):
        """
        Cancel the downloader before starting the download and verify that the main loop never
        executes.
        """
        downloader = HTTPCurlDownloader(DownloaderConfig())
        # Raise the cancellation flag up front so that the loop body is skipped
        downloader.cancel()
        single_request = self._download_requests()[:1]

        downloader.download(single_request)

        multi_handle = pycurl.CurlMulti.mock_objs[0]
        # A cancelled download must not reach select() at all
        self.assertEqual(multi_handle.select.call_count, 0)
    def test_pycurl_errors(self, fire_download_succeeded, fire_download_failed, _clear_easy_handle_download):
        """
        Assert correct behavior when error is not None.

        _process_completed_download() is invoked with a "couldn't connect" error while the easy
        handle reports a response code of 0. Afterwards the easy handle must have been removed
        from the multi handle, passed to _clear_easy_handle_download(), and appended to the free
        handles list. Only the failure event may have fired, and its report must carry the
        response code, error code and error message.

        The three parameters are mocks; presumably they are injected by mock.patch decorators
        that are not part of this excerpt.

        :param fire_download_succeeded:     mock recording calls to the success event
        :param fire_download_failed:        mock recording calls to the failure event
        :param _clear_easy_handle_download: mock recording calls that clear an easy handle
        """
        config = DownloaderConfig(max_concurrent=1)
        curl_downloader = HTTPCurlDownloader(config)
        multi_handle = curl_downloader._build_multi_handle()
        easy_handle = multi_handle.handles[0]
        easy_handle.report = DownloadReport('http://fake.com/doesntmatter.html', '/dev/null')
        multi_handle._curls = [easy_handle]

        def return_code(ignored_param):
            """
            This function will allow us to fake the response code when pycurl didn't reach the server, which
            will be 0.
            """
            return 0

        easy_handle.getinfo = return_code
        free_handles = []
        start_time = datetime.now()

        curl_downloader._process_completed_download(
            easy_handle, multi_handle, free_handles,
            {'code': pycurl.E_COULDNT_CONNECT, 'message': "Couldn't Connect"})

        # The easy_handle should have been removed from the multi_handle
        multi_handle.remove_handle.assert_called_once_with(easy_handle)
        # _clear_easy_handle_download should have been handed the easy_handle. A misspelled
        # assertion name on a Mock verifies nothing, so the spelling matters here.
        _clear_easy_handle_download.assert_called_once_with(easy_handle)
        # The free_handles list should have the easy_handle
        self.assertEqual(free_handles, [easy_handle])
        # fire_download_succeeded should not have been called
        self.assertEqual(fire_download_succeeded.call_count, 0)

        # fire_download_failed should have been called once with the report. Let's assert that, and assert
        # that the report looks good
        self.assertEqual(fire_download_failed.call_count, 1)
        report = fire_download_failed.mock_calls[0][1][0]
        self.assertTrue(isinstance(report, DownloadReport))
        # assertEqual is required: assertTrue(a, b) treats b as the failure message and passes
        # for any truthy state.
        self.assertEqual(report.state, DOWNLOAD_FAILED)
        self.assertEqual(report.error_report['response_code'], 0)
        self.assertEqual(report.error_report['error_code'], pycurl.E_COULDNT_CONNECT)
        self.assertEqual(report.error_report['error_message'], "Couldn't Connect")
        # It's difficult to know what the finish_time on the report will be exactly, so we'll just assert that
        # it's after the start_time we recorded earlier
        self.assertTrue(report.finish_time > start_time)
    def test_proxy_url_set(self):
        """
        Test correct behavior when only proxy_url is set.

        The proxy URL is configured as a unicode object. _add_proxy_configuration() must make
        exactly two setopt() calls on the easy handle, one with the proxy URL and one selecting
        the HTTP proxy type, and every string it hands to pycurl must pass
        _assert_all_strings_handed_to_pycurl_are_strs().
        """
        proxy_url = u'http://proxy.com/server/'
        config = DownloaderConfig(proxy_url=proxy_url)
        curl_downloader = HTTPCurlDownloader(config)
        easy_handle = mock.MagicMock()

        curl_downloader._add_proxy_configuration(easy_handle)

        # There should be two calls to setopt(). One to set the URL, and another to set
        # the proxy type to HTTP
        self.assertEqual(easy_handle.setopt.call_count, 2)
        # The expected value is a plain str, matching the other proxy tests and the str check
        # on the last line of this test
        easy_handle.setopt.assert_any_call(pycurl.PROXY, str(proxy_url))
        easy_handle.setopt.assert_any_call(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
        self._assert_all_strings_handed_to_pycurl_are_strs(easy_handle.setopt.mock_calls)
Example #13
0
def demo(demo_name):
    """
    Run the named demo: download the URLs registered for it in URLS_MAP into a new temporary
    directory and print how many files were downloaded and how long that took.
    """
    config = DownloaderConfig(max_concurrent=None)
    curl_downloader = HTTPCurlDownloader(config, DemoEventListener())

    target_dir = mkdtemp(prefix=demo_name)
    urls = URLS_MAP[demo_name]()
    requests = requests_from_urls(target_dir, urls)

    # Banner
    print(' '.join((demo_name.upper(), 'Demo')))
    print('downloading %d files to %s' % (len(urls), target_dir))
    print('=' * 80)

    # Time the download call only
    started_at = datetime.now()
    reports = curl_downloader.download(requests)
    elapsed = datetime.now() - started_at

    print('%s downloaded %d files: %s' % (demo_name, len(reports), str(elapsed)))
Example #14
0
def main():
    """
    Walk over all test names; for each one print a summary line, create a dedicated temporary
    download directory and download the test's URLs into it.
    """
    for name in _get_test_names():
        url_list = TESTS[name]()

        # Everything before the last '/' of the first URL is shown as the download source
        print('%s: download %d files from %s' % (
            name.upper(), len(url_list), url_list[0].rsplit('/', 1)[0]))

        config = DownloaderConfig()
        download_dir = tempfile.mkdtemp(prefix=name + '-')

        request_list = []
        for url in url_list:
            target_path = os.path.join(download_dir, _filename_from_url(url))
            request_list.append(DownloadRequest(url, target_path))

        downloader = HTTPCurlDownloader(config, TestDownloadEventListener())
        downloader.download(request_list)
    def test_proxy_port_set(self):
        """
        Check the proxy configuration when both proxy_url and proxy_port are given.
        """
        url = u'http://proxy.com/server/'
        port = '3128'
        downloader = HTTPCurlDownloader(DownloaderConfig(proxy_url=url, proxy_port=port))
        handle = mock.MagicMock()

        downloader._add_proxy_configuration(handle)

        # Three setopt() calls are expected: the proxy URL, the HTTP proxy type, and the
        # proxy port.
        setopt = handle.setopt
        self.assertEqual(setopt.call_count, 3)
        setopt.assert_any_call(pycurl.PROXY, str(url))
        setopt.assert_any_call(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
        setopt.assert_any_call(pycurl.PROXYPORT, int(port))
        self._assert_all_strings_handed_to_pycurl_are_strs(setopt.mock_calls)
 def test_file_scheme_with_invalid_path(self):
     """
     Make sure a file:// URL whose path is invalid is reported as a failed download.
     """
     # Test
     downloader = HTTPCurlDownloader(DownloaderConfig(max_concurrent=1))
     requests = self._file_download_requests()[:1]
     # Append garbage to the URL so that its path becomes invalid
     requests[0].url += 'BADPATHBADPATHBADPATH'
     event_listener = AggregatingEventListener()
     downloader.event_listener = event_listener
     downloader.download(requests)
     # Verify: no success, a single failure, nothing downloaded, response code 0
     self.assertEqual(len(event_listener.succeeded_reports), 0)
     self.assertEqual(len(event_listener.failed_reports), 1)
     failure = event_listener.failed_reports[0]
     self.assertEqual(failure.bytes_downloaded, 0)
     self.assertEqual(failure.error_report['response_code'], 0)
    def test_proxy_username_password_set(self):
        """
        Check the proxy configuration when proxy_url, proxy_username and proxy_password are all
        given.
        """
        url = u'http://proxy.com/server/'
        username = u'steve'
        password = u'1luvpr0xysrvrs'
        downloader = HTTPCurlDownloader(
            DownloaderConfig(proxy_url=url, proxy_username=username, proxy_password=password))
        handle = mock.MagicMock()

        downloader._add_proxy_configuration(handle)

        # Four setopt() calls are expected: the proxy URL, the HTTP proxy type, HTTP basic
        # auth as the auth type, and the combined username and password.
        setopt = handle.setopt
        self.assertEqual(setopt.call_count, 4)
        expected_credentials = '%s:%s'%(username, password)
        setopt.assert_any_call(pycurl.PROXY, str(url))
        setopt.assert_any_call(pycurl.PROXYTYPE, pycurl.PROXYTYPE_HTTP)
        setopt.assert_any_call(pycurl.PROXYAUTH, pycurl.HTTPAUTH_BASIC)
        setopt.assert_any_call(pycurl.PROXYUSERPWD, expected_credentials)
        self._assert_all_strings_handed_to_pycurl_are_strs(setopt.mock_calls)