Exemple #1
0
 def __init__(self,
              config,
              page_queue,
              items_queue,
              request_sender=None):
     """Initialise the thread and configure its request sender.

     Args:
         config: Object exposing ``session`` and ``request_timeout``; both
             are copied onto the request sender.
         page_queue: Stored on the instance as ``page_queue``.
         items_queue: Stored on the instance as ``items_queue``.
         request_sender: Sender used by this instance. When omitted, a new
             ``RequestSender`` is created for this instance.
     """
     threading.Thread.__init__(self)
     # A ``RequestSender()`` default in the signature is evaluated only once,
     # so every instance would share (and, below, reconfigure) the same
     # sender. Build a private one per instance instead.
     if request_sender is None:
         request_sender = RequestSender()
     self.config = config
     self.page_queue = page_queue
     self.items_queue = items_queue
     self.request_sender = request_sender
     self.request_sender.session = self.config.session
     self.request_sender.timeout = self.config.request_timeout
Exemple #2
0
 def __init__(self,
              url,
              page_scraper,
              request_factory=None,
              extractor_factory=None,
              url_joiner=None,
              request_sender=None,
              compatibility_factory=None):
     """Store the URL, scraper and collaborators, and build the GET request.

     Every collaborator argument is optional; when omitted, a fresh
     instance of the corresponding class is created for this object.

     Args:
         url: Address used to create the ``GET`` request.
         page_scraper: Stored on the instance as ``scraper``.
         request_factory: Provides ``create_request``; defaults to a new
             ``RequestFactory``.
         extractor_factory: Stored as ``extractor_factory``; defaults to a
             new ``ExtractorFactory``.
         url_joiner: Stored as ``url_joiner``; defaults to a new
             ``UrlJoiner``.
         request_sender: Stored as ``request_sender``; defaults to a new
             ``RequestSender``.
         compatibility_factory: Provides
             ``create_compatible_string_converter``; defaults to a new
             ``CompatibilityFactory``.
     """
     # Calls placed in the signature run once, at definition time, and the
     # resulting objects are then shared by every instance. Creating the
     # defaults here keeps each instance's collaborators independent.
     if request_factory is None:
         request_factory = RequestFactory()
     if extractor_factory is None:
         extractor_factory = ExtractorFactory()
     if url_joiner is None:
         url_joiner = UrlJoiner()
     if request_sender is None:
         request_sender = RequestSender()
     if compatibility_factory is None:
         compatibility_factory = CompatibilityFactory()

     self.url = url
     self.scraper = page_scraper
     self._content = None
     self.extractor = None
     self.request_factory = request_factory
     self.request = self.request_factory.create_request('GET', self.url)
     self.extractor_factory = extractor_factory
     self.url_joiner = url_joiner
     self.request_sender = request_sender
     self.string_converter = (
         compatibility_factory.create_compatible_string_converter())
 def setUp(self):
     """Create the ``RequestSender`` under test from mocked collaborators."""
     converter_spec = mock.create_autospec(StringConverter)
     self.request_sender = RequestSender(
         converter_spec.return_value,
         mock_factory.create_mock_session(),
         (5, 5),
     )
class TestRequestSender(unittest.TestCase):
    """Unit tests for ``RequestSender`` using mocked collaborators."""

    def setUp(self):
        """Create a ``RequestSender`` with a mocked converter and session."""
        string_converter = mock.create_autospec(StringConverter).return_value
        session = mock_factory.create_mock_session()
        timeout = (5, 5)
        self.request_sender = RequestSender(string_converter, session, timeout)

    def _assert_exception_reported(self, exception_class, url, message,
                                   mock_print_function):
        """Handle a mocked exception and check that ``print`` ran twice.

        Args:
            exception_class: Exception type to autospec.
            url: Value assigned to the mocked request's ``url``.
            message: Value returned by the mocked exception's ``__str__``.
            mock_print_function: The patched ``print`` whose calls are counted.
        """
        mock_request = mock.create_autospec(Request).return_value
        mock_request.url = url
        mock_exception = mock.create_autospec(exception_class).return_value
        mock_exception.__str__.return_value = message
        self.request_sender.handle_request_exception(mock_request, mock_exception)
        self.assertEqual(mock_print_function.call_count, 2)

    def test_get_binary(self):
        """``get_binary`` returns the content of the response sent back by the session."""
        mock_request = mock.create_autospec(Request).return_value
        mock_response = mock.create_autospec(Response).return_value
        mock_response.content = "<html><a href='url1'>text1</a><a href='url2'>text2</a></html>"
        self.request_sender.session.send.return_value = mock_response
        result = self.request_sender.get_binary(mock_request)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(result, mock_response.content)

    @mock.patch('xcrawler.http.requests.request_sender.base64')
    @mock.patch.object(RequestSender, 'get_binary')
    def test_get_base64(self, mock_get_binary, mock_base64):
        """``get_base64`` returns whatever ``base64.b64encode`` produces."""
        mock_request = mock.create_autospec(Request).return_value
        mock_string_content = "<html><a href='url1'>text1</a></html>"
        mock_base64_content = "PGh0bWw+PGEgaHJlZj0ndXJsMSc+dGV4dDE8L2E+PC9odG1sPg=="
        mock_base64.b64encode.return_value = mock_base64_content
        mock_get_binary.return_value = mock_string_content
        result = self.request_sender.get_base64(mock_request)
        self.assertEqual(result, mock_base64_content)

    @mock.patch.object(RequestSender, 'get_binary')
    def test_get_element(self, mock_get_binary):
        """``get_element`` returns the string converter's tree elements."""
        mock_request = mock.create_autospec(Request).return_value
        mock_string_content = "<html><a href='url1'>text1</a><a href='url2'>text2</a></html>"
        mock_element_content = mock.create_autospec(Element).return_value
        mock_get_binary.return_value = mock_string_content
        self.request_sender.string_converter.convert_to_tree_elements.return_value = mock_element_content
        result = self.request_sender.get_element(mock_request)
        self.assertEqual(result, mock_element_content)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_connection_error(self, mock_print_function):
        """A ``ConnectionError`` leads to two ``print`` calls."""
        self._assert_exception_reported(
            exceptions.ConnectionError, "htt;//mockurl.mock", "DNS failure",
            mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_httperror(self, mock_print_function):
        """An ``HTTPError`` leads to two ``print`` calls."""
        self._assert_exception_reported(
            exceptions.HTTPError, "http://mockurl.mock", "Invalid HTTP respons",
            mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_urlrequired(self, mock_print_function):
        """A ``URLRequired`` error leads to two ``print`` calls."""
        self._assert_exception_reported(
            exceptions.URLRequired, "htt;//mockurl.mock", "Invalid url",
            mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_toomanyredirects(self, mock_print_function):
        """A ``TooManyRedirects`` error leads to two ``print`` calls."""
        self._assert_exception_reported(
            exceptions.TooManyRedirects, "htt;//mockurl.mock",
            "Number of maximum redirections exceeded", mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_timeout(self, mock_print_function):
        """A ``Timeout`` leads to two ``print`` calls."""
        self._assert_exception_reported(
            exceptions.Timeout, "http://mockurl.mock", "Timeout reached",
            mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_request_exception(self, mock_print_function):
        """A generic ``RequestException`` leads to two ``print`` calls."""
        self._assert_exception_reported(
            exceptions.RequestException, "http://mockurl.mock",
            "RequestException exception message", mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_baseexception(self, mock_print_function):
        """A plain ``BaseException`` leads to two ``print`` calls."""
        self._assert_exception_reported(
            BaseException, "http://mockurl.mock", "BaseException message",
            mock_print_function)
 def setUp(self):
     """Prepare a ``RequestSender`` wired to mocked dependencies."""
     # Tuple elements are evaluated left to right, matching the order in
     # which the original built its three local variables.
     sender_args = (
         mock.create_autospec(StringConverter).return_value,
         mock_factory.create_mock_session(),
         (5, 5),
     )
     self.request_sender = RequestSender(*sender_args)
class TestRequestSender(unittest.TestCase):
    """Tests covering ``RequestSender`` with all collaborators mocked."""

    def setUp(self):
        """Build the sender under test from a mocked converter and session."""
        string_converter = mock.create_autospec(StringConverter).return_value
        session = mock_factory.create_mock_session()
        timeout = (5, 5)
        self.request_sender = RequestSender(string_converter, session, timeout)

    def _check_two_prints_for(self, exception_class, url, message,
                              mock_print_function):
        """Pass a mocked exception to the handler and expect two prints.

        Args:
            exception_class: Exception type handed to ``create_autospec``.
            url: Assigned to the ``url`` attribute of the mocked request.
            message: Return value configured for the exception's ``__str__``.
            mock_print_function: Patched ``print`` used for the call count.
        """
        mock_request = mock.create_autospec(Request).return_value
        mock_request.url = url
        mock_exception = mock.create_autospec(exception_class).return_value
        mock_exception.__str__.return_value = message
        self.request_sender.handle_request_exception(mock_request,
                                                     mock_exception)
        self.assertEqual(mock_print_function.call_count, 2)

    def test_get_binary(self):
        """The session response's ``content`` is what ``get_binary`` returns."""
        mock_request = mock.create_autospec(Request).return_value
        mock_response = mock.create_autospec(Response).return_value
        mock_response.content = "<html><a href='url1'>text1</a><a href='url2'>text2</a></html>"
        self.request_sender.session.send.return_value = mock_response
        result = self.request_sender.get_binary(mock_request)
        # The assertEquals alias no longer exists on Python 3.12+.
        self.assertEqual(result, mock_response.content)

    @mock.patch('xcrawler.http.requests.request_sender.base64')
    @mock.patch.object(RequestSender, 'get_binary')
    def test_get_base64(self, mock_get_binary, mock_base64):
        """The patched ``b64encode`` result is what ``get_base64`` returns."""
        mock_request = mock.create_autospec(Request).return_value
        mock_string_content = "<html><a href='url1'>text1</a></html>"
        mock_base64_content = "PGh0bWw+PGEgaHJlZj0ndXJsMSc+dGV4dDE8L2E+PC9odG1sPg=="
        mock_base64.b64encode.return_value = mock_base64_content
        mock_get_binary.return_value = mock_string_content
        result = self.request_sender.get_base64(mock_request)
        self.assertEqual(result, mock_base64_content)

    @mock.patch.object(RequestSender, 'get_binary')
    def test_get_element(self, mock_get_binary):
        """The converter's tree elements are what ``get_element`` returns."""
        mock_request = mock.create_autospec(Request).return_value
        mock_string_content = "<html><a href='url1'>text1</a><a href='url2'>text2</a></html>"
        mock_element_content = mock.create_autospec(Element).return_value
        mock_get_binary.return_value = mock_string_content
        self.request_sender.string_converter.convert_to_tree_elements.return_value = mock_element_content
        result = self.request_sender.get_element(mock_request)
        self.assertEqual(result, mock_element_content)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_connection_error(self, mock_print_function):
        """Handling a ``ConnectionError`` prints twice."""
        self._check_two_prints_for(exceptions.ConnectionError,
                                   "htt;//mockurl.mock", "DNS failure",
                                   mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_httperror(self, mock_print_function):
        """Handling an ``HTTPError`` prints twice."""
        self._check_two_prints_for(exceptions.HTTPError,
                                   "http://mockurl.mock",
                                   "Invalid HTTP respons",
                                   mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_urlrequired(self, mock_print_function):
        """Handling a ``URLRequired`` error prints twice."""
        self._check_two_prints_for(exceptions.URLRequired,
                                   "htt;//mockurl.mock", "Invalid url",
                                   mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_toomanyredirects(self, mock_print_function):
        """Handling a ``TooManyRedirects`` error prints twice."""
        self._check_two_prints_for(exceptions.TooManyRedirects,
                                   "htt;//mockurl.mock",
                                   "Number of maximum redirections exceeded",
                                   mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_timeout(self, mock_print_function):
        """Handling a ``Timeout`` prints twice."""
        self._check_two_prints_for(exceptions.Timeout,
                                   "http://mockurl.mock", "Timeout reached",
                                   mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_request_exception(self, mock_print_function):
        """Handling a generic ``RequestException`` prints twice."""
        self._check_two_prints_for(exceptions.RequestException,
                                   "http://mockurl.mock",
                                   "RequestException exception message",
                                   mock_print_function)

    @mock.patch('tests.http.requests.test_request_sender.builtins.print')
    def test_handle_exception_baseexception(self, mock_print_function):
        """Handling a plain ``BaseException`` prints twice."""
        self._check_two_prints_for(BaseException, "http://mockurl.mock",
                                   "BaseException message",
                                   mock_print_function)