Example #1
0
    def test_basic(self):
        """A plain fetch of '/' should complete the session in one step."""
        rich_client = RichClient(Client())
        session = rich_client.session(Request.new(self.get_url('/')))

        # Nothing fetched yet, so the session cannot be done.
        self.assertFalse(session.done)

        response = yield session.fetch()

        self.assertTrue(session.done)
        self.assertEqual(200, response.status_code)
Example #2
0
    def test_basic(self):
        """A single successful fetch should finish the session."""
        session = RichClient(Client()).session(
            Request.new(self.get_url('/')))

        self.assertFalse(session.done)

        # One fetch is enough for a non-redirecting URL.
        response = yield session.fetch()
        self.assertEqual(200, response.status_code)
        self.assertTrue(session.done)
Example #3
0
    def test_bad_redirect(self):
        """A malformed redirect must surface as ProtocolError."""
        rich_client = RichClient(Client())
        session = rich_client.session(
            Request.new(self.get_url('/bad_redirect')))

        while not session.done:
            try:
                yield session.fetch()
            except ProtocolError:
                # Expected: the bad redirect is rejected.
                return
            # Any fetch that succeeds here means the error went undetected.
            self.fail()
Example #4
0
    def test_bad_redirect(self):
        """Fetching '/bad_redirect' is expected to raise ProtocolError."""
        http_client = Client()
        client = RichClient(http_client)
        bad_url = self.get_url('/bad_redirect')
        session = client.session(Request.new(bad_url))

        while not session.done:
            try:
                yield session.fetch()
            except ProtocolError:
                return  # The expected failure occurred.
            else:
                # A clean fetch means the broken redirect slipped through.
                self.fail()
Example #5
0
    def test_fetch_disallow(self):
        """A Disallow-all robots.txt must put the session in the denied state."""
        http_client = MockHTTPClient()
        client = RichClient(http_client, RobotsTxtPool())
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com'))

        self.assertEqual(RobotsState.unknown, session._robots_state)

        # Before anything else, the session should request robots.txt.
        pending = session.next_request
        self.assertTrue(pending.url_info.url.endswith('robots.txt'))

        # Serve a robots.txt that disallows everything.
        reply = Response('HTTP/1.0', 200, 'OK')
        reply.body.content_file = io.StringIO('User-agent:*\nDisallow: /\n')
        http_client.response = reply
        yield session.fetch()

        self.assertEqual(RobotsState.denied, session._robots_state)

        # Denied: no further request should be offered.
        pending = session.next_request
        self.assertIsNone(pending)

        # Attempting to fetch anyway must raise RobotsDenied.
        denied = False
        try:
            yield session.fetch()
        except RobotsDenied:
            denied = True
        if not denied:
            self.fail()

        self.assertTrue(session.done)
Example #6
0
    def test_redirect_loop(self):
        """A robots.txt redirect loop should be abandoned, allowing the fetch.

        After 21 redirects pointing back at the same robots.txt, the
        session must stop chasing them and proceed with the original
        request, ending in the ``ok`` robots state.
        """
        http_client = MockHTTPClient()
        pool = RobotsTxtPool()
        client = RichClient(http_client, pool)
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com')
        )

        self.assertEqual(RobotsState.unknown, session._robots_state)

        # Answer every robots.txt request with a redirect back to itself,
        # exceeding the follow limit.
        for dummy in range(21):
            request = session.next_request
            self.assertTrue(request.url_info.url.endswith('robots.txt'))

            response = Response('HTTP/1.0', 302, 'See else')
            response.url_info = request.url_info
            response.fields['location'] = '/robots.txt'

            http_client.response = response
            yield session.fetch()

        # The loop was given up on; the original request is now pending.
        request = session.next_request
        self.assertTrue(request)

        response = Response('HTTP/1.0', 200, 'OK')

        http_client.response = response
        yield session.fetch()

        self.assertEqual(RobotsState.ok, session._robots_state)

        # Fix: removed a leftover debug print(session.next_request) here.
        self.assertTrue(session.done)
Example #7
0
    def test_server_error(self):
        """Persistent 500s while fetching robots.txt end in RobotsDenied."""
        http_client = MockHTTPClient()
        pool = RobotsTxtPool()
        client = RichClient(http_client, pool)
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com')
        )

        self.assertEqual(RobotsState.unknown, session._robots_state)

        # Keep answering the robots.txt request with a server error
        # until the retry limit is exhausted.
        for dummy in range(21):
            request = session.next_request
            self.assertTrue(request.url_info.url.endswith('robots.txt'))

            http_client.response = Response('HTTP/1.0', 500, 'Opps')
            yield session.fetch()

        # The session gives up: no request remains to be made.
        request = session.next_request
        self.assertIsNone(request)

        # Fetching once more must raise RobotsDenied.
        try:
            yield session.fetch()
        except RobotsDenied:
            pass
        else:
            self.fail()

        self.assertTrue(session.done)
Example #8
0
    def test_redirect(self):
        """Follow a redirect and check each response's classification."""
        client = RichClient(Client())
        session = client.session(Request.new(self.get_url('/redirect')))

        seen_codes = []

        while not session.done:
            response = yield session.fetch()
            if not seen_codes:
                # The very first response must be classified as a redirect.
                self.assertEqual(
                    RichClientResponseType.redirect, session.response_type)
            seen_codes.append(response.status_code)

        # One 301 hop followed by the final 200 page.
        self.assertEqual([301, 200], seen_codes)
        self.assertTrue(session.done)
        self.assertEqual(RichClientResponseType.normal, session.response_type)
Example #9
0
    def test_redirect(self):
        """A '/redirect' fetch yields a 301 hop followed by the 200 page."""
        http_client = Client()
        client = RichClient(http_client)
        session = client.session(Request.new(self.get_url('/redirect')))

        codes = []

        while not session.done:
            response = yield session.fetch()
            first_hop = not codes
            if first_hop:
                # The initial hop is reported as a redirect response.
                self.assertEqual(RichClientResponseType.redirect,
                                 session.response_type)
            codes.append(response.status_code)

        self.assertEqual([301, 200], codes)
        self.assertTrue(session.done)
        self.assertEqual(RichClientResponseType.normal, session.response_type)
Example #10
0
    def test_fetch_allow_redirects(self):
        """Walk a chain of redirects, fetching robots.txt per new host.

        Each redirect hop first requires the target host's robots.txt
        (itself possibly redirected); once a host's robots.txt allows
        access, subsequent requests to it skip the robots.txt step.
        The assertions are order-dependent: each expected ``next_request``
        and ``_robots_state`` follows from the exact response fed in on
        the previous step.
        """
        http_client = MockHTTPClient()
        pool = RobotsTxtPool()
        client = RichClient(http_client, pool)
        session = MockRobotsTxtRichClientSession(
            client, Request.new('http://example.com'))

        self.assertEqual(RobotsState.unknown, session._robots_state)

        # Try fetch example.com/ (need robots.txt)
        self.assertFalse(session.done)
        request = session.next_request
        self.assertEqual('http://example.com/robots.txt', request.url_info.url)
        response = Response('HTTP/1.0', 301, 'Moved')
        response.fields['location'] = 'http://www.example.com/robots.txt'
        http_client.response = response
        yield session.fetch()
        self.assertEqual(RobotsState.in_progress, session._robots_state)

        # Try fetch www.example.com/robots.txt
        self.assertFalse(session.done)
        request = session.next_request
        self.assertEqual('http://www.example.com/robots.txt',
                         request.url_info.url)
        response = Response('HTTP/1.0', 301, 'Moved')
        response.fields['location'] = 'http://www.example.net/robots.txt'
        http_client.response = response
        yield session.fetch()
        self.assertEqual(RobotsState.in_progress, session._robots_state)

        # Try fetch www.example.net/robots.txt
        self.assertFalse(session.done)
        request = session.next_request
        self.assertEqual('http://www.example.net/robots.txt',
                         request.url_info.url)
        response = Response('HTTP/1.0', 200, 'OK')
        response.body.content_file = io.StringIO('User-agent:*\nAllow: /\n')
        http_client.response = response
        yield session.fetch()
        self.assertEqual(RobotsState.ok, session._robots_state)

        # Try fetch example.com/ (robots.txt already fetched)
        self.assertFalse(session.done)
        request = session.next_request
        self.assertEqual('http://example.com/', request.url_info.url)
        response = Response('HTTP/1.0', 301, 'Moved')
        response.fields['location'] = 'http://www.example.com/'
        http_client.response = response
        yield session.fetch()
        self.assertEqual(RobotsState.ok, session._robots_state)

        # Try www.example.com/ (robots.txt already fetched)
        self.assertFalse(session.done)
        request = session.next_request
        self.assertEqual('http://www.example.com/', request.url_info.url)
        response = Response('HTTP/1.0', 301, 'Moved')
        response.fields['location'] = 'http://www.example.net/'
        http_client.response = response
        yield session.fetch()
        self.assertEqual(RobotsState.ok, session._robots_state)

        # Try www.example.net/ (robots.txt already fetched)
        self.assertFalse(session.done)
        request = session.next_request
        self.assertEqual('http://www.example.net/', request.url_info.url)
        response = Response('HTTP/1.0', 301, 'Moved')
        response.fields['location'] = 'http://lol.example.net/'
        http_client.response = response
        yield session.fetch()
        self.assertEqual(RobotsState.ok, session._robots_state)

        # Try lol.example.net/ (need robots.txt)
        self.assertFalse(session.done)
        request = session.next_request
        self.assertEqual('http://lol.example.net/robots.txt',
                         request.url_info.url)
        response = Response('HTTP/1.0', 200, 'OK')
        response.body.content_file = io.StringIO('User-agent:*\nAllow: /\n')
        http_client.response = response
        yield session.fetch()
        # NOTE(review): here a 200 robots.txt leaves the state in_progress,
        # while the same situation above expected ok — confirm whether the
        # mock session intentionally reports in_progress at this point.
        self.assertEqual(RobotsState.in_progress, session._robots_state)

        # Try lol.example.net/ (robots.txt already fetched)
        self.assertFalse(session.done)
        request = session.next_request
        self.assertEqual('http://lol.example.net/', request.url_info.url)
        response = Response('HTTP/1.0', 200, 'OK')
        http_client.response = response
        yield session.fetch()
        self.assertEqual(RobotsState.ok, session._robots_state)

        self.assertTrue(session.done)