Ejemplo n.º 1
0
    def process_response(self, request: HttpRequest,
                         response: StreamingHttpResponse) -> StreamingHttpResponse:
        """Fill in the Open Graph description placeholder in the response.

        When the request carries a non-None
        ``placeholder_open_graph_description`` attribute, every occurrence
        of that placeholder in ``response.content`` is replaced by the text
        of the first ``<p>`` element found in the page once the contents of
        admonition blocks have been emptied.  If the page has no ``<p>``
        element, the placeholder is replaced by an empty string.  Requests
        without the attribute leave the response untouched.

        Args:
            request: The request being handled; may carry the placeholder.
            response: The response whose ``content`` is rewritten in place.
                It must not be a streaming response.

        Returns:
            The same ``response`` object that was passed in.
        """
        def alter_content(content: bytes) -> bytes:
            str_content = content.decode("utf-8")
            bs = BeautifulSoup(str_content, features='lxml')
            # Skip any admonition (warning) blocks, since they're
            # usually something about users needing to be an
            # organization administrator, and not useful for
            # describing the page.
            for tag in bs.find_all('div', class_="admonition"):
                tag.clear()

            # Find the first paragraph after that, and convert it from HTML
            # to text.  find() returns None when the page has no <p> at
            # all; use an empty description rather than crashing on `.text`.
            first_paragraph = bs.find('p')
            if first_paragraph is None:
                first_paragraph_text = ''
            else:
                first_paragraph_text = first_paragraph.text.replace('\n', ' ')
            return content.replace(request.placeholder_open_graph_description.encode("utf-8"),
                                   first_paragraph_text.encode("utf-8"))

        if getattr(request, "placeholder_open_graph_description", None) is not None:
            # The rewrite reads and reassigns `response.content`, which a
            # streaming response does not provide.
            assert not response.streaming
            response.content = alter_content(response.content)
        return response
Ejemplo n.º 2
0
    def process_response(self, request: HttpRequest,
                         response: StreamingHttpResponse) -> StreamingHttpResponse:
        """Rewrite the response body when the request carries a placeholder.

        If ``request`` has a non-None ``placeholder_open_graph_description``
        attribute, ``response.content`` is replaced by the result of
        ``alter_content(request, response.content)``; otherwise the
        response is passed through unchanged.  The response must not be
        streaming when the rewrite happens.

        Returns:
            The same ``response`` object that was passed in.
        """
        placeholder = getattr(request, "placeholder_open_graph_description", None)
        if placeholder is None:
            # Nothing to substitute for this request.
            return response

        assert not response.streaming
        rewritten = alter_content(request, response.content)
        response.content = rewritten
        return response
Ejemplo n.º 3
0
    def process_response(
            self, request: HttpRequest,
            response: StreamingHttpResponse) -> StreamingHttpResponse:
        """Fill in the Open Graph description placeholder in the response.

        When the request carries a non-None
        ``placeholder_open_graph_description`` attribute, every occurrence
        of that placeholder in ``response.content`` is replaced by a
        description extracted from the page: the text of the first ``<p>``
        element once admonition blocks have been emptied, or an empty
        string when the page has no ``<p>`` element.  The extraction is
        wrapped in ``cache_with_key`` with a timeout of ``3600 * 24``.

        Args:
            request: The request being handled; may carry the placeholder.
            response: The response whose ``content`` is rewritten in place.
                It must not be a streaming response.

        Returns:
            The same ``response`` object that was passed in.
        """
        @cache_with_key(open_graph_description_cache_key, timeout=3600 * 24)
        def get_content_description(content: bytes,
                                    request: HttpRequest) -> str:
            str_content = content.decode("utf-8")
            bs = BeautifulSoup(str_content, features='lxml')
            # Skip any admonition (warning) blocks, since they're
            # usually something about users needing to be an
            # organization administrator, and not useful for
            # describing the page.
            for tag in bs.find_all('div', class_="admonition"):
                tag.clear()

            # Find the first paragraph after that, and convert it from
            # HTML to text.  find() returns None when the page has no <p>
            # at all; use an empty description rather than crashing.
            first_paragraph = bs.find('p')
            if first_paragraph is None:
                return ''
            return first_paragraph.text.replace('\n', ' ')

        def alter_content(content: bytes) -> bytes:
            first_paragraph_text = get_content_description(content, request)
            return content.replace(
                request.placeholder_open_graph_description.encode("utf-8"),
                first_paragraph_text.encode("utf-8"))

        if getattr(request, "placeholder_open_graph_description",
                   None) is not None:
            # The rewrite reads and reassigns `response.content`, which a
            # streaming response does not provide.
            assert not response.streaming
            response.content = alter_content(response.content)
        return response
Ejemplo n.º 4
0
    def process_response(self, request: HttpRequest,
                         response: StreamingHttpResponse) -> StreamingHttpResponse:
        """Substitute the Open Graph placeholder in non-streaming responses.

        Requests lacking a ``placeholder_open_graph_description`` attribute
        (or carrying ``None`` there) get their response back unmodified.
        For all other requests the body is handed to
        ``alter_content(request, response.content)`` and the result is
        stored back on ``response.content``.

        Returns:
            The same ``response`` object that was passed in.
        """
        needs_rewrite = (
            getattr(request, "placeholder_open_graph_description", None) is not None
        )
        if not needs_rewrite:
            return response

        # `content` is read and reassigned below, so the response must not
        # be a streaming one.
        assert not response.streaming
        response.content = alter_content(request, response.content)
        return response
Ejemplo n.º 5
0
    def process_response(
        self, request: HttpRequest, response: StreamingHttpResponse
    ) -> StreamingHttpResponse:
        """Rewrite the response body when the request notes hold a placeholder.

        Looks up the notes for ``request`` via ``RequestNotes.get_notes``.
        When their ``placeholder_open_graph_description`` is not None,
        ``response.content`` is replaced by
        ``alter_content(request, response.content)``; otherwise the
        response is returned as is.  The response must not be streaming
        when the rewrite happens.

        Returns:
            The same ``response`` object that was passed in.
        """
        notes = RequestNotes.get_notes(request)
        if notes.placeholder_open_graph_description is None:
            # No placeholder recorded for this request.
            return response

        assert not response.streaming
        rewritten = alter_content(request, response.content)
        response.content = rewritten
        return response
Ejemplo n.º 6
0
    def test_streaming_response(self):
        """Check iteration, attributes and coercion of StreamingHttpResponse."""
        response = StreamingHttpResponse(iter(['hello', 'world']))

        # Iterating the response produces one bytestring per chunk.
        emitted = list(response)
        self.assertEqual(emitted, [b'hello', b'world'])
        for piece in emitted:
            self.assertIsInstance(piece, six.binary_type)

        # A second pass over the same response yields nothing.
        self.assertEqual(list(response), [])

        # That holds even when the content is a re-iterable sequence such
        # as a list.
        response = StreamingHttpResponse(['abc', 'def'])
        first_pass = list(response)
        self.assertEqual(first_pass, [b'abc', b'def'])
        second_pass = list(response)
        self.assertEqual(second_pass, [])

        # There is no `content` attribute on a streaming response...
        self.assertFalse(hasattr(response, 'content'))

        # ...and assigning one is rejected.
        with self.assertRaises(AttributeError):
            response.content = 'xyz'

        # `streaming_content` is the attribute that exists instead.
        self.assertTrue(hasattr(response, 'streaming_content'))

        # It lets callers detect a streaming response and wrap or replace
        # the underlying iterator.
        response.streaming_content = iter(['abc', 'def'])
        upper_cased = (piece.upper() for piece in response.streaming_content)
        response.streaming_content = upper_cased
        self.assertEqual(list(response), [b'ABC', b'DEF'])

        # Coercing to bytes gives only the headers, not a full HTTP message
        # as a regular response would.
        response = StreamingHttpResponse(iter(['hello', 'world']))
        serialized = six.binary_type(response)
        self.assertEqual(
            serialized, b'Content-Type: text/html; charset=utf-8')

        # The coercion above must not have consumed the content.
        self.assertEqual(list(response), [b'hello', b'world'])

        # Writing extra content to the response is not allowed.
        response = StreamingHttpResponse(iter(['hello', 'world']))
        with self.assertRaises(Exception):
            response.write('!')

        # Neither is asking for the current position.
        with self.assertRaises(Exception):
            response.tell()

        # getvalue() returns all chunks joined together.
        response = StreamingHttpResponse(iter(['hello', 'world']))
        self.assertEqual(response.getvalue(), b'helloworld')
Ejemplo n.º 7
0
    def process_response(self, request: HttpRequest,
                         response: StreamingHttpResponse) -> StreamingHttpResponse:
        """Fill in the Open Graph description placeholder in the response.

        When the request carries a non-None
        ``placeholder_open_graph_description`` attribute, every occurrence
        of that placeholder in ``response.content`` is replaced by a
        description built from the page's ``<p>`` elements (after emptying
        admonition and code-section blocks).  Paragraph texts are collected
        until the accumulated length exceeds 500 characters, then joined
        with whitespace runs collapsed to single spaces.  The extraction is
        wrapped in ``cache_with_key`` with a timeout of ``3600*24``.

        Args:
            request: The request being handled; may carry the placeholder.
            response: The response whose ``content`` is rewritten in place.
                It must not be a streaming response.

        Returns:
            The same ``response`` object that was passed in.
        """
        @cache_with_key(open_graph_description_cache_key, timeout=3600*24)
        def get_content_description(content: bytes, request: HttpRequest) -> str:
            str_content = content.decode("utf-8")
            bs = BeautifulSoup(str_content, features='lxml')
            # Skip any admonition (warning) blocks, since they're
            # usually something about users needing to be an
            # organization administrator, and not useful for
            # describing the page.
            for tag in bs.find_all('div', class_="admonition"):
                tag.clear()

            # Skip code-sections, which just contains navigation instructions.
            for tag in bs.find_all('div', class_="code-section"):
                tag.clear()

            paragraphs = []
            collected_length = 0
            for paragraph in bs.find_all('p'):
                # .text converts it from HTML to text
                paragraph_text = paragraph.text
                paragraphs.append(paragraph_text)
                # Each paragraph counts for its text plus one separating
                # space towards the 500-character budget.
                collected_length += len(paragraph_text) + 1
                if collected_length > 500:
                    break
            # split()/join collapses every whitespace run (including
            # newlines inside paragraphs) into a single space.
            return ' '.join(' '.join(paragraphs).split())

        def alter_content(content: bytes) -> bytes:
            description = get_content_description(content, request)
            return content.replace(request.placeholder_open_graph_description.encode("utf-8"),
                                   description.encode("utf-8"))

        if getattr(request, "placeholder_open_graph_description", None) is not None:
            # The rewrite reads and reassigns `response.content`, which a
            # streaming response does not provide.
            assert not response.streaming
            response.content = alter_content(response.content)
        return response
Ejemplo n.º 8
0
    def test_streaming_response(self):
        """Check iteration, attributes and coercion of StreamingHttpResponse."""
        response = StreamingHttpResponse(iter(["hello", "world"]))

        # Iterating the response produces one bytestring per chunk.
        emitted = list(response)
        self.assertEqual(emitted, [b"hello", b"world"])
        for piece in emitted:
            self.assertIsInstance(piece, six.binary_type)

        # A second pass over the same response yields nothing.
        self.assertEqual(list(response), [])

        # That holds even when the content is a re-iterable sequence such
        # as a list.
        response = StreamingHttpResponse(["abc", "def"])
        first_pass = list(response)
        self.assertEqual(first_pass, [b"abc", b"def"])
        second_pass = list(response)
        self.assertEqual(second_pass, [])

        # Text chunks, including non-ASCII ones, still come out as
        # bytestrings.
        response.streaming_content = iter(["hello", "café"])
        emitted = list(response)
        # b"\xc3\xa9" is the UTF-8 encoding of "é" (U+00E9).
        self.assertEqual(emitted, [b"hello", b"caf\xc3\xa9"])
        for piece in emitted:
            self.assertIsInstance(piece, six.binary_type)

        # There is no `content` attribute on a streaming response...
        self.assertFalse(hasattr(response, "content"))

        # ...and assigning one is rejected.
        with self.assertRaises(AttributeError):
            response.content = "xyz"

        # `streaming_content` is the attribute that exists instead.
        self.assertTrue(hasattr(response, "streaming_content"))

        # It lets callers detect a streaming response and wrap or replace
        # the underlying iterator.
        response.streaming_content = iter(["abc", "def"])
        upper_cased = (piece.upper() for piece in response.streaming_content)
        response.streaming_content = upper_cased
        self.assertEqual(list(response), [b"ABC", b"DEF"])

        # Coercing to bytes gives only the headers, not a full HTTP message
        # as a regular response would.
        response = StreamingHttpResponse(iter(["hello", "world"]))
        serialized = six.binary_type(response)
        self.assertEqual(serialized, b"Content-Type: text/html; charset=utf-8")

        # The coercion above must not have consumed the content.
        self.assertEqual(list(response), [b"hello", b"world"])

        # Writing extra content to the response is not allowed.
        response = StreamingHttpResponse(iter(["hello", "world"]))
        with self.assertRaises(Exception):
            response.write("!")

        # Neither is asking for the current position.
        with self.assertRaises(Exception):
            response.tell()

        # getvalue() returns all chunks joined together.
        response = StreamingHttpResponse(iter(["hello", "world"]))
        self.assertEqual(response.getvalue(), b"helloworld")
Ejemplo n.º 9
0
    def test_streaming_response(self):
        """Check iteration, attributes and coercion of StreamingHttpResponse."""
        response = StreamingHttpResponse(iter(["hello", "world"]))

        # Iterating the response produces one bytestring per chunk.
        emitted = list(response)
        self.assertEqual(emitted, [b"hello", b"world"])
        for piece in emitted:
            self.assertIsInstance(piece, bytes)

        # A second pass over the same response yields nothing.
        self.assertEqual(list(response), [])

        # That holds even when the content is a re-iterable sequence such
        # as a list.
        response = StreamingHttpResponse(["abc", "def"])
        first_pass = list(response)
        self.assertEqual(first_pass, [b"abc", b"def"])
        second_pass = list(response)
        self.assertEqual(second_pass, [])

        # str chunks, including non-ASCII ones, still come out as
        # bytestrings.
        response.streaming_content = iter(["hello", "café"])
        emitted = list(response)
        # b"\xc3\xa9" is the UTF-8 encoding of "é" (U+00E9).
        self.assertEqual(emitted, [b"hello", b"caf\xc3\xa9"])
        for piece in emitted:
            self.assertIsInstance(piece, bytes)

        # There is no `content` attribute on a streaming response...
        self.assertFalse(hasattr(response, "content"))

        # ...and assigning one is rejected.
        with self.assertRaises(AttributeError):
            response.content = "xyz"

        # `streaming_content` is the attribute that exists instead.
        self.assertTrue(hasattr(response, "streaming_content"))

        # It lets callers detect a streaming response and wrap or replace
        # the underlying iterator.
        response.streaming_content = iter(["abc", "def"])
        upper_cased = (piece.upper() for piece in response.streaming_content)
        response.streaming_content = upper_cased
        self.assertEqual(list(response), [b"ABC", b"DEF"])

        # Coercing to bytes gives only the headers, not a full HTTP message
        # as a regular response would.
        response = StreamingHttpResponse(iter(["hello", "world"]))
        serialized = bytes(response)
        self.assertEqual(serialized, b"Content-Type: text/html; charset=utf-8")

        # The coercion above must not have consumed the content.
        self.assertEqual(list(response), [b"hello", b"world"])

        # Writing extra content to the response is not allowed.
        response = StreamingHttpResponse(iter(["hello", "world"]))
        with self.assertRaises(Exception):
            response.write("!")

        # Neither is asking for the current position.
        with self.assertRaises(Exception):
            response.tell()

        # getvalue() returns all chunks joined together.
        response = StreamingHttpResponse(iter(["hello", "world"]))
        self.assertEqual(response.getvalue(), b"helloworld")