Example #1
0
def my_fetcher(url):
    """Fetch ``url`` via ``subrequest`` and describe the response body.

    The URL is unquoted before being requested. A single 301 response is
    followed once by re-requesting the value of its ``location`` header.

    The MIME type and encoding are taken from the response's
    ``content-type`` header when it is present; otherwise they are guessed
    from the URI with ``guess_content_type``.

    Args:
        url: The address to fetch.

    Returns:
        A dict with the keys ``string`` (the response body), ``mime_type``
        and ``encoding``.

    Raises:
        Exception: If the final response status is not 200.
    """
    uri = url
    response = subrequest(unquote(uri))

    # Handle redirects (only one hop, and only for status 301).
    if response.status == 301:
        uri = response.getHeader('location')
        response = subrequest(unquote(uri))

    if response.status != 200:
        raise Exception("URI not found")

    content_type = response.getHeader('content-type')
    # Default encoding, kept when the header carries no charset parameter.
    encoding = 'utf-8'

    if content_type:
        # A Content-Type header may carry any number of ';'-separated
        # parameters (or a non-charset one), so each parameter is inspected
        # instead of assuming exactly one "charset=..." after the media type.
        ctype, _, params = content_type.partition(';')
        ctype = ctype.strip()
        for param in params.split(';'):
            name, _, value = param.partition('=')
            charset = value.strip().strip('"\'')
            if name.strip().lower() == 'charset' and charset:
                encoding = charset
                break
    # Guess the content_type from the URI if needed
    else:
        # The guessed encoding replaces the default, whatever its value.
        ctype, encoding = guess_content_type(uri)
        if ctype and ctype.startswith('text/'):
            # NOTE(review): text_type() is given the URI, not the response
            # body -- confirm this is intended.
            ctype = text_type(uri)

    data = response.getBody()

    # I don't think we need to encode ctype == 'text/css' to ascii anymore
    return dict(string=data, mime_type=ctype, encoding=encoding)
Example #2
0
    def test_text_type(self):
        """Check what ``text_type`` reports for a series of str samples."""
        from zope.contenttype import text_type

        html_doc = "<HtmL><body>hello world</body></html>"
        doctype = (
            "<!DOCTYPE HTML PUBLIC "
            '"-//W3C//DTD HTML 4.01 Transitional//EN" '
            '"http://www.w3.org/TR/html4/loose.dtd">'
        )
        padding = " " * 14

        # (sample, expected MIME type) pairs, checked in order.
        cases = [
            (html_doc, "text/html"),
            ('<?xml version="1.0"><foo/>', "text/xml"),
            # The upper-case "<?XML" prologue is expected to be plain text.
            ('<?XML version="1.0"><foo/>', "text/plain"),
            ("foo bar", "text/plain"),
            (doctype, "text/html"),
            # See https://bugs.launchpad.net/bugs/487998
            (padding + html_doc, "text/html"),
            (padding + "abc", "text/plain"),
            (padding, "text/plain"),
        ]
        for sample, expected in cases:
            self.assertEqual(text_type(sample), expected)
Example #3
0
 def test_text_type(self):
     """Check what ``text_type`` reports for a series of bytes samples."""
     from zope.contenttype import text_type

     html_doc = b'<HtmL><body>hello world</body></html>'
     doctype = (b'<!DOCTYPE HTML PUBLIC '
                b'"-//W3C//DTD HTML 4.01 Transitional//EN" '
                b'"http://www.w3.org/TR/html4/loose.dtd">')
     filler = b'abc ' * 100
     padding = b' ' * 14

     # (sample, expected MIME type) pairs, checked in order.
     cases = [
         (html_doc, 'text/html'),
         (b'<?xml version="1.0"><foo/>', 'text/xml'),
         (b'<?XML version="1.0"><foo/>', 'text/xml'),
         (b'foo bar', 'text/plain'),
         (doctype, 'text/html'),
         (b'\n\n<!DOCTYPE html>\n', 'text/html'),
         # we can also parse text snippets
         (b'<p>Hello</p>', 'text/html'),
         (b'<p>' + filler + b'</p>', 'text/html'),
         # See https://bugs.launchpad.net/bugs/487998
         (padding + html_doc, 'text/html'),
         (padding + b'abc', 'text/plain'),
         (padding, 'text/plain'),
     ]
     for sample, expected in cases:
         self.assertEqual(text_type(sample), expected)
 def test_text_type(self):
     """Verify the MIME type ``text_type`` sniffs from bytes input."""
     from zope.contenttype import text_type

     markup = b'<HtmL><body>hello world</body></html>'
     indent = b' ' * 14
     long_body = b'abc ' * 100

     # Samples grouped by the MIME type each one is expected to yield;
     # the groups are laid out so the checks run in a fixed order.
     expectations = (
         ('text/html', (markup,)),
         ('text/xml', (b'<?xml version="1.0"><foo/>',
                       b'<?XML version="1.0"><foo/>')),
         ('text/plain', (b'foo bar',)),
         ('text/html', (b'<!DOCTYPE HTML PUBLIC '
                        b'"-//W3C//DTD HTML 4.01 Transitional//EN" '
                        b'"http://www.w3.org/TR/html4/loose.dtd">',
                        b'\n\n<!DOCTYPE html>\n',
                        # we can also parse text snippets
                        b'<p>Hello</p>',
                        b'<p>' + long_body + b'</p>',
                        # See https://bugs.launchpad.net/bugs/487998
                        indent + markup)),
         ('text/plain', (indent + b'abc',
                         indent)),
     )
     for mime_type, samples in expectations:
         for payload in samples:
             self.assertEqual(text_type(payload), mime_type)