Ejemplo n.º 1
0
    def urlhandle_detect_ext(self, url_handle):
        """Guess a file extension from the headers of an opened URL handle.

        The filename advertised in an ``attachment`` Content-Disposition
        header is tried first; when it yields no extension, the result of
        mapping the Content-Type header through ``mimetype2ext`` is returned.

        url_handle -- object exposing a ``headers`` mapping with a ``get``
                      method.
        """
        lookup = url_handle.headers.get

        def header_text(name):
            # Missing or empty headers are reported as None; anything else
            # is passed through encode_compat_str as ISO-8859-1, ignoring
            # conversion errors.
            raw = lookup(name)
            if not raw:
                return None
            return encode_compat_str(raw, encoding='iso-8859-1', errors='ignore')

        def ext_from_disposition(value):
            # Pull the file name out of an ``attachment`` disposition and
            # hand it to determine_ext; None when the header does not match.
            found = re.match(
                r'''(?xi)
                attachment;\s*
                (?:filename\s*=[^;]+?;\s*)?                    # possible initial filename=...;, ignored
                filename(?P<x>\*)?\s*=\s*                      # filename/filename* =
                    (?(x)(?P<charset>\S+?)'[\w-]*'|(?P<q>")?)  # if * then charset'...' else maybe "
                    (?P<filename>(?(q)[^"]+(?=")|[^\s;]+))         # actual name of file
                ''',
                value,
            )
            if not found:
                return None
            groups = found.groupdict()
            name = groups.get('filename')
            if groups.get('x'):
                # ``filename*=`` form: the value is percent-encoded in the
                # character set named in front of it.
                try:
                    name = compat_urllib_parse_unquote(name, encoding=groups.get('charset', 'utf-8'))
                except LookupError:
                    # Unknown character set name: keep the raw, still
                    # percent-encoded file name.
                    pass
            return determine_ext(name, default_ext=None)

        disposition = header_text('Content-Disposition')
        if disposition:
            ext = ext_from_disposition(disposition)
            if ext:
                return ext

        # No usable file name: fall back to the declared media type.
        return mimetype2ext(header_text('Content-Type'))
Ejemplo n.º 2
0
    def test_compat_urllib_parse_unquote(self):
        self.assertEqual(compat_urllib_parse_unquote('abc%20def'), 'abc def')
        self.assertEqual(compat_urllib_parse_unquote('%7e/abc+def'),
                         '~/abc+def')
        self.assertEqual(compat_urllib_parse_unquote(''), '')
        self.assertEqual(compat_urllib_parse_unquote('%'), '%')
        self.assertEqual(compat_urllib_parse_unquote('%%'), '%%')
        self.assertEqual(compat_urllib_parse_unquote('%%%'), '%%%')
        self.assertEqual(compat_urllib_parse_unquote('%2F'), '/')
        self.assertEqual(compat_urllib_parse_unquote('%2f'), '/')
        self.assertEqual(compat_urllib_parse_unquote('%E6%B4%A5%E6%B3%A2'),
                         '津波')
        self.assertEqual(
            compat_urllib_parse_unquote(
                '''<meta property="og:description" content="%E2%96%81%E2%96%82%E2%96%83%E2%96%84%25%E2%96%85%E2%96%86%E2%96%87%E2%96%88" />
%<a href="https://ar.wikipedia.org/wiki/%D8%AA%D8%B3%D9%88%D9%86%D8%A7%D9%85%D9%8A">%a'''
            ), '''<meta property="og:description" content="▁▂▃▄%▅▆▇█" />
%<a href="https://ar.wikipedia.org/wiki/تسونامي">%a''')
        self.assertEqual(
            compat_urllib_parse_unquote(
                '''%28%5E%E2%97%A3_%E2%97%A2%5E%29%E3%81%A3%EF%B8%BB%E3%83%87%E2%95%90%E4%B8%80    %E2%87%80    %E2%87%80    %E2%87%80    %E2%87%80    %E2%87%80    %E2%86%B6%I%Break%25Things%'''
            ),
            '''(^◣_◢^)っ︻デ═一    ⇀    ⇀    ⇀    ⇀    ⇀    ↶%I%Break%Things%''')