Beispiel #1
0
    def open(self, url, **params):
        """Open ``url`` and return its content, recording the raw response.

        When ``url`` starts with ``'http'`` and query ``params`` are given,
        the request is made with ``requests.get``; otherwise it is made with
        ``urlopen``.

        Args:
            url (str): The location to fetch.
            **params: Query parameters forwarded to ``requests.get``.

        Returns:
            With a truthy ``self.cache_type`` the content is read eagerly
            (``r.text`` for ``requests``; the ``urlopen`` body, passed
            through ``decode`` when ``self.decode`` is truthy). Otherwise a
            stream is returned: ``r.raw``, the result of ``reencode``, or the
            ``urlopen`` response itself.

        Side effects:
            Sets ``self.ext`` from the response content type and ``self.r``
            to the underlying response object.
        """
        if url.startswith('http') and params:
            r = requests.get(url, params=params, stream=True)
            r.raw.decode_content = self.decode
            response = r.text if self.cache_type else r.raw
        else:
            try:
                r = urlopen(url, context=self.context, timeout=self.timeout)
            except TypeError:
                # NOTE(review): presumably a fallback for ``urlopen``
                # variants that do not accept ``context`` -- confirm.
                r = urlopen(url, timeout=self.timeout)

            text = r.read() if self.cache_type else None

            if self.decode:
                encoding = get_response_encoding(r, self.def_encoding)

                # Branch on the cache setting, not on ``text``: an empty body
                # is falsy but has already been consumed, so it must not be
                # handed to ``reencode`` as if it were an unread stream.
                if self.cache_type:
                    response = decode(text, encoding)
                else:
                    response = reencode(r.fp, encoding, decode=True)
            else:
                # Same reasoning: an empty cached body is still the content.
                response = text if self.cache_type else r

        content_type = get_response_content_type(r)

        if 'xml' in content_type:
            self.ext = 'xml'
        elif 'json' in content_type:
            self.ext = 'json'
        else:
            # Take the subtype of 'type/subtype; parameters'. A value without
            # a '/' yields an empty extension instead of an IndexError.
            pieces = content_type.split('/')
            subtype = pieces[1] if len(pieces) > 1 else ''
            self.ext = subtype.split(';')[0]

        self.r = r
        return response
Beispiel #2
0
    def open(self, url, **params):
        """Fetch ``url`` and return its content, keeping the response object.

        URLs starting with ``'http'`` that come with query ``params`` are
        requested through ``requests.get``; everything else goes through
        ``urlopen``.

        Args:
            url (str): The location to fetch.
            **params: Query parameters forwarded to ``requests.get``.

        Returns:
            The eagerly read content when ``self.cache_type`` is truthy,
            otherwise a stream-like object (``raw``, the ``reencode`` result,
            or the ``urlopen`` response).

        Side effects:
            Stores the underlying response object on ``self.r``.
        """
        if url.startswith('http') and params:
            resp = requests.get(url, params=params, stream=True)
            resp.raw.decode_content = self.decode
            content = resp.text if self.cache_type else resp.raw
            self.r = resp
            return content

        try:
            resp = urlopen(url, context=self.context, timeout=self.timeout)
        except TypeError:
            resp = urlopen(url, timeout=self.timeout)

        if self.cache_type:
            # Cached mode: consume the whole body up front.
            body = resp.read()

            if self.decode:
                charset = get_response_encoding(resp, self.def_encoding)
                content = decode(body, charset)
            else:
                content = body
        elif self.decode:
            # Streaming mode: wrap the underlying file object lazily.
            charset = get_response_encoding(resp, self.def_encoding)
            content = reencode(resp.fp, charset, decode=True)
        else:
            content = resp

        self.r = resp
        return content
Beispiel #3
0
    def test_reencode(self):
        """Check the lines ``io.reencode`` yields for a UTF-16-BE CSV file."""
        csv_path = p.join(io.DATA_DIR, "utf16_big.csv")

        with open(csv_path, encoding="utf-16-be") as source:
            lines = io.reencode(source, remove_BOM=True)

            # The first two lines are compared as stripped bytes.
            for expected in (b"a,b,c", b"1,2,3"):
                nt.assert_equal(expected, next(lines).strip())

            # The last line holds a non-ASCII character; compare it as text.
            last_line = next(lines)
            nt.assert_equal("4,5,ʤ", last_line.decode("utf-8"))
Beispiel #4
0
    def test_reencode(self):
        """Check the lines ``io.reencode`` yields for a UTF-16-BE CSV file."""
        csv_path = p.join(io.DATA_DIR, 'utf16_big.csv')

        with open(csv_path, encoding='utf-16-be') as source:
            lines = io.reencode(source, remove_BOM=True)

            # The first two lines are compared as stripped bytes.
            for expected in (b'a,b,c', b'1,2,3'):
                nt.assert_equal(expected, next(lines).strip())

            # The last line holds a non-ASCII character; compare it as text.
            last_line = next(lines)
            nt.assert_equal('4,5,ʤ', last_line.decode('utf-8'))
Beispiel #5
0
def opener(url,
           memoize=False,
           delay=0,
           encoding=ENCODING,
           params=None,
           **kwargs):
    """Open ``url`` and return its content together with its content type.

    URLs starting with ``'http'`` that come with query ``params`` are
    requested through ``requests.get``; everything else goes through
    ``urlopen`` with a ``User-Agent`` header from ``default_user_agent()``.

    Args:
        url (str): The location to fetch.
        memoize (bool): When truthy, read the whole content eagerly instead
            of returning a stream.
        delay: When truthy, a ``SleepyDict(delay=delay)`` is passed to
            ``urlopen`` as ``context``.
        encoding (str): Fallback passed to ``get_response_encoding``.
        params (dict): Query parameters for ``requests.get``.
        **kwargs: ``timeout`` is forwarded to ``urlopen``; ``decode`` selects
            decoded rather than raw content.

    Returns:
        tuple: ``(response, content_type)``. ``response`` is the eagerly read
        content when ``memoize`` is truthy, otherwise a stream-like object.

    Raises:
        URLError: If ``urlopen`` raises ``HTTPError`` or ``URLError``; the
            message is prefixed with ``url``.
    """
    params = params or {}
    timeout = kwargs.get('timeout')
    decode = kwargs.get('decode')

    if url.startswith('http') and params:
        r = requests.get(url, params=params, stream=True)
        r.raw.decode_content = decode
        response = r.text if memoize else r.raw
    else:
        req = Request(url, headers={'User-Agent': default_user_agent()})
        context = SleepyDict(delay=delay) if delay else None

        try:
            r = urlopen(req, context=context, timeout=timeout)
        except TypeError:
            r = urlopen(req, timeout=timeout)
        except HTTPError as e:
            # Chain explicitly so the original error stays attached.
            raise URLError(f'{url} returned {e.code}: {e.reason}') from e
        except URLError as e:
            raise URLError(f'{url}: {e.reason}') from e

        text = r.read() if memoize else None

        if decode:
            encoding = get_response_encoding(r, encoding)

            # Branch on ``memoize``, not on ``text``: an empty body is falsy
            # but has already been consumed, so it must not be handed to
            # ``reencode`` as if it were an unread stream.
            if memoize:
                response = compat.decode(text, encoding)
            else:
                response = reencode(r.fp, encoding, decode=True)
                # Keep a reference to the response on the returned stream.
                response.r = r
        else:
            # Same reasoning: an empty memoized body is still the content.
            response = text if memoize else r

    content_type = get_response_content_type(r)
    return (response, content_type)
Beispiel #6
0
    def open(self, url, **params):
        """Open ``url`` and return its content, recording the raw response.

        When ``url`` starts with ``'http'`` and query ``params`` are given,
        the request is made with ``requests.get``; otherwise it is made with
        ``urlopen`` using a ``User-Agent`` header from
        ``default_user_agent()``.

        Args:
            url (str): The location to fetch.
            **params: Query parameters forwarded to ``requests.get``.

        Returns:
            With a truthy ``self.cache_type`` the content is read eagerly
            (``r.text`` for ``requests``; the ``urlopen`` body, passed
            through ``decode`` when ``self.decode`` is truthy). Otherwise a
            stream is returned: ``r.raw``, the result of ``reencode``, or the
            ``urlopen`` response itself.

        Raises:
            URLError: If ``urlopen`` raises ``HTTPError`` or ``URLError``;
                the message is prefixed with ``url``.

        Side effects:
            Sets ``self.ext`` from the response content type and ``self.r``
            to the underlying response object.
        """
        if url.startswith('http') and params:
            r = requests.get(url, params=params, stream=True)
            r.raw.decode_content = self.decode
            response = r.text if self.cache_type else r.raw
        else:
            req = Request(url, headers={'User-Agent': default_user_agent()})
            try:
                r = urlopen(req, context=self.context, timeout=self.timeout)
            except TypeError:
                # NOTE(review): presumably a fallback for ``urlopen``
                # variants that do not accept ``context`` -- confirm.
                r = urlopen(req, timeout=self.timeout)
            except HTTPError as e:
                msg = '{} returned {}: {}'
                raise URLError(msg.format(url, e.code, e.reason))
            except URLError as e:
                raise URLError('{}: {}'.format(url, e.reason))

            text = r.read() if self.cache_type else None

            if self.decode:
                encoding = get_response_encoding(r, self.def_encoding)

                # Branch on the cache setting, not on ``text``: an empty body
                # is falsy but has already been consumed, so it must not be
                # handed to ``reencode`` as if it were an unread stream.
                if self.cache_type:
                    response = decode(text, encoding)
                else:
                    response = reencode(r.fp, encoding, decode=True)
            else:
                # Same reasoning: an empty cached body is still the content.
                response = text if self.cache_type else r

        content_type = get_response_content_type(r)

        if 'xml' in content_type:
            self.ext = 'xml'
        elif 'json' in content_type:
            self.ext = 'json'
        else:
            # Take the subtype of 'type/subtype; parameters'. A value without
            # a '/' yields an empty extension instead of an IndexError.
            pieces = content_type.split('/')
            subtype = pieces[1] if len(pieces) > 1 else ''
            self.ext = subtype.split(';')[0]

        self.r = r
        return response
Beispiel #7
0
    def open(self, url, **params):
        """Open ``url`` and return its content, recording the raw response.

        When ``url`` starts with ``'http'`` and query ``params`` are given,
        the request is made with ``requests.get``; otherwise it is made with
        ``urlopen``.

        Args:
            url (str): The location to fetch.
            **params: Query parameters forwarded to ``requests.get``.

        Returns:
            With a truthy ``self.cache_type`` the content is read eagerly
            (``r.text`` for ``requests``; the ``urlopen`` body, passed
            through ``decode`` when ``self.decode`` is truthy). Otherwise a
            stream is returned: ``r.raw``, the result of ``reencode``, or the
            ``urlopen`` response itself.

        Raises:
            URLError: If ``urlopen`` raises ``HTTPError`` or ``URLError``;
                the message is prefixed with ``url``.

        Side effects:
            Sets ``self.ext`` from the response content type and ``self.r``
            to the underlying response object.
        """
        if url.startswith('http') and params:
            r = requests.get(url, params=params, stream=True)
            r.raw.decode_content = self.decode
            response = r.text if self.cache_type else r.raw
        else:
            try:
                r = urlopen(url, context=self.context, timeout=self.timeout)
            except TypeError:
                # NOTE(review): presumably a fallback for ``urlopen``
                # variants that do not accept ``context`` -- confirm.
                r = urlopen(url, timeout=self.timeout)
            except HTTPError as e:
                msg = '{} returned {}: {}'
                raise URLError(msg.format(url, e.code, e.reason))
            except URLError as e:
                raise URLError('{}: {}'.format(url, e.reason))

            text = r.read() if self.cache_type else None

            if self.decode:
                encoding = get_response_encoding(r, self.def_encoding)

                # Branch on the cache setting, not on ``text``: an empty body
                # is falsy but has already been consumed, so it must not be
                # handed to ``reencode`` as if it were an unread stream.
                if self.cache_type:
                    response = decode(text, encoding)
                else:
                    response = reencode(r.fp, encoding, decode=True)
            else:
                # Same reasoning: an empty cached body is still the content.
                response = text if self.cache_type else r

        content_type = get_response_content_type(r)

        if 'xml' in content_type:
            self.ext = 'xml'
        elif 'json' in content_type:
            self.ext = 'json'
        else:
            # Take the subtype of 'type/subtype; parameters'. A value without
            # a '/' yields an empty extension instead of an IndexError.
            pieces = content_type.split('/')
            subtype = pieces[1] if len(pieces) > 1 else ''
            self.ext = subtype.split(';')[0]

        self.r = r
        return response