def iter_raw(r: requests.Response, chunk_size: int = 1):
    """
    Iterate over the body of ``r`` without undoing its content encoding.

    Reimplementation of requests.Response.iter_content that doesn't try to
    decode zipped content: every read is issued with ``decode_content=False``.

    :param chunk_size: number of bytes requested from the raw stream per read
    :type r: requests.Response
    :return: an iterator over slices of the cached content when the response
        was already consumed, otherwise a generator reading from ``r.raw``
    :raises StreamConsumedError: the response is flagged as consumed while
        ``r._content`` is still a bool
    :raises TypeError: ``chunk_size`` is neither ``None`` nor an ``int``
    """
    # noinspection PyProtectedMember
    if r._content_consumed and isinstance(r._content, bool):
        raise StreamConsumedError()
    if chunk_size is not None and not isinstance(chunk_size, int):
        raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size))

    def read_undecoded():
        # urllib3.response.HTTPResponse.read
        block = r.raw.read(amt=chunk_size, decode_content=False)
        while block:
            log_item("Type of chunk", type(block))
            yield block
            block = r.raw.read(amt=chunk_size, decode_content=False)
        # Only reached once the raw stream returned an empty read.
        r._content_consumed = True

    # noinspection PyProtectedMember
    # simulate reading small chunks of the content
    replayed = iter_slices(r._content, chunk_size)
    streamed = read_undecoded()
    # noinspection PyProtectedMember
    if r._content_consumed:
        return replayed
    return streamed
def iter_content(self, chunk_size=1, decode_unicode=False):
    """Iterates over the response data.  When stream=True is set on the
    request, this avoids reading the content at once into memory for
    large responses.  The chunk size is the number of bytes it should
    read into memory.  This is not necessarily the length of each item
    returned as decoding can take place.

    chunk_size must be of type int or None. A value of None will
    function differently depending on the value of `stream`.
    stream=True will read data as it arrives in whatever size the
    chunks are received. If stream=False, data is returned as
    a single chunk.

    If decode_unicode is True, content will be decoded using the best
    available encoding based on the response.

    :raises StreamConsumedError: the response is flagged as consumed while
        ``self._content`` is still a bool.
    :raises TypeError: ``chunk_size`` is neither ``None`` nor an ``int``.
    """

    def generate():
        if hasattr(self.raw, 'stream'):
            # Special case for google app engine.
            # NOTE(review): an integer ``_method`` on the wrapped response is
            # what this code treats as the App Engine marker -- confirm.
            original = getattr(self.raw, '_original_response', None)
            use_plain_reads = isinstance(getattr(original, '_method', None), int)
            try:
                if use_plain_reads:
                    while True:
                        chunk = self.raw.read(chunk_size, decode_content=True)
                        if not chunk:
                            break
                        yield chunk
                else:
                    # Without this branch a regular urllib3 response produced
                    # no data at all and was still marked as consumed.
                    for chunk in self.raw.stream(chunk_size, decode_content=True):
                        yield chunk
            except ProtocolError as e:
                raise ChunkedEncodingError(e)
            except DecodeError as e:
                raise ContentDecodingError(e)
            except ReadTimeoutError as e:
                raise ConnectionError(e)
        else:
            # Standard file-like object.
            while True:
                chunk = self.raw.read(chunk_size)
                if not chunk:
                    break
                yield chunk

        self._content_consumed = True

    if self._content_consumed and isinstance(self._content, bool):
        raise StreamConsumedError()
    elif chunk_size is not None and not isinstance(chunk_size, int):
        raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size))

    if self._content_consumed:
        # simulate reading small chunks of the content
        chunks = iter_slices(self._content, chunk_size)
    else:
        chunks = generate()

    if decode_unicode:
        chunks = stream_decode_response_unicode(chunks, self)

    return chunks
def iter_content(self, chunk_size=1, decode_unicode=False):
    """Iterates over the response data.  When stream=True is set on the
    request, this avoids reading the content at once into memory for
    large responses.  The chunk size is the number of bytes it should
    read into memory.  This is not necessarily the length of each item
    returned as decoding can take place.

    chunk_size must be of type int or None. A value of None will
    function differently depending on the value of `stream`.
    stream=True will read data as it arrives in whatever size the
    chunks are received. If stream=False, data is returned as
    a single chunk.

    If decode_unicode is True, content will be decoded using the best
    available encoding based on the response.

    When the content has not been consumed yet, the returned object is an
    async generator and must be driven with ``async for``.
    """
    already_read = self._content_consumed
    if already_read and isinstance(self._content, bool):
        raise StreamConsumedError()
    if chunk_size is not None and not isinstance(chunk_size, int):
        raise TypeError(
            'chunk_size must be an int, it is instead a %s.' % type(chunk_size)
        )

    async def generate():
        async with self:
            # async with finalize(self.raw.stream(chunk_size)) as gen:
            body = self.raw.stream(chunk_size)
            logger.debug(f'Iterate response body stream: {self}')
            try:
                async for piece in body:
                    yield piece
            except ProtocolError as e:
                raise ChunkedEncodingError(e)
            except DecodeError as e:
                raise ContentDecodingError(e)
            except ReadTimeoutError as e:
                raise ConnectionError(e)

        self._content_consumed = True

    # simulate reading small chunks of the content when it is already cached
    chunks = iter_slices(self._content, chunk_size) if already_read else generate()

    if not decode_unicode:
        return chunks
    return stream_decode_response_unicode(chunks, self)
async def iter_content(self, chunk_size=1, decode_unicode=False):
    """Asynchronously iterate over the response data.

    Because this is an async generator, the argument checks below run on the
    first iteration step, not when the method is called.

    :param chunk_size: passed to ``self.generate`` when streaming, or to
        ``iter_slices`` when replaying already-read content. Must be an
        ``int`` or ``None``.
    :param decode_unicode: when true, the chunk iterator is wrapped with
        ``stream_decode_response_unicode`` before being consumed.
    :raises StreamConsumedError: the response is flagged as consumed while
        ``self._content`` is still a bool.
    :raises TypeError: ``chunk_size`` is neither ``None`` nor an ``int``.
    """
    if self._content_consumed and isinstance(self._content, bool):
        raise StreamConsumedError()
    elif chunk_size is not None and not isinstance(chunk_size, int):
        raise TypeError(
            "chunk_size must be an int, it is instead a %s." % type(chunk_size)
        )

    # Only build the iterator that will actually be used.
    if self._content_consumed:
        # simulate reading small chunks of the content
        chunks = iter_slices(self._content, chunk_size)
    else:
        chunks = self.generate(chunk_size)

    if decode_unicode:
        chunks = stream_decode_response_unicode(chunks, self)

    # NOTE(review): iter_slices presumably returns a plain (synchronous)
    # generator, as requests.utils.iter_slices does -- confirm. ``async for``
    # rejects such an object, so pick the loop form from what we were given.
    if hasattr(chunks, "__aiter__"):
        async for c in chunks:
            yield c
    else:
        for c in chunks:
            yield c
def iter_content(self, chunk_size=1, decode_unicode=False):
    """rewrite requests function, set decode_content with False

    Behaves like the stock implementation except that urllib3 is asked to
    stream the body with ``decode_content=False``.
    """
    if self._content_consumed and isinstance(self._content, bool):
        raise StreamConsumedError()
    if chunk_size is not None and not isinstance(chunk_size, int):
        raise TypeError("chunk_size must be an int, it is instead a %s." % type(chunk_size))

    def generate():
        if not hasattr(self.raw, 'stream'):
            # Standard file-like object.
            piece = self.raw.read(chunk_size)
            while piece:
                yield piece
                piece = self.raw.read(chunk_size)
        else:
            # Special case for urllib3.
            try:
                for piece in self.raw.stream(chunk_size, decode_content=False):
                    yield piece
            except ProtocolError as e:
                raise ChunkedEncodingError(e)
            except DecodeError as e:
                raise ContentDecodingError(e)
            except ReadTimeoutError as e:
                raise ConnectionError(e)

        self._content_consumed = True

    # simulate reading small chunks of the content
    replayed = iter_slices(self._content, chunk_size)
    streamed = generate()

    if self._content_consumed:
        chunks = replayed
    else:
        chunks = streamed

    if not decode_unicode:
        return chunks
    return stream_decode_response_unicode(chunks, self)
def request(self, method, url,
            params=None,
            data=None,
            headers=None,
            cookies=None,
            files=None,
            auth=None,
            timeout=None,
            allow_redirects=True,
            proxies=None,
            hooks=None,
            stream=None,
            verify=None,
            cert=None,
            **kwargs):
    """Constructs a :class:`Request <Request>`, prepares it and sends it.
    Returns :class:`Response <Response>` object.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query
        string for the :class:`Request`.
    :param data: (optional) Dictionary or bytes to send in the body of the
        :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the
        :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the
        :class:`Request`.
    :param files: (optional) Dictionary of 'filename': file-like-objects
        for multipart encoding upload.
    :param auth: (optional) Auth tuple or callable to enable
        Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) Float describing the timeout of the
        request.
    :param allow_redirects: (optional) Boolean. Set to True by default.
    :param proxies: (optional) Dictionary mapping protocol to the URL of
        the proxy. The dictionary passed in is not modified.
    :param stream: (optional) whether to immediately download the response
        content. Defaults to ``False``.
    :param verify: (optional) if ``True``, the SSL cert will be verified.
        A CA_BUNDLE path can also be provided.
    :param cert: (optional) if String, path to ssl client cert file (.pem).
        If Tuple, ('cert', 'key') pair.
    :param error_type: (optional, keyword only via ``**kwargs``) name of a
        ``requests.exceptions`` class to raise instead of sending anything.
        An unknown name raises ``KeyError``.
    """
    # ===================================================================
    # add by mz: fault-injection hook, raises the named exception up front.
    error_type = kwargs.get("error_type")
    if error_type:
        from requests.exceptions import (
            ChunkedEncodingError,
            ConnectionError,
            ConnectTimeout,
            ContentDecodingError,
            HTTPError,
            InvalidSchema,
            InvalidURL,
            MissingSchema,
            ProxyError,
            ReadTimeout,
            RequestException,
            RetryError,
            SSLError,
            StreamConsumedError,
            Timeout,
            TooManyRedirects,
            URLRequired,
        )

        # Map to classes and instantiate only the requested one.
        injectable_errors = {
            "InvalidURL": InvalidURL,
            "URLRequired": URLRequired,
            "ConnectTimeout": ConnectTimeout,
            "ConnectionError": ConnectionError,
            "SSLError": SSLError,
            "ReadTimeout": ReadTimeout,
            "InvalidSchema": InvalidSchema,
            "MissingSchema": MissingSchema,
            "ChunkedEncodingError": ChunkedEncodingError,
            "ContentDecodingError": ContentDecodingError,
            "StreamConsumedError": StreamConsumedError,
            "TooManyRedirects": TooManyRedirects,
            "RequestException": RequestException,
            "HTTPError": HTTPError,
            "ProxyError": ProxyError,
            "Timeout": Timeout,
            "RetryError": RetryError,
        }
        raise injectable_errors[error_type]()
    # ===================================================================

    method = builtin_str(method)

    # Create the Request.
    req = Request(
        method=method.upper(),
        url=url,
        headers=headers,
        files=files,
        data=data or {},
        params=params or {},
        auth=auth,
        cookies=cookies,
        hooks=hooks,
    )
    prep = self.prepare_request(req)

    # Work on a copy: the environment proxies added below must not leak
    # into the dictionary owned by the caller.
    proxies = dict(proxies or {})

    # Gather clues from the surrounding environment.
    if self.trust_env:
        # Set environment's proxies.
        env_proxies = get_environ_proxies(url) or {}
        for (k, v) in env_proxies.items():
            proxies.setdefault(k, v)

        # Look for configuration.
        if not verify and verify is not False:
            verify = os.environ.get('REQUESTS_CA_BUNDLE')

        # Curl compatibility.
        if not verify and verify is not False:
            verify = os.environ.get('CURL_CA_BUNDLE')

    # Merge all the kwargs.
    proxies = merge_setting(proxies, self.proxies)
    stream = merge_setting(stream, self.stream)
    verify = merge_setting(verify, self.verify)
    cert = merge_setting(cert, self.cert)

    # Send the request.
    send_kwargs = {
        'stream': stream,
        'timeout': timeout,
        'verify': verify,
        'cert': cert,
        'proxies': proxies,
        'allow_redirects': allow_redirects,
    }
    resp = self.send(prep, **send_kwargs)

    return resp
def iter_content(self, chunk_size=1, decode_unicode=False):
    """Iterates over the response data.  When stream=True is set on the
    request, this avoids reading the content at once into memory for
    large responses.  The chunk size is the number of bytes it should
    read into memory.  This is not necessarily the length of each item
    returned as decoding can take place.

    chunk_size must be of type int or None. A value of None will
    function differently depending on the value of `stream`.
    stream=True will read data as it arrives in whatever size the
    chunks are received. If stream=False, data is returned as
    a single chunk.

    If decode_unicode is True, content will be decoded using the best
    available encoding based on the response.

    When ``self.raw.stream`` is truthy the generator yields ``Future``
    objects whose results are the chunks; otherwise it yields the chunks
    themselves.

    :raises StreamConsumedError: the response is flagged as consumed while
        ``self._content`` is still a bool.
    :raises TypeError: ``chunk_size`` is neither ``None`` nor an ``int``, or
        ``self.raw`` is not an ``HTTPMessageDelegate``.
    """

    def generate():
        # Decoding is only possible when the response declares an encoding.
        decode = decode_unicode
        if self.encoding is None:
            decode = False
        if decode:
            decoder = codecs.getincrementaldecoder(
                self.encoding)(errors='replace')

        if self.raw.stream:
            # Non-empty dict used as a mutable "keep going" flag that the
            # callback can clear from inside its closure.
            content_remain = {'': ''}
            while content_remain:
                future = Future()

                def callback(status):
                    # Hand over whatever has been buffered, then reset the
                    # buffer for the next read.
                    chunk = self.raw.body.getvalue()
                    self.raw.body.truncate(0)
                    self.raw.body.seek(0)
                    if decode:
                        chunk = decoder.decode(chunk)
                    # A falsy status ends the outer loop after this future.
                    if not status:
                        content_remain.clear()
                    future.set_result(chunk)

                self.raw.connection.read_stream_body(
                    self.raw, chunk_size, callback=callback)
                yield future
                # Keep handing out the same future until it is resolved.
                while not future.done():
                    yield future
        else:
            if self.raw.body:
                self.raw.body.seek(0)
                while True:
                    chunk = self.raw.body.read(chunk_size)
                    if not chunk:
                        # End of data is decided on the raw read, never on
                        # the decoded text; flush what the decoder still
                        # holds (e.g. a truncated trailing sequence).
                        if decode:
                            tail = decoder.decode(b'', final=True)
                            if tail:
                                yield tail
                        break
                    if decode:
                        chunk = decoder.decode(chunk)
                        if not chunk:
                            # The read ended inside a multi-byte character;
                            # the decoder buffered it, so keep reading.
                            continue
                    yield chunk

        self._content_consumed = True

    if self._content_consumed and isinstance(self._content, bool):
        raise StreamConsumedError()
    elif chunk_size is not None and not isinstance(chunk_size, int):
        raise TypeError('chunk_size must be an int, it is instead a %s.'
                        % type(chunk_size))
    elif not isinstance(self.raw, HTTPMessageDelegate):
        raise TypeError('self.raw must be a trip.adapters.MessageDelegate')

    if self._content_consumed:
        # simulate reading small chunks of the content
        if self.raw.stream:
            return iter_slices_future(self, chunk_size, decode_unicode)
        else:
            return iter_slices(self._content, chunk_size)
    else:
        return generate()
def iter_content(response, chunk_size=1024*10):
    """Iterates over the response object containing audio.

    It is roughly identical to :meth:`requests.Response.iter_content`
    except that it is aware of the ICY 'pseudo-HTTP' protocol, which may
    include, but is not limited to, the track title, author, etc.

    When metadata is found it is inserted into the global `METADATA`
    dictionary with the stream URL as the key.

    chunk_size must be of type int. *Note: Should not normally need to be
    set explicitly.*

    ICY:
    1) HTTP request to stream with the `Icy-Metadata` header
    2) Response header `icy-metaint` will tell how often the metadata is
       sent in the stream. Specifically, how many audio data bytes there
       are between metadata blocks.
    3) Read the number of bytes `icy-metaint` told us to read. This is the
       audio data.
    4) Next up, read 1 byte to get the metadata 'length specifier'
    5) Multiply that byte by 16 to get the size of the plaintext metadata
       string. (Max byte size = 255 so metadata max length = 4080).
    6) Parse metadata, set global variable and repeat.

    The generator finishes normally when the stream has no more data.

    :raises ChunkedEncodingError: wraps a ``ProtocolError`` from the stream.
    :raises ConnectionError: wraps a ``ReadTimeoutError`` from the stream.
    :raises StreamConsumedError: ``response.raw`` has no ``stream`` attribute.
    """
    global METADATA

    if hasattr(response.raw, 'stream'):
        has_icy = False
        bufsize_audio = chunk_size

        _metaint = response.headers.get("icy-metaint")
        if _metaint and _metaint.isdigit() and int(_metaint) > 0:
            bufsize_audio = int(_metaint)
            has_icy = True

        # Byte literals: the raw reads return bytes, and a str marker cannot
        # be searched for in bytes on Python 3.
        markers = (b"StreamTitle", b"=", b";")

        try:
            while True:
                # no gzip/deflate - audio already compressed
                chunk = next(
                    response.raw.stream(bufsize_audio, decode_content=False),
                    None)
                if chunk is None:
                    # Stream exhausted. Letting StopIteration escape from a
                    # generator body is a RuntimeError on Python 3.7+.
                    return
                yield chunk

                if not has_icy:
                    continue

                # Length specifier: one byte, in units of 16 bytes.
                length_byte = response.raw.read(1)
                if not length_byte:
                    # Stream ended right after an audio block.
                    return
                bufsize_metadata = ord(length_byte) * 16
                if not bufsize_metadata:
                    # Empty metadata block, nothing to read or parse.
                    continue

                chunk = response.raw.read(bufsize_metadata)
                if len(chunk) >= 16 and any(s in chunk for s in markers):
                    metadata = icy_parse(chunk)
                    METADATA[response.url]["info"] = metadata
        except ProtocolError as e:
            raise ChunkedEncodingError(e)
        except ReadTimeoutError as e:
            raise ConnectionError(e)

    raise StreamConsumedError()
def request(self, method, url,
            params=None,
            data=None,
            headers=None,
            cookies=None,
            files=None,
            auth=None,
            timeout=None,
            allow_redirects=True,
            proxies=None,
            hooks=None,
            stream=None,
            verify=None,
            cert=None,
            json=None,
            **kwargs):
    """Constructs a :class:`Request <Request>`, prepares it and sends it.
    Returns :class:`Response <Response>` object.

    :param method: method for the new :class:`Request` object.
    :param url: URL for the new :class:`Request` object.
    :param params: (optional) Dictionary or bytes to be sent in the query
        string for the :class:`Request`.
    :param data: (optional) Dictionary, bytes, or file-like object to send
        in the body of the :class:`Request`.
    :param json: (optional) json to send in the body of the
        :class:`Request`.
    :param headers: (optional) Dictionary of HTTP Headers to send with the
        :class:`Request`.
    :param cookies: (optional) Dict or CookieJar object to send with the
        :class:`Request`.
    :param files: (optional) Dictionary of ``'filename': file-like-objects``
        for multipart encoding upload.
    :param auth: (optional) Auth tuple or callable to enable
        Basic/Digest/Custom HTTP Auth.
    :param timeout: (optional) How long to wait for the server to send
        data before giving up, as a float, or a :ref:`(connect timeout,
        read timeout) <timeouts>` tuple.
    :type timeout: float or tuple
    :param allow_redirects: (optional) Set to True by default.
    :type allow_redirects: bool
    :param proxies: (optional) Dictionary mapping protocol or protocol and
        hostname to the URL of the proxy.
    :param stream: (optional) whether to immediately download the response
        content. Defaults to ``False``.
    :param verify: (optional) whether the SSL cert will be verified.
        A CA_BUNDLE path can also be provided. Defaults to ``True``.
    :param cert: (optional) if String, path to ssl client cert file (.pem).
        If Tuple, ('cert', 'key') pair.
    :param error_type: (optional, read from ``**kwargs``) name of a
        ``requests.exceptions`` class to raise before anything is sent.
    :rtype: requests.Response
    """
    # ===================================================================
    # add by mz
    error_type = kwargs.get("error_type")
    if error_type:
        from requests.exceptions import (
            InvalidURL, URLRequired, ConnectTimeout, ConnectionError,
            SSLError, ReadTimeout, InvalidSchema, MissingSchema,
            ChunkedEncodingError, ContentDecodingError, RequestException,
            HTTPError, ProxyError, Timeout, RetryError, StreamConsumedError,
            TooManyRedirects,
        )

        # Name -> exception class; the selected class is raised with no
        # arguments. An unknown name surfaces as KeyError from the lookup.
        error_classes = {
            "InvalidURL": InvalidURL,
            "URLRequired": URLRequired,
            "ConnectTimeout": ConnectTimeout,
            "ConnectionError": ConnectionError,
            "SSLError": SSLError,
            "ReadTimeout": ReadTimeout,
            "InvalidSchema": InvalidSchema,
            "MissingSchema": MissingSchema,
            "ChunkedEncodingError": ChunkedEncodingError,
            "ContentDecodingError": ContentDecodingError,
            "StreamConsumedError": StreamConsumedError,
            "TooManyRedirects": TooManyRedirects,
            "RequestException": RequestException,
            "HTTPError": HTTPError,
            "ProxyError": ProxyError,
            "Timeout": Timeout,
            "RetryError": RetryError,
        }
        chosen = error_classes[error_type]
        raise chosen()
    # ===================================================================

    # Create the Request
    request_fields = dict(
        method=method.upper(),
        url=url,
        headers=headers,
        files=files,
        data=data or {},
        json=json,
        params=params or {},
        auth=auth,
        cookies=cookies,
        hooks=hooks,
    )
    prep = self.prepare_request(Request(**request_fields))

    proxies = proxies or {}
    settings = self.merge_environment_settings(
        prep.url, proxies, stream, verify, cert)

    # Send the request.
    send_kwargs = dict(timeout=timeout, allow_redirects=allow_redirects)
    send_kwargs.update(settings)

    return self.send(prep, **send_kwargs)