def filepath_to_buffer(filepath, encoding=None, compression=None, timeout=None, start_byte=0):
    """Resolve *filepath* to a readable object plus encoding, compression and size.

    The input is dispatched on its kind; the first matching case wins:

    1. Not a string (per ``is_str``): the object is returned unchanged and
       its size is taken from ``filepath.size()``. ``start_byte`` is NOT
       applied in this case.
    2. URL (per ``_is_url``): fetched with a streaming ``requests.get``.
       A truthy ``start_byte`` is sent as a ``Range: bytes=<start_byte>-``
       request header. A ``Content-Encoding: gzip`` response header
       overrides ``compression`` with ``'gzip'``. The size is the integer
       value of the ``Content-Length`` response header, or 0 when absent.
    3. S3 URL (per ``_is_s3_url``): delegated to
       ``s3_get_filepath_or_buffer``, whose returned encoding and
       compression replace the arguments. The size is ``reader.size``.
       ``start_byte`` is not used.
    4. Buffer URL (per ``_is_buffer_url``): resolved with
       ``_url_to_buffer``; the size comes from its ``size()`` method.
       ``start_byte`` is not used.
    5. Anything else is treated as a local path: ``~`` is expanded, the
       file is opened with ``io.FileIO`` and, for a truthy ``start_byte``,
       positioned at that offset. The size is the size of the whole file,
       not the number of bytes remaining after ``start_byte``.

    Parameters
    ----------
    filepath : str or object
        Path, URL, or an already-open object exposing ``size()``.
    encoding : optional
        Passed through unchanged, except in the S3 case (see above).
    compression : optional
        Passed through, except in the HTTP gzip and S3 cases (see above).
    timeout : optional
        Forwarded to ``requests.get``; only used in the URL case.
    start_byte : int, default 0
        Offset at which reading should start (URL and local-file cases).

    Returns
    -------
    tuple
        ``(readable, encoding, compression, size)``.

    Raises
    ------
    ValueError
        If the input falls through to the local-path case and the expanded
        path does not exist.
    """
    # Case 1: already an object, hand it back untouched.
    if not is_str(filepath):
        return filepath, encoding, compression, filepath.size()

    # Case 2: HTTP-style URL, streamed so the body is not downloaded eagerly.
    if _is_url(filepath):
        range_header = (
            {"Range": "bytes={}-".format(start_byte)} if start_byte else None
        )
        response = requests.get(
            filepath, stream=True, headers=range_header, timeout=timeout
        )
        if response.headers.get('Content-Encoding', None) == 'gzip':
            compression = 'gzip'
        content_length = response.headers.get('Content-Length', 0)
        return response.raw, encoding, compression, int(content_length)

    # Case 3: S3 URL.
    if _is_s3_url(filepath):
        # The imported name is not referenced below; the import is kept so
        # that an unavailable ``pandas.io.s3`` still fails at this point.
        from pandas.io import s3  # noqa: F401
        reader, encoding, compression = s3_get_filepath_or_buffer(
            filepath, encoding=encoding, compression=compression
        )
        return reader, encoding, compression, reader.size

    # Case 4: buffer URL.
    if _is_buffer_url(filepath):
        buf = _url_to_buffer(filepath)
        return buf, encoding, compression, buf.size()

    # Case 5: local file.
    filepath = os.path.expanduser(filepath)
    if not os.path.exists(filepath):
        raise ValueError("wrong filepath: {}".format(filepath))
    total_size = os.path.getsize(filepath)
    raw_file = io.FileIO(filepath)
    if start_byte:
        raw_file.seek(start_byte)
    return raw_file, encoding, compression, total_size
def test_is_s3_url(self):
    """Check that ``_is_s3_url`` accepts an ``s3://`` URL and rejects ``s4://``."""
    # Imported at call time rather than at module import.
    from pandas.io.common import _is_s3_url

    self.assertTrue(_is_s3_url("s3://pandas/somethingelse.com"))
    # Negative case: without it, a predicate that always returns True
    # would still pass this test.
    self.assertFalse(_is_s3_url("s4://pandas/somethingelse.com"))
def test_is_s3_url(self):
    """Check that ``_is_s3_url`` accepts an ``s3://`` URL and rejects ``s4://``."""
    s3_url = "s3://pandas/somethingelse.com"
    assert _is_s3_url(s3_url)

    # Same URL with a scheme that differs by one character.
    other_scheme_url = "s4://pandas/somethingelse.com"
    assert not _is_s3_url(other_scheme_url)
def test_is_s3_url(self):
    """Check that ``_is_s3_url`` accepts an ``s3://`` URL and rejects ``s4://``."""
    s3_url = "s3://pandas/somethingelse.com"
    self.assertTrue(_is_s3_url(s3_url))

    # Same URL with a scheme that differs by one character.
    other_scheme_url = "s4://pandas/somethingelse.com"
    self.assertFalse(_is_s3_url(other_scheme_url))