def test_string_io(self):
    # Round-trip a random DataFrame through msgpack via every supported
    # channel: explicit None target, default target, a BytesIO wrapper,
    # the module-level to_msgpack, and an actual file on disk.
    df = DataFrame(np.random.randn(10, 2))

    packed = df.to_msgpack(None)
    tm.assert_frame_equal(read_msgpack(packed), df)

    packed = df.to_msgpack()
    tm.assert_frame_equal(read_msgpack(packed), df)

    packed = df.to_msgpack()
    tm.assert_frame_equal(read_msgpack(compat.BytesIO(packed)), df)

    packed = to_msgpack(None, df)
    tm.assert_frame_equal(read_msgpack(packed), df)

    with ensure_clean(self.path) as p:
        packed = df.to_msgpack()
        with open(p, 'wb') as fh:
            fh.write(packed)
        tm.assert_frame_equal(read_msgpack(p), df)
def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
    """
    Load msgpack pandas object from the specified file path

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string
        File path, BytesIO like or string
    encoding : Encoding for decoding msgpack str type
    iterator : boolean, if True, return an iterator to the unpacker
        (default is False)

    Returns
    -------
    obj : type of object stored in file
    """
    path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
    if iterator:
        return Iterator(path_or_buf)

    def read(fh):
        # A stream may hold several packed objects; a single-object
        # stream collapses to the object itself.
        unpacked = list(unpack(fh, encoding=encoding, **kwargs))
        return unpacked[0] if len(unpacked) == 1 else unpacked

    # see if we have an actual file
    if isinstance(path_or_buf, compat.string_types):
        try:
            exists = os.path.exists(path_or_buf)
        except (TypeError, ValueError):
            exists = False
        if exists:
            with open(path_or_buf, 'rb') as fh:
                return read(fh)

    if isinstance(path_or_buf, compat.binary_type):
        # treat as a binary-like
        fh = None
        try:
            # We can't distinguish between a path and a buffer of bytes in
            # Python 2 so instead assume the first byte of a valid path is
            # less than 0x80.
            if compat.PY3 or ord(path_or_buf[0]) >= 0x80:
                fh = compat.BytesIO(path_or_buf)
                return read(fh)
        finally:
            if fh is not None:
                fh.close()
    elif hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read):
        # treat as a buffer like
        return read(path_or_buf)

    raise ValueError('path_or_buf needs to be a string file path or file-like')
def read_msgpack(path_or_buf, iterator=False, **kwargs):
    """
    Load msgpack pandas object from the specified file path

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string
        File path, BytesIO like or string
    iterator : boolean, if True, return an iterator to the unpacker
        (default is False)

    Returns
    -------
    obj : type of object stored in file
    """
    path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
    if iterator:
        return Iterator(path_or_buf)

    def read(fh):
        # collapse a single-object stream to the bare object
        objs = list(unpack(fh, **kwargs))
        if len(objs) == 1:
            return objs[0]
        return objs

    # see if we have an actual file
    if isinstance(path_or_buf, compat.string_types):
        try:
            exists = os.path.exists(path_or_buf)
        except (TypeError, ValueError):
            exists = False
        if exists:
            with open(path_or_buf, 'rb') as fh:
                return read(fh)

    # treat as a binary-like
    if isinstance(path_or_buf, compat.binary_type):
        fh = None
        try:
            fh = compat.BytesIO(path_or_buf)
            return read(fh)
        finally:
            if fh is not None:
                fh.close()

    # a buffer like
    if hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read):
        return read(path_or_buf)

    raise ValueError('path_or_buf needs to be a string file path or file-like')
def _read_zipped_sdmx(path_or_buf):
    """Unzip a ZIP archive containing SDMX-XML data.

    Returns a file-like object opened on the archive's single member.
    """
    raw = _read_content(path_or_buf)
    buf = compat.BytesIO()
    buf.write(compat.str_to_bytes(raw))
    archive = zipfile.ZipFile(buf)
    members = archive.namelist()
    # the SDMX payload is expected to hold exactly one file
    assert len(members) == 1
    return archive.open(members[0])
def testPackUnicode(self):
    # Unicode values must survive a pack/unpack round trip, both via the
    # one-shot packb/unpackb helpers and via the streaming
    # Packer/Unpacker objects.
    samples = [u(""), u("abcd"), [u("defgh")], u("Русский текст"), ]
    for sample in samples:
        roundtripped = unpackb(
            packb(sample, encoding='utf-8'), use_list=1, encoding='utf-8')
        assert roundtripped == sample

        packer = Packer(encoding='utf-8')
        payload = packer.pack(sample)
        roundtripped = Unpacker(
            compat.BytesIO(payload), encoding='utf-8', use_list=1).unpack()
        assert roundtripped == sample
def testArraySize(self, sizes=(0, 5, 50, 1000)):
    """Pack explicit array headers plus elements; verify the Unpacker
    reconstructs each array as ``list(range(size))``.

    Parameters
    ----------
    sizes : iterable of int
        Array lengths to exercise.  The default is a tuple rather than a
        list to avoid the shared mutable-default-argument pitfall.
    """
    bio = compat.BytesIO()
    packer = Packer()
    for size in sizes:
        bio.write(packer.pack_array_header(size))
        for i in range(size):
            bio.write(packer.pack(i))

    bio.seek(0)
    unpacker = Unpacker(bio, use_list=1)
    for size in sizes:
        assert unpacker.unpack() == list(range(size))
def testMapSize(self, sizes=(0, 5, 50, 1000)):
    """Pack explicit map headers plus key/value pairs; verify the
    Unpacker reconstructs each map as ``{i: i * 2}``.

    Parameters
    ----------
    sizes : iterable of int
        Map sizes to exercise.  The default is a tuple rather than a
        list to avoid the shared mutable-default-argument pitfall.
    """
    bio = compat.BytesIO()
    packer = Packer()
    for size in sizes:
        bio.write(packer.pack_map_header(size))
        for i in range(size):
            bio.write(packer.pack(i))       # key
            bio.write(packer.pack(i * 2))   # value

    bio.seek(0)
    unpacker = Unpacker(bio)
    for size in sizes:
        assert unpacker.unpack() == dict((i, i * 2) for i in range(size))
def test_manualreset(self, sizes=(0, 5, 50, 1000)):
    """With ``autoreset=False`` the Packer accumulates output in its
    internal buffer; ``reset()`` must empty that buffer.

    Parameters
    ----------
    sizes : iterable of int
        Array lengths to pack.  The default is a tuple rather than a
        list to avoid the shared mutable-default-argument pitfall.
    """
    packer = Packer(autoreset=False)
    for size in sizes:
        packer.pack_array_header(size)
        for i in range(size):
            packer.pack(i)

    # everything packed so far is still buffered inside the packer
    bio = compat.BytesIO(packer.bytes())
    unpacker = Unpacker(bio, use_list=1)
    for size in sizes:
        assert unpacker.unpack() == list(range(size))

    packer.reset()
    assert packer.bytes() == b''
def test_readbytes(self):
    # read_bytes() must hand back raw bytes from the middle of the
    # stream.  Exercised once with feed() and once with a file-like
    # source whose small read_size forces an internal buffer refill.
    def check(unpacker):
        assert unpacker.unpack() == ord(b'f')
        assert unpacker.read_bytes(3) == b'oob'
        assert unpacker.unpack() == ord(b'a')
        assert unpacker.unpack() == ord(b'r')

    fed = Unpacker(read_size=3)
    fed.feed(b'foobar')
    check(fed)

    # Test buffer refill
    check(Unpacker(compat.BytesIO(b'foobar'), read_size=3))
def to_msgpack(path_or_buf, *args, **kwargs):
    """
    msgpack (serialize) object to input file path

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string File path, buffer-like, or None
        if None, return generated string
    args : an object or objects to serialize
    encoding : encoding for unicode objects
    append : boolean whether to append to an existing msgpack
        (default is False)
    compress : type of compressor (zlib or blosc), default to None (no
        compression)
    """
    # the active compressor is module-level state read elsewhere
    global compressor
    compressor = kwargs.pop('compress', None)
    if compressor:
        compressor = u(compressor)

    # append to an existing msgpack file only when explicitly requested
    mode = 'a+b' if kwargs.pop('append', None) else 'wb'

    def writer(fh):
        for obj in args:
            fh.write(pack(obj, **kwargs))

    path_or_buf = _stringify_path(path_or_buf)
    if isinstance(path_or_buf, compat.string_types):
        with open(path_or_buf, mode) as fh:
            writer(fh)
    elif path_or_buf is None:
        # no target given: serialize in memory and return the bytes
        buf = compat.BytesIO()
        writer(buf)
        return buf.getvalue()
    else:
        writer(path_or_buf)
def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
             chunksize=None):
    """Open ``filepath_or_buffer`` for reading and parse the file header.

    Parameters
    ----------
    filepath_or_buffer : str, bytes or file-like
        Path to the file, or an object with a ``read`` method.
    index : optional
        Column to use as index (stored; consumed by later reads).
    encoding : str, default 'ISO-8859-1'
        Encoding used when a text buffer must be converted to bytes.
    chunksize : int, optional
        Number of lines to read per chunk (stored; consumed later).
    """
    self._encoding = encoding
    self._lines_read = 0
    self._index = index
    self._chunksize = chunksize

    if isinstance(filepath_or_buffer, str):
        filepath_or_buffer, encoding, compression = get_filepath_or_buffer(
            filepath_or_buffer, encoding=encoding)

    if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
        self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
    else:
        # Copy to BytesIO, and ensure no encoding
        contents = filepath_or_buffer.read()
        try:
            contents = contents.encode(self._encoding)
        except (AttributeError, UnicodeError):
            # already bytes (no .encode) or not encodable in
            # self._encoding -- keep the raw contents.  Previously a
            # bare ``except`` that hid every error, including typos.
            pass
        self.filepath_or_buffer = compat.BytesIO(contents)

    self._read_header()
def get_data(geo, resolution, session):
    # Download a ZIP of per-symbol price files for the given geography and
    # resolution, build one DataFrame per symbol, and return
    # (pd.Panel of all symbols, DataFrame describing the zip members).
    # NOTE(review): branch placement below is reconstructed from a
    # whitespace-mangled source -- confirm against upstream history.
    geo = conv_geo(geo)
    resolution = conv_resol(resolution)
    response = download_data(geo, resolution, session)
    logger.info("Request done")
    logger.info("Create stream file")
    # hold the HTTP payload in memory so ZipFile can seek it
    zip_data = compat.BytesIO(response.content)
    logger.info("Creating a DataFrame per symbol")
    d = {}        # symbol -> DataFrame (or None on failure)
    cols = None   # columns of the first successfully-parsed file
    with ZipFile(zip_data, 'r') as zf:
        filelist = zf.filelist
        # metadata table about the archive members themselves
        df_info = pd.DataFrame(filelist)
        df_info['filename'] = df_info[0].map(lambda x: x.filename)
        df_info['file_size'] = df_info[0].map(lambda x: x.file_size)
        df_info['date_time'] = df_info[0].map(
            lambda x: datetime.datetime(*x.date_time))
        del df_info[0]  # drop the raw ZipInfo column
        for zinfo in filelist:
            filename = zinfo.filename
            filename_short, filename_ext = os.path.splitext(filename)
            with zf.open(filename) as zfile:
                if filename_ext.lower() == '.txt':
                    # path layout: .../<...>/<exchange>/.../<symbol>.txt
                    # -- assumes at least 4 path components; TODO confirm
                    file_exchange = filename.split('/')[3]
                    file_symbol = os.path.split(filename_short)[-1].upper()
                    logger.info(
                        "Building DataFrame for '%s' at '%s' from '%s' (%.1f)"
                        % (file_symbol, file_exchange, filename,
                           float(zinfo.file_size) / 1024))
                    if zinfo.file_size > 0:
                        try:
                            if resolution == 'd':
                                # daily files: date in the first column
                                df = pd.read_csv(zfile, parse_dates=0)
                            else:
                                # intraday: date + time in columns 0 and 1
                                df = pd.read_csv(zfile, parse_dates=[[0, 1]])
                                df = df.rename(columns={'Date_Time': 'Date'})
                            df = df.set_index('Date')
                            df['Exchange'] = file_exchange
                            d[file_symbol] = df
                            if cols is None:
                                cols = df.columns
                        except KeyboardInterrupt:
                            logger.error("CTRL+C was pressed - exit")
                            break
                        except Exception as e:
                            logger.error(
                                "Can't build DataFrame for '%s' at '%s' from '%s'"
                                % (file_symbol, file_exchange,
                                   filename.replace(' ', '\ ')))
                            logger.error(traceback.format_exc())
                            d[file_symbol] = None
                            # NOTE(review): writes to the df from a PREVIOUS
                            # iteration (or raises NameError on the first);
                            # looks like a latent bug -- verify intent
                            df['Exchange'] = file_exchange
                    else:
                        logger.error(
                            "Can't build DataFrame for '%s' at '%s' from '%s' (empty file)"
                            % (file_symbol, file_exchange,
                               filename.replace(' ', '\ ')))
                        d[file_symbol] = None
                        # NOTE(review): same stale-df write as above
                        df['Exchange'] = file_exchange
    logger.info("Create Panel from DataFrame")
    # orient the Panel as (columns, dates, symbols)
    panel = pd.Panel(d)
    panel = panel.transpose(2, 1, 0)
    panel.major_axis = panel.major_axis.map(lambda n:
                                            pd.to_datetime(str(n)))
    return (panel, df_info)