Ejemplo n.º 1
0
    def test_string_io(self):
        # round-trip a frame through every supported msgpack target:
        # explicit None, the default in-memory target, a BytesIO wrapper,
        # the module-level function, and a real file on disk
        frame = DataFrame(np.random.randn(10, 2))

        packed = frame.to_msgpack(None)
        tm.assert_frame_equal(read_msgpack(packed), frame)

        packed = frame.to_msgpack()
        tm.assert_frame_equal(read_msgpack(packed), frame)

        packed = frame.to_msgpack()
        tm.assert_frame_equal(read_msgpack(compat.BytesIO(packed)), frame)

        packed = to_msgpack(None, frame)
        tm.assert_frame_equal(read_msgpack(packed), frame)

        with ensure_clean(self.path) as p:
            packed = frame.to_msgpack()
            with open(p, 'wb') as fh:
                fh.write(packed)
            tm.assert_frame_equal(read_msgpack(p), frame)
Ejemplo n.º 2
0
def read_msgpack(path_or_buf, encoding='utf-8', iterator=False, **kwargs):
    """
    Load msgpack pandas object from the specified
    file path

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string file path, raw ``bytes``, or file-like object
        Source of the packed data.
    encoding : string, default 'utf-8'
        Encoding for decoding msgpack str type.
    iterator : boolean, default False
        If True, return an iterator to the unpacker instead of the
        unpacked object(s).

    Returns
    -------
    obj : the object stored in the file, or a list of objects when the
          file holds more than one

    """
    path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
    if iterator:
        return Iterator(path_or_buf)

    def read(fh):
        # a source may contain several packed objects; unwrap a singleton
        l = list(unpack(fh, encoding=encoding, **kwargs))
        if len(l) == 1:
            return l[0]
        return l

    # see if we have an actual file
    if isinstance(path_or_buf, compat.string_types):
        try:
            # os.path.exists can raise for malformed "paths" (e.g. names
            # with embedded NULs) -- treat those as non-files
            exists = os.path.exists(path_or_buf)
        except (TypeError, ValueError):
            exists = False

        if exists:
            with open(path_or_buf, 'rb') as fh:
                return read(fh)

    if isinstance(path_or_buf, compat.binary_type):
        # treat as a binary-like
        fh = None
        try:
            # We can't distinguish between a path and a buffer of bytes in
            # Python 2 so instead assume the first byte of a valid path is
            # less than 0x80.
            if compat.PY3 or ord(path_or_buf[0]) >= 0x80:
                fh = compat.BytesIO(path_or_buf)
                return read(fh)
        finally:
            # close the buffer even if unpacking raised; when the Python 2
            # heuristic rejects the bytes, fh stays None and control falls
            # through to the ValueError below
            if fh is not None:
                fh.close()
    elif hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read):
        # treat as a buffer like
        return read(path_or_buf)

    raise ValueError('path_or_buf needs to be a string file path or file-like')
Ejemplo n.º 3
0
def read_msgpack(path_or_buf, iterator=False, **kwargs):
    """Load a msgpack-serialized pandas object from a path or buffer.

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string file path, raw ``bytes``, or file-like object
        Source of the packed data.
    iterator : boolean, default False
        If True, return an iterator to the unpacker instead of the
        unpacked object(s).

    Returns
    -------
    obj : the object stored in the file, or a list of objects when the
          source holds more than one
    """
    path_or_buf, _, _ = get_filepath_or_buffer(path_or_buf)
    if iterator:
        return Iterator(path_or_buf)

    def _unpack_all(handle):
        # a source may contain several packed objects; unwrap a singleton
        unpacked = list(unpack(handle, **kwargs))
        return unpacked[0] if len(unpacked) == 1 else unpacked

    # 1) a string that names an existing file on disk
    if isinstance(path_or_buf, compat.string_types):

        try:
            is_file = os.path.exists(path_or_buf)
        except (TypeError, ValueError):
            # malformed "path" (e.g. embedded NULs) -- not a file
            is_file = False

        if is_file:
            with open(path_or_buf, 'rb') as handle:
                return _unpack_all(handle)

    # 2) raw packed bytes
    if isinstance(path_or_buf, compat.binary_type):
        buf = None
        try:
            buf = compat.BytesIO(path_or_buf)
            return _unpack_all(buf)
        finally:
            if buf is not None:
                buf.close()

    # 3) an already-open, readable buffer
    if hasattr(path_or_buf, 'read') and compat.callable(path_or_buf.read):
        return _unpack_all(path_or_buf)

    raise ValueError('path_or_buf needs to be a string file path or file-like')
Ejemplo n.º 4
0
def _read_zipped_sdmx(path_or_buf):
    """Unzip data containing SDMX-XML and return the single member.

    Parameters
    ----------
    path_or_buf : string file path or file-like
        Source of the zipped SDMX payload (read via ``_read_content``).

    Returns
    -------
    file-like
        An open handle on the single file inside the archive.

    Raises
    ------
    ValueError
        If the archive does not contain exactly one member.
    """
    data = _read_content(path_or_buf)

    zp = compat.BytesIO()
    zp.write(compat.str_to_bytes(data))
    f = zipfile.ZipFile(zp)
    files = f.namelist()
    # ``assert`` is stripped under ``python -O``; validate explicitly so a
    # malformed archive always raises a clear error.
    if len(files) != 1:
        raise ValueError('expected a single file inside the SDMX archive, '
                         'got %d' % len(files))
    # NOTE: the ZipFile is deliberately left open -- the returned member
    # handle reads from it lazily.
    return f.open(files[0])
Ejemplo n.º 5
0
 def testPackUnicode(self):
     # round-trip several unicode payloads through both the module-level
     # pack/unpack helpers and the streaming Packer/Unpacker classes
     samples = [u(""), u("abcd"), [u("defgh")], u("Русский текст"), ]
     for sample in samples:
         roundtripped = unpackb(
             packb(sample, encoding='utf-8'), use_list=1, encoding='utf-8')
         assert roundtripped == sample

         packed = Packer(encoding='utf-8').pack(sample)
         roundtripped = Unpacker(
             compat.BytesIO(packed), encoding='utf-8', use_list=1).unpack()
         assert roundtripped == sample
Ejemplo n.º 6
0
    def testArraySize(self, sizes=(0, 5, 50, 1000)):
        """Pack arrays of several sizes and verify they unpack intact.

        ``sizes`` is a tuple rather than a list: a mutable default argument
        is shared across calls (a classic Python pitfall); the values are
        only iterated, so a tuple is a drop-in replacement.
        """
        bio = compat.BytesIO()
        packer = Packer()
        for size in sizes:
            bio.write(packer.pack_array_header(size))
            for i in range(size):
                bio.write(packer.pack(i))

        bio.seek(0)
        unpacker = Unpacker(bio, use_list=1)
        for size in sizes:
            assert unpacker.unpack() == list(range(size))
Ejemplo n.º 7
0
    def testMapSize(self, sizes=(0, 5, 50, 1000)):
        """Pack maps of several sizes and verify they unpack intact.

        ``sizes`` is a tuple rather than a list to avoid the shared
        mutable-default pitfall; the values are only iterated.
        """
        bio = compat.BytesIO()
        packer = Packer()
        for size in sizes:
            bio.write(packer.pack_map_header(size))
            for i in range(size):
                bio.write(packer.pack(i))  # key
                bio.write(packer.pack(i * 2))  # value

        bio.seek(0)
        unpacker = Unpacker(bio)
        for size in sizes:
            assert unpacker.unpack() == dict((i, i * 2) for i in range(size))
Ejemplo n.º 8
0
    def test_manualreset(self, sizes=(0, 5, 50, 1000)):
        """With autoreset off, the packer accumulates output until reset.

        ``sizes`` is a tuple rather than a list to avoid the shared
        mutable-default pitfall; the values are only iterated.
        """
        packer = Packer(autoreset=False)
        for size in sizes:
            packer.pack_array_header(size)
            for i in range(size):
                packer.pack(i)

        # everything packed so far comes back in one buffer
        bio = compat.BytesIO(packer.bytes())
        unpacker = Unpacker(bio, use_list=1)
        for size in sizes:
            assert unpacker.unpack() == list(range(size))

        # reset() must discard the internal buffer entirely
        packer.reset()
        assert packer.bytes() == b''
Ejemplo n.º 9
0
    def test_readbytes(self):
        # feed-mode: interleave unpack() with raw read_bytes() pulls
        u = Unpacker(read_size=3)
        u.feed(b'foobar')
        assert u.unpack() == ord(b'f')
        assert u.read_bytes(3) == b'oob'
        assert u.unpack() == ord(b'a')
        assert u.unpack() == ord(b'r')

        # stream-mode: the same sequence must work when the unpacker has
        # to refill its buffer from a file-like source
        u = Unpacker(compat.BytesIO(b'foobar'), read_size=3)
        assert u.unpack() == ord(b'f')
        assert u.read_bytes(3) == b'oob'
        assert u.unpack() == ord(b'a')
        assert u.unpack() == ord(b'r')
Ejemplo n.º 10
0
def to_msgpack(path_or_buf, *args, **kwargs):
    """
    msgpack (serialize) object to input file path

    THIS IS AN EXPERIMENTAL LIBRARY and the storage format
    may not be stable until a future release.

    Parameters
    ----------
    path_or_buf : string File path, buffer-like, or None
                  if None, return generated string
    args : an object or objects to serialize
    encoding: encoding for unicode objects
    append : boolean whether to append to an existing msgpack
             (default is False)
    compress : type of compressor (zlib or blosc), default to None (no
               compression)
    """
    # the packing routines consult this module-level setting
    global compressor
    compressor = kwargs.pop('compress', None)
    if compressor:
        compressor = u(compressor)

    # append mode only makes sense for a real file target
    mode = 'a+b' if kwargs.pop('append', None) else 'wb'

    def _dump_all(fh):
        # serialize each positional argument in order
        for obj in args:
            fh.write(pack(obj, **kwargs))

    path_or_buf = _stringify_path(path_or_buf)
    if isinstance(path_or_buf, compat.string_types):
        # a file path on disk
        with open(path_or_buf, mode) as fh:
            _dump_all(fh)
    elif path_or_buf is None:
        # no target: pack into memory and hand back the raw bytes
        sink = compat.BytesIO()
        _dump_all(sink)
        return sink.getvalue()
    else:
        # assume an already-open, writable buffer
        _dump_all(path_or_buf)
Ejemplo n.º 11
0
    def __init__(self, filepath_or_buffer, index=None, encoding='ISO-8859-1',
                 chunksize=None):
        """Open the source and read the file header.

        Parameters
        ----------
        filepath_or_buffer : str, bytes, or file-like
            Path/URL of the data, or an already-open buffer.
        index : optional
            Column to use as the index.
        encoding : str, default 'ISO-8859-1'
            Encoding used when a text buffer must be converted to bytes.
        chunksize : int, optional
            Number of rows to read per chunk when iterating.
        """
        self._encoding = encoding
        self._lines_read = 0
        self._index = index
        self._chunksize = chunksize

        if isinstance(filepath_or_buffer, str):
            filepath_or_buffer, encoding, compression = get_filepath_or_buffer(
                filepath_or_buffer, encoding=encoding)

        if isinstance(filepath_or_buffer, (str, compat.text_type, bytes)):
            self.filepath_or_buffer = open(filepath_or_buffer, 'rb')
        else:
            # Copy to BytesIO, and ensure no encoding
            contents = filepath_or_buffer.read()
            try:
                contents = contents.encode(self._encoding)
            except (AttributeError, UnicodeEncodeError):
                # AttributeError: already bytes (no .encode);
                # UnicodeEncodeError: text that can't be encoded -- keep
                # the contents as read. Previously a bare ``except:`` here
                # silently swallowed *every* error, including typos.
                pass
            self.filepath_or_buffer = compat.BytesIO(contents)

        self._read_header()
Ejemplo n.º 12
0
def get_data(geo, resolution, session):
    """Download a zip of per-symbol price files and assemble a Panel.

    Parameters
    ----------
    geo : geography identifier (normalized via ``conv_geo``)
    resolution : resolution code, e.g. 'd' for daily (normalized via
        ``conv_resol``)
    session : requests-like session used by ``download_data``

    Returns
    -------
    (panel, df_info) : tuple
        ``panel`` is a ``pd.Panel`` of per-symbol DataFrames (failed
        symbols map to None); ``df_info`` describes the archive members.
    """
    geo = conv_geo(geo)
    resolution = conv_resol(resolution)
    response = download_data(geo, resolution, session)
    logger.info("Request done")

    logger.info("Create stream file")
    zip_data = compat.BytesIO(response.content)

    logger.info("Creating a DataFrame per symbol")

    d = {}
    cols = None
    with ZipFile(zip_data, 'r') as zf:
        filelist = zf.filelist
        # summarize the archive contents for the caller
        df_info = pd.DataFrame(filelist)
        df_info['filename'] = df_info[0].map(lambda x: x.filename)
        df_info['file_size'] = df_info[0].map(lambda x: x.file_size)
        df_info['date_time'] = df_info[0].map(
            lambda x: datetime.datetime(*x.date_time))
        del df_info[0]
        for zinfo in filelist:
            filename = zinfo.filename
            filename_short, filename_ext = os.path.splitext(filename)
            with zf.open(filename) as zfile:
                if filename_ext.lower() == '.txt':
                    file_exchange = filename.split('/')[3]
                    file_symbol = os.path.split(filename_short)[-1].upper()
                    logger.info(
                        "Building DataFrame for '%s' at '%s' from '%s' (%.1f)"
                        % (file_symbol, file_exchange, filename,
                           float(zinfo.file_size) / 1024))
                    if zinfo.file_size > 0:
                        try:
                            if resolution == 'd':
                                df = pd.read_csv(zfile, parse_dates=0)
                            else:
                                df = pd.read_csv(zfile, parse_dates=[[0, 1]])
                                df = df.rename(columns={'Date_Time': 'Date'})
                            df = df.set_index('Date')
                            df['Exchange'] = file_exchange
                            d[file_symbol] = df
                            if cols is None:
                                cols = df.columns
                        except KeyboardInterrupt:
                            logger.error("CTRL+C was pressed - exit")
                            break
                        except Exception:
                            logger.error(
                                "Can't build DataFrame for '%s' at '%s' from '%s'"
                                % (file_symbol, file_exchange,
                                   filename.replace(' ', '\\ ')))
                            logger.error(traceback.format_exc())
                            # BUG FIX: this branch previously also ran
                            # ``df['Exchange'] = file_exchange``, mutating a
                            # stale ``df`` from an earlier iteration (or
                            # raising NameError when the first file failed);
                            # just record the failure.
                            d[file_symbol] = None
                    else:
                        logger.error(
                            "Can't build DataFrame for '%s' at '%s' from '%s' (empty file)"
                            % (file_symbol, file_exchange,
                               filename.replace(' ', '\\ ')))
                        # BUG FIX: same stale-``df`` mutation removed here.
                        d[file_symbol] = None
    logger.info("Create Panel from DataFrame")
    panel = pd.Panel(d)
    panel = panel.transpose(2, 1, 0)
    panel.major_axis = panel.major_axis.map(lambda n: pd.to_datetime(str(n)))
    return (panel, df_info)