Python import_lzma Examples, pandas.compat.import_lzma Python Examples

Example #1

0

Show file

File: compression.py Project: aterrel/pandas

    def test_xz(self):
        """Check that xz-compressed CSV data parses to the expected frame.

        The raw bytes of ``self.csv1`` are written through ``LZMAFile`` and
        read back three ways: by path with ``compression='xz'``, through an
        already-open binary handle, and by path with ``compression='infer'``
        using a ``.xz`` file name.
        """
        lzma = compat.import_lzma()

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # The context manager closes the archive even if write() raises,
            # so the temporary file is never left open.
            with lzma.LZMAFile(path, mode='wb') as tmp:
                tmp.write(data)

            result = self.read_csv(path, compression='xz')
            tm.assert_frame_equal(result, expected)

            with open(path, 'rb') as f:
                result = self.read_csv(f, compression='xz')
                tm.assert_frame_equal(result, expected)

        with tm.ensure_clean('test.xz') as path:
            with lzma.LZMAFile(path, mode='wb') as tmp:
                tmp.write(data)
            result = self.read_csv(path, compression='infer')
            tm.assert_frame_equal(result, expected)

Example #2

0

Show file

def _get_handle(path, mode, encoding=None, compression=None, memory_map=False):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    path :
        path handed to ``open`` or to the compression library's opener
    mode : str
        mode forwarded to the opener (the ZIP branch does not use it)
    encoding : str or None
        forwarded to ``open`` / ``TextIOWrapper`` on Python 3
    compression : {'gzip', 'bz2', 'zip', 'xz', None}
    memory_map : boolean, default False
        if true, try to replace the plain handle with an ``MMapWrapper``.
        Compressed handles are returned before this step is reached.

    Returns
    -------
    f : file-like

    Raises
    ------
    ValueError
        for an unrecognized compression type, for a ZIP archive that does
        not hold exactly one member, or for encoding + compression on
        Python 2.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path)
            zip_names = zip_file.namelist()

            if len(zip_names) != 1:
                # Close the archive before raising; otherwise the open
                # ZipFile is abandoned together with the exception.
                zip_file.close()
                if not zip_names:
                    raise ValueError(
                        'Zero files found in ZIP file {}'.format(path))
                raise ValueError(
                    'Multiple files found in ZIP file.'
                    ' Only one file per ZIP :{}'.format(zip_names))

            f = zip_file.open(zip_names.pop())
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' % compression)
        if compat.PY3:
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f
    else:
        if compat.PY3:
            if encoding:
                f = open(path, mode, encoding=encoding)
            else:
                f = open(path, mode, errors='replace')
        else:
            f = open(path, mode)

    if memory_map and hasattr(f, 'fileno'):
        try:
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f

Example #3

0

Show file

    def test_xz(self):
        """Round-trip ``self.csv1`` through xz compression and compare frames.

        Covers reading by path and by open binary handle with
        ``compression='xz'``, and reading a ``.xz``-named file with
        ``compression='infer'``.
        """
        lzma = compat.import_lzma()

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean() as path:
            # ``with`` guarantees the compressed file is flushed and closed
            # before it is read back, even when write() fails.
            with lzma.LZMAFile(path, mode='wb') as xz_out:
                xz_out.write(data)

            result = self.read_csv(path, compression='xz')
            tm.assert_frame_equal(result, expected)

            with open(path, 'rb') as f:
                result = self.read_csv(f, compression='xz')
                tm.assert_frame_equal(result, expected)

        with tm.ensure_clean('test.xz') as path:
            with lzma.LZMAFile(path, mode='wb') as xz_out:
                xz_out.write(data)
            result = self.read_csv(path, compression='infer')
            tm.assert_frame_equal(result, expected)

Example #4

0

Show file

File: common.py Project: AkiraKane/pandas

def _get_handle(path, mode, encoding=None, compression=None, memory_map=False):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    path :
        path handed to ``open`` or to the compression library's opener
    mode : str
        mode forwarded to the opener (the ZIP branch does not use it)
    encoding : str or None
        forwarded to ``open`` / ``TextIOWrapper`` on Python 3
    compression : {'gzip', 'bz2', 'zip', 'xz', None}
    memory_map : boolean, default False
        if true, try to replace the plain handle with an ``MMapWrapper``.
        Compressed handles are returned before this step is reached.

    Returns
    -------
    f : file-like

    Raises
    ------
    ValueError
        for an unrecognized compression type, for a ZIP archive that does
        not hold exactly one member, or for encoding + compression on
        Python 2.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path)
            zip_names = zip_file.namelist()

            if len(zip_names) != 1:
                # Release the archive before reporting the problem so the
                # underlying file is not left open.
                zip_file.close()
                if not zip_names:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path))
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP :{}'
                                 .format(zip_names))

            f = zip_file.open(zip_names.pop())
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' %
                             compression)
        if compat.PY3:
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f
    else:
        if compat.PY3:
            if encoding:
                f = open(path, mode, encoding=encoding)
            else:
                f = open(path, mode, errors='replace')
        else:
            f = open(path, mode)

    if memory_map and hasattr(f, 'fileno'):
        try:
            # Build the wrapper first; only once it exists is the original
            # handle closed, so a failure leaves ``f`` usable.
            # NOTE(review): assumes MMapWrapper keeps its own mapping and
            # does not need ``f`` to stay open - confirm against MMapWrapper.
            wrapped = MMapWrapper(f)
            f.close()
            f = wrapped
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f

Example #5

0

Show file

File: test_compression.py Project: changhiskhan/pandas

def lzma_file():
    """
    Look up the `LZMAFile` class on the module that
    `compat.import_lzma()` provides.

    Returns
    -------
    klass : type or None
        None when the import raises ImportError or the module has no
        `LZMAFile` attribute.
    """
    try:
        module = compat.import_lzma()
    except ImportError:
        return None

    return getattr(module, "LZMAFile", None)

Example #6

0

Show file

File: test_compression.py Project: LioraR/Data-Processing

def lzma_file():
    """
    Fetch the `LZMAFile` class via `compat.import_lzma()`, if available.

    Returns
    -------
    klass : type or None
        The class, or None if the lzma import fails with ImportError or
        the imported module does not expose `LZMAFile`.
    """
    try:
        lzma_module = compat.import_lzma()
    except ImportError:
        klass = None
    else:
        klass = getattr(lzma_module, "LZMAFile", None)

    return klass

Example #7

0

Show file

File: common.py Project: SKNIRBHAY/TechWise-1

def _get_handle(path, mode, encoding=None, compression=None):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    path :
        path handed to ``open`` or to the compression library's opener
    mode : str
        mode forwarded to the opener (the ZIP branch does not use it)
    encoding : str or None
        forwarded to ``open`` / ``TextIOWrapper`` on Python 3
    compression : {'gzip', 'bz2', 'zip', 'xz', None}

    Returns
    -------
    f : file-like

    Raises
    ------
    ValueError
        for an unrecognized compression type, for a ZIP archive that does
        not hold exactly one member, or for encoding + compression on
        Python 2.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path)
            zip_names = zip_file.namelist()

            if len(zip_names) != 1:
                # Close the archive before raising; otherwise the open
                # ZipFile is abandoned together with the exception.
                zip_file.close()
                if not zip_names:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path))
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP :{}'
                                 .format(zip_names))

            f = zip_file.open(zip_names.pop())
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' %
                             compression)
        if compat.PY3:
            # the compression openers above yield bytes; wrap for text
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f
    else:
        if compat.PY3:
            if encoding:
                f = open(path, mode, encoding=encoding)
            else:
                f = open(path, mode, errors='replace')
        else:
            f = open(path, mode)

    return f

Example #8

0

Show file

File: common.py Project: cpaulik/pandas

def _get_handle(path, mode, encoding=None, compression=None):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    path :
        path handed to ``open`` or to the compression library's opener
    mode : str
        mode forwarded to the opener (the ZIP branch does not use it)
    encoding : str or None
        forwarded to ``open`` / ``TextIOWrapper`` on Python 3
    compression : {'gzip', 'bz2', 'zip', 'xz', None}

    Returns
    -------
    f : file-like

    Raises
    ------
    ValueError
        for an unrecognized compression type, for a ZIP archive that does
        not hold exactly one member, or for encoding + compression on
        Python 2.
    """
    if compression is not None:
        if encoding is not None and not compat.PY3:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        if compression == 'gzip':
            import gzip
            f = gzip.GzipFile(path, mode)
        elif compression == 'bz2':
            import bz2
            f = bz2.BZ2File(path, mode)
        elif compression == 'zip':
            import zipfile
            zip_file = zipfile.ZipFile(path)
            zip_names = zip_file.namelist()

            if len(zip_names) == 1:
                file_name = zip_names.pop()
                f = zip_file.open(file_name)
            else:
                # Not exactly one member: release the archive first so the
                # error path does not leave it open.
                zip_file.close()
                if len(zip_names) == 0:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path))
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP :{}'
                                 .format(zip_names))
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path, mode)
        else:
            raise ValueError('Unrecognized compression type: %s' %
                             compression)
        if compat.PY3:
            # the compression openers above yield bytes; wrap for text
            from io import TextIOWrapper
            f = TextIOWrapper(f, encoding=encoding)
        return f
    else:
        if compat.PY3:
            if encoding:
                f = open(path, mode, encoding=encoding)
            else:
                f = open(path, mode, errors='replace')
        else:
            f = open(path, mode)

    return f

Example #9

0

Show file

File: test_compression.py Project: cpcloud/pandas

def decompress_file(path, compression):
    """Read ``path`` in binary mode, decompressing it, and return text.

    Parameters
    ----------
    path : path of the file to read
    compression : {'gzip', 'bz2', 'xz', None}
        None reads the file as-is.

    Returns
    -------
    The file's full contents decoded as UTF-8.

    Raises
    ------
    ValueError
        if ``compression`` is not one of the supported values.
    """
    if compression is None:
        f = open(path, 'rb')
    elif compression == 'gzip':
        import gzip
        f = gzip.GzipFile(path, 'rb')
    elif compression == 'bz2':
        import bz2
        f = bz2.BZ2File(path, 'rb')
    elif compression == 'xz':
        lzma = compat.import_lzma()
        f = lzma.open(path, 'rb')
    else:
        msg = 'Unrecognized compression type: {}'.format(compression)
        raise ValueError(msg)

    # ``with`` closes the handle even when read() or decode() raises.
    with f:
        return f.read().decode('utf8')

Example #10

0

Show file

File: test_compression.py Project: Goutham2591/OMK_PART2

def decompress_file(path, compression):
    """Return the UTF-8 decoded contents of ``path`` after decompression.

    Parameters
    ----------
    path : path of the file to read
    compression : {'gzip', 'bz2', 'xz', None}
        None reads the file without decompression.

    Returns
    -------
    The decoded text of the whole file.

    Raises
    ------
    ValueError
        if ``compression`` is not one of the supported values.
    """
    if compression is None:
        f = open(path, 'rb')
    elif compression == 'gzip':
        import gzip
        f = gzip.GzipFile(path, 'rb')
    elif compression == 'bz2':
        import bz2
        f = bz2.BZ2File(path, 'rb')
    elif compression == 'xz':
        lzma = compat.import_lzma()
        f = lzma.open(path, 'rb')
    else:
        msg = 'Unrecognized compression type: {}'.format(compression)
        raise ValueError(msg)

    # Close the handle on every path, including a failed read or decode.
    try:
        result = f.read().decode('utf8')
    finally:
        f.close()
    return result

Example #11

0

Show file

File: test_to_csv.py Project: cpcloud/pandas

    def test_to_csv_compression_xz(self):
        """``to_csv(compression="xz")`` must round-trip and really be xz."""
        # GH11852
        # use the compression kw in to_csv
        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                       index=['A', 'B'], columns=['X', 'Y', 'Z'])

        with ensure_clean() as filename:

            df.to_csv(filename, compression="xz")

            # test the round trip - to_csv -> read_csv
            rs = read_csv(filename, compression="xz", index_col=0)
            assert_frame_equal(df, rs)

            # explicitly make sure file is xzipped; the handle is closed
            # by the context manager even if the comparison fails
            lzma = compat.import_lzma()
            with lzma.open(filename, 'rb') as f:
                assert_frame_equal(df, read_csv(f, index_col=0))

Example #12

0

Show file

File: test_to_csv.py Project: lauziming/pandas

    def test_to_csv_compression_xz(self):
        """``to_csv(compression="xz")`` must round-trip and really be xz."""
        # GH11852
        # use the compression kw in to_csv
        tm._skip_if_no_lzma()
        df = DataFrame([[0.123456, 0.234567, 0.567567],
                        [12.32112, 123123.2, 321321.2]],
                       index=['A', 'B'], columns=['X', 'Y', 'Z'])

        with ensure_clean() as filename:

            df.to_csv(filename, compression="xz")

            # test the round trip - to_csv -> read_csv
            rs = read_csv(filename, compression="xz", index_col=0)
            assert_frame_equal(df, rs)

            # explicitly make sure file is xzipped; ``with`` releases the
            # handle even when the frame comparison raises
            lzma = compat.import_lzma()
            with lzma.open(filename, 'rb') as xz_handle:
                assert_frame_equal(df, read_csv(xz_handle, index_col=0))

Example #13

0

Show file

File: _test_decorators.py Project: BinEP/MHealthTest

def _skip_if_no_lzma():
    """Report whether lzma support is missing.

    Returns
    -------
    bool
        True if ``import_lzma()`` raises ImportError, False otherwise.
    """
    try:
        import_lzma()
    except ImportError:
        return True
    # Explicit False instead of an implicit None keeps the result a bool.
    return False

Example #14

0

Show file

File: common.py Project: jakevdp/pandas

def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : {'infer', 'gzip', 'bz2', 'zip', 'xz', None}, default None
        If 'infer' and `path_or_buf` is path-like, then detect
        compression from the following extensions: '.gz', '.bz2', '.zip',
        or '.xz' (otherwise no compression).
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like object that were opened in this function.

    Raises
    ------
    ValueError
        for an unrecognized compression type, for a ZIP archive opened for
        reading that does not contain exactly one file, or on Python 2 when
        compression and encoding are combined for a non-path buffer.
    """
    # S3File is optional: only treat it as needing text wrapping when s3fs
    # can be imported.
    try:
        from s3fs import S3File
        need_text_wrapping = (BytesIO, S3File)
    except ImportError:
        need_text_wrapping = (BytesIO,)

    handles = list()
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = _stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, compat.string_types)

    # compression is only resolved through _infer_compression for paths;
    # for buffers the value is used as passed in
    if is_path:
        compression = _infer_compression(path_or_buf, compression)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            zf = BytesZipFile(path_or_buf, mode)
            # Ensure the container is closed as well.
            handles.append(zf)
            if zf.mode == 'w':
                f = zf
            elif zf.mode == 'r':
                zip_names = zf.namelist()
                if len(zip_names) == 1:
                    f = zf.open(zip_names.pop())
                # NOTE(review): on both error paths below ``zf`` is still
                # open and the local ``handles`` list is lost with the
                # exception - consider closing ``zf`` before raising.
                elif len(zip_names) == 0:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path_or_buf))
                else:
                    raise ValueError('Multiple files found in ZIP file.'
                                     ' Only one file per ZIP: {}'
                                     .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        # NOTE(review): in ZIP write mode ``f is zf``, so the same object
        # ends up in ``handles`` twice.
        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            mode = "wb" if mode == "w" else mode
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding, newline="")
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace', newline="")
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if (compat.PY3 and is_text and
            (compression or isinstance(f, need_text_wrapping))):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding, newline='')
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            # build the wrapper before closing ``f`` so that a failure
            # leaves the original handle untouched
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles

Example #15

0

Show file

def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False, is_text=True):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : str or None
        Supported compression protocols are gzip, bz2, zip, and xz
    memory_map : boolean, default False
        See parsers._parser_params for more information.
    is_text : boolean, default True
        whether file/buffer is in text format (csv, json, etc.), or in binary
        mode (pickle, etc.)

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like object that were opened in this function.

    Raises
    ------
    ValueError
        for an unrecognized compression type, for a ZIP archive opened for
        reading that does not contain exactly one file, or on Python 2 when
        compression and encoding are combined for a non-path buffer.
    """
    # S3File is optional: only treat it as needing text wrapping when s3fs
    # can be imported.
    try:
        from s3fs import S3File
        need_text_wrapping = (BytesIO, S3File)
    except ImportError:
        need_text_wrapping = (BytesIO,)

    handles = list()
    f = path_or_buf

    # Convert pathlib.Path/py.path.local or string
    path_or_buf = _stringify_path(path_or_buf)
    is_path = isinstance(path_or_buf, compat.string_types)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            # NOTE(review): in read mode only the member handle is added to
            # ``handles`` below; the ``zf`` container itself is not tracked,
            # and it is also left open on the two error paths.
            zf = BytesZipFile(path_or_buf, mode)
            if zf.mode == 'w':
                f = zf
            elif zf.mode == 'r':
                zip_names = zf.namelist()
                if len(zip_names) == 1:
                    f = zf.open(zip_names.pop())
                elif len(zip_names) == 0:
                    raise ValueError('Zero files found in ZIP file {}'
                                     .format(path_or_buf))
                else:
                    raise ValueError('Multiple files found in ZIP file.'
                                     ' Only one file per ZIP: {}'
                                     .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding)
        elif is_text:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace')
        else:
            # Python 3 and binary mode
            f = open(path_or_buf, mode)
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    if compat.PY3 and is_text and\
            (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            # build the wrapper before closing ``f`` so that a failure
            # leaves the original handle untouched
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles

Example #16

0

Show file

File: test_pickle.py Project: zjfjyc/pandas

import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    Index,
    Series,
    period_range,
)
import pandas._testing as tm

from pandas.tseries.offsets import (
    Day,
    MonthEnd,
)

lzma = import_lzma()


@pytest.fixture(scope="module")
def current_pickle_data():
    """Module-scoped fixture: pickle data for the current version, as
    built by ``create_pickle_data()``."""
    # imported inside the fixture, as in the original, so the generator
    # module is only loaded when the fixture is first requested
    from pandas.tests.io import generate_legacy_storage_files as legacy_files

    return legacy_files.create_pickle_data()


# ---------------------
# comparison functions
# ---------------------
def compare_element(result, expected, typ, version=None):
    if isinstance(expected, Index):

Example #17

0

Show file

"""
Tests compressed data parsing functionality for all
of the parsers defined in parsers.py
"""

import pytest

import pandas as pd
import pandas.compat as compat
import pandas.util.testing as tm
import pandas.util._test_decorators as td

import gzip
import bz2
try:
    lzma = compat.import_lzma()
except ImportError:
    lzma = None


class CompressionTests(object):
    def test_zip(self):
        import zipfile

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean('test_file.zip') as path:
            tmp = zipfile.ZipFile(path, mode='w')
            tmp.writestr('test_file', data)

Example #18

0

Show file

File: common.py Project: andrewkittredge/pandas

def _get_handle(path_or_buf, mode, encoding=None, compression=None,
                memory_map=False):
    """
    Get file handle for given path/buffer and mode.

    Parameters
    ----------
    path_or_buf :
        a path (str) or buffer
    mode : str
        mode to open path_or_buf with
    encoding : str or None
    compression : str or None
        Supported compression protocols are gzip, bz2, zip, and xz
    memory_map : boolean, default False
        See parsers._parser_params for more information.

    Returns
    -------
    f : file-like
        A file-like object
    handles : list of file-like objects
        A list of file-like object that were opened in this function.

    Raises
    ------
    ValueError
        for an unrecognized compression type, for a ZIP archive that does
        not contain exactly one file, or on Python 2 when compression and
        encoding are combined for a non-path buffer.
    """

    handles = list()
    f = path_or_buf
    is_path = isinstance(path_or_buf, compat.string_types)

    if compression:

        if compat.PY2 and not is_path and encoding:
            msg = 'compression with encoding is not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip
            if is_path:
                f = gzip.open(path_or_buf, mode)
            else:
                f = gzip.GzipFile(fileobj=path_or_buf)

        # BZ Compression
        elif compression == 'bz2':
            import bz2
            if is_path:
                f = bz2.BZ2File(path_or_buf, mode)
            elif compat.PY2:
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually
                f = StringIO(bz2.decompress(path_or_buf.read()))
                path_or_buf.close()
            else:
                f = bz2.BZ2File(path_or_buf)

        # ZIP Compression
        elif compression == 'zip':
            import zipfile
            # NOTE(review): ``mode`` is not passed to ZipFile, and the
            # archive object is neither added to ``handles`` nor closed on
            # the error paths below.
            zip_file = zipfile.ZipFile(path_or_buf)
            zip_names = zip_file.namelist()
            if len(zip_names) == 1:
                f = zip_file.open(zip_names.pop())
            elif len(zip_names) == 0:
                raise ValueError('Zero files found in ZIP file {}'
                                 .format(path_or_buf))
            else:
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP: {}'
                                 .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(path_or_buf, mode)

        # Unrecognized Compression
        else:
            msg = 'Unrecognized compression type: {}'.format(compression)
            raise ValueError(msg)

        handles.append(f)

    elif is_path:
        if compat.PY2:
            # Python 2
            f = open(path_or_buf, mode)
        elif encoding:
            # Python 3 and encoding
            f = open(path_or_buf, mode, encoding=encoding)
        else:
            # Python 3 and no explicit encoding
            f = open(path_or_buf, mode, errors='replace')
        handles.append(f)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    # NOTE(review): ``need_text_wrapping`` is not defined in this function;
    # presumably a module-level tuple of types - confirm.
    if compat.PY3 and (compression or isinstance(f, need_text_wrapping)):
        from io import TextIOWrapper
        f = TextIOWrapper(f, encoding=encoding)
        handles.append(f)

    if memory_map and hasattr(f, 'fileno'):
        try:
            # build the wrapper before closing ``f`` so that a failure
            # leaves the original handle untouched
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f, handles

Example #19

0

Show file

File: _test_decorators.py Project: MasonGallo/pandas

def _skip_if_no_lzma():
    """Tell whether lzma cannot be imported.

    Returns
    -------
    bool
        True when ``import_lzma()`` raises ImportError; False when the
        import succeeds.
    """
    try:
        import_lzma()
    except ImportError:
        return True
    else:
        # Make the success path explicit rather than falling off the end
        # of the function and returning None.
        return False

Example #20

0

Show file

File: common.py Project: lababidi/pandas

def _get_handle(source, mode, encoding=None, compression=None, memory_map=False):
    """Gets file handle for given path and mode.

    Parameters
    ----------
    source :
        a path (str) or a buffer/file object
    mode : str
        mode forwarded to the opener when ``source`` is a path
    encoding : str or None
    compression : str or None
        'gzip', 'bz2', 'zip' or 'xz'; the value is lower-cased before
        comparison
    memory_map : boolean, default False
        if true, try to replace the resulting handle with an
        ``MMapWrapper``; not reached on the BytesIO or compression paths,
        which return early

    Returns
    -------
    f : file-like

    Raises
    ------
    ValueError
        for an unrecognized compression value, for a ZIP archive that does
        not contain exactly one file, or on Python 2 when encoding and
        compression are combined for a non-path source.
    """

    f = source
    is_path = isinstance(source, compat.string_types)

    # in Python 3, convert BytesIO or fileobjects passed with an encoding
    # (this branch returns immediately, so ``compression`` and
    # ``memory_map`` are not consulted for a BytesIO source on Python 3)
    if compat.PY3 and isinstance(source, compat.BytesIO):
        from io import TextIOWrapper

        return TextIOWrapper(source, encoding=encoding)

    elif compression is not None:
        compression = compression.lower()
        if encoding is not None and not compat.PY3 and not is_path:
            msg = 'encoding + compression not yet supported in Python 2'
            raise ValueError(msg)

        # GZ Compression
        if compression == 'gzip':
            import gzip

            f = gzip.GzipFile(source, mode) \
                if is_path else gzip.GzipFile(fileobj=source)

        # BZ Compression
        elif compression == 'bz2':
            import bz2

            if is_path:
                f = bz2.BZ2File(source, mode)

            else:
                f = bz2.BZ2File(source) if compat.PY3 else StringIO(
                    bz2.decompress(source.read()))
                # Python 2's bz2 module can't take file objects, so have to
                # run through decompress manually

        # ZIP Compression
        elif compression == 'zip':
            import zipfile
            # NOTE(review): ``mode`` is not passed to ZipFile, and the
            # archive is left open when either error below is raised.
            zip_file = zipfile.ZipFile(source)
            zip_names = zip_file.namelist()

            if len(zip_names) == 1:
                f = zip_file.open(zip_names.pop())
            elif len(zip_names) == 0:
                raise ValueError('Zero files found in ZIP file {}'
                                 .format(source))
            else:
                raise ValueError('Multiple files found in ZIP file.'
                                 ' Only one file per ZIP :{}'
                                 .format(zip_names))

        # XZ Compression
        elif compression == 'xz':
            lzma = compat.import_lzma()
            f = lzma.LZMAFile(source, mode)

        else:
            raise ValueError('Unrecognized compression: %s' % compression)

        if compat.PY3:
            from io import TextIOWrapper

            f = TextIOWrapper(f, encoding=encoding)

        return f

    elif is_path:
        if compat.PY3:
            if encoding:
                f = open(source, mode, encoding=encoding)
            else:
                f = open(source, mode, errors='replace')
        else:
            f = open(source, mode)

    # reached for plain paths and for non-path sources without compression
    if memory_map and hasattr(f, 'fileno'):
        try:
            # build the wrapper before closing ``f`` so that a failure
            # leaves the original handle untouched
            g = MMapWrapper(f)
            f.close()
            f = g
        except Exception:
            # we catch any errors that may have occurred
            # because that is consistent with the lower-level
            # functionality of the C engine (pd.read_csv), so
            # leave the file handler as is then
            pass

    return f

Example #21

0

Show file

File: compression.py Project: TomAugspurger/pandas

of the parsers defined in parsers.py
"""

import bz2
import gzip

import pytest

import pandas.compat as compat
import pandas.util._test_decorators as td

import pandas as pd
import pandas.util.testing as tm

try:
    lzma = compat.import_lzma()
except ImportError:
    lzma = None


class CompressionTests(object):

    def test_zip(self):
        import zipfile

        with open(self.csv1, 'rb') as data_file:
            data = data_file.read()
            expected = self.read_csv(self.csv1)

        with tm.ensure_clean('test_file.zip') as path:
            with zipfile.ZipFile(path, mode='w') as tmp: