コード例 #1
0
    def __init__(self,
                 filepath_or_buffer,
                 storage_options: StorageOptions = None):
        self.handles = IOHandles(handle=filepath_or_buffer,
                                 compression={"method": None})
        if not isinstance(filepath_or_buffer,
                          (ExcelFile, self._workbook_class)):
            self.handles = get_handle(filepath_or_buffer,
                                      "rb",
                                      storage_options=storage_options,
                                      is_text=False)

        if isinstance(self.handles.handle, self._workbook_class):
            self.book = self.handles.handle
        elif hasattr(self.handles.handle, "read"):
            # N.B. xlrd.Book has a read attribute too
            self.handles.handle.seek(0)
            try:
                self.book = self.load_workbook(self.handles.handle)
            except Exception:
                self.close()
                raise
        elif isinstance(self.handles.handle, bytes):
            self.book = self.load_workbook(BytesIO(self.handles.handle))
        else:
            raise ValueError(
                "Must explicitly set engine if not passing in buffer or path for io."
            )
コード例 #2
0
    def __init__(
        self,
        path: FilePathOrBuffer | ExcelWriter,
        engine=None,
        date_format=None,
        datetime_format=None,
        mode: str = "w",
        storage_options: StorageOptions = None,
        if_sheet_exists: str | None = None,
        engine_kwargs: dict | None = None,
        **kwargs,
    ):
        # validate that this engine can handle the extension
        if isinstance(path, str):
            ext = os.path.splitext(path)[-1]
            self.check_extension(ext)

        # use mode to open the file
        if "b" not in mode:
            mode += "b"
        # use "a" for the user to append data to excel but internally use "r+" to let
        # the excel backend first read the existing file and then write any data to it
        mode = mode.replace("a", "r+")

        # cast ExcelWriter to avoid adding 'if self.handles is not None'
        self.handles = IOHandles(cast(Buffer, path),
                                 compression={"copression": None})
        if not isinstance(path, ExcelWriter):
            self.handles = get_handle(path,
                                      mode,
                                      storage_options=storage_options,
                                      is_text=False)
        self.sheets: dict[str, Any] = {}
        self.cur_sheet = None

        if date_format is None:
            self.date_format = "YYYY-MM-DD"
        else:
            self.date_format = date_format
        if datetime_format is None:
            self.datetime_format = "YYYY-MM-DD HH:MM:SS"
        else:
            self.datetime_format = datetime_format

        self.mode = mode

        if if_sheet_exists not in [None, "error", "new", "replace"]:
            raise ValueError(
                f"'{if_sheet_exists}' is not valid for if_sheet_exists. "
                "Valid options are 'error', 'new' and 'replace'.")
        if if_sheet_exists and "r+" not in mode:
            raise ValueError(
                "if_sheet_exists is only valid in append mode (mode='a')")
        if if_sheet_exists is None:
            if_sheet_exists = "error"
        self.if_sheet_exists = if_sheet_exists
コード例 #3
0
ファイル: _base.py プロジェクト: zaghambajwa/pandas
    def __init__(
        self,
        path: Union[FilePathOrBuffer, ExcelWriter],
        engine=None,
        date_format=None,
        datetime_format=None,
        mode: str = "w",
        storage_options: StorageOptions = None,
        engine_kwargs: Optional[Dict] = None,
        **kwargs,
    ):
        # validate that this engine can handle the extension
        if isinstance(path, str):
            ext = os.path.splitext(path)[-1]
            self.check_extension(ext)

        # use mode to open the file
        if "b" not in mode:
            mode += "b"
        # use "a" for the user to append data to excel but internally use "r+" to let
        # the excel backend first read the existing file and then write any data to it
        mode = mode.replace("a", "r+")

        # cast ExcelWriter to avoid adding 'if self.handles is not None'
        self.handles = IOHandles(cast(Buffer, path),
                                 compression={"copression": None})
        if not isinstance(path, ExcelWriter):
            self.handles = get_handle(path,
                                      mode,
                                      storage_options=storage_options,
                                      is_text=False)
        self.sheets: Dict[str, Any] = {}
        self.cur_sheet = None

        if date_format is None:
            self.date_format = "YYYY-MM-DD"
        else:
            self.date_format = date_format
        if datetime_format is None:
            self.datetime_format = "YYYY-MM-DD HH:MM:SS"
        else:
            self.datetime_format = datetime_format

        self.mode = mode
コード例 #4
0
ファイル: _base.py プロジェクト: vedkharche538/pandas
    def __init__(
        self,
        path,
        engine=None,
        date_format=None,
        datetime_format=None,
        mode="w",
        **engine_kwargs,
    ):
        # validate that this engine can handle the extension
        if isinstance(path, str):
            ext = os.path.splitext(path)[-1]
            self.check_extension(ext)

        # use mode to open the file
        if "b" not in mode:
            mode += "b"
        # use "a" for the user to append data to excel but internally use "r+" to let
        # the excel backend first read the existing file and then write any data to it
        mode = mode.replace("a", "r+")

        self.handles = IOHandles(path, compression={"copression": None})
        if not isinstance(path, ExcelWriter):
            self.handles = get_handle(path, mode, is_text=False)
        self.sheets = {}
        self.cur_sheet = None

        if date_format is None:
            self.date_format = "YYYY-MM-DD"
        else:
            self.date_format = date_format
        if datetime_format is None:
            self.datetime_format = "YYYY-MM-DD HH:MM:SS"
        else:
            self.datetime_format = datetime_format

        self.mode = mode
コード例 #5
0
ファイル: _base.py プロジェクト: GetAlexed0/pythonProject
class ExcelWriter(metaclass=abc.ABCMeta):
    """
    Class for writing DataFrame objects into excel sheets.

    Default is to use xlwt for xls, openpyxl for xlsx, odf for ods.
    See DataFrame.to_excel for typical usage.

    The writer should be used as a context manager. Otherwise, call `close()` to save
    and close any opened file handles.

    Parameters
    ----------
    path : str or typing.BinaryIO
        Path to xls or xlsx or ods file.
    engine : str (optional)
        Engine to use for writing. If None, defaults to
        ``io.excel.<extension>.writer``.  NOTE: can only be passed as a keyword
        argument.

        .. deprecated:: 1.2.0

            As the `xlwt <https://pypi.org/project/xlwt/>`__ package is no longer
            maintained, the ``xlwt`` engine will be removed in a future
            version of pandas.

    date_format : str, default None
        Format string for dates written into Excel files (e.g. 'YYYY-MM-DD').
    datetime_format : str, default None
        Format string for datetime objects written into Excel files.
        (e.g. 'YYYY-MM-DD HH:MM:SS').
    mode : {'w', 'a'}, default 'w'
        File mode to use (write or append). Append does not work with fsspec URLs.

        .. versionadded:: 0.24.0
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection, e.g.
        host, port, username, password, etc., if using a URL that will
        be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".

        .. versionadded:: 1.2.0

    Attributes
    ----------
    None

    Methods
    -------
    None

    Notes
    -----
    None of the methods and properties are considered public.

    For compatibility with CSV writers, ExcelWriter serializes lists
    and dicts to strings before writing.

    Examples
    --------
    Default usage:

    >>> with ExcelWriter('path_to_file.xlsx') as writer:
    ...     df.to_excel(writer)

    To write to separate sheets in a single file:

    >>> with ExcelWriter('path_to_file.xlsx') as writer:
    ...     df1.to_excel(writer, sheet_name='Sheet1')
    ...     df2.to_excel(writer, sheet_name='Sheet2')

    You can set the date format or datetime format:

    >>> with ExcelWriter('path_to_file.xlsx',
    ...                   date_format='YYYY-MM-DD',
    ...                   datetime_format='YYYY-MM-DD HH:MM:SS') as writer:
    ...     df.to_excel(writer)

    You can also append to an existing Excel file:

    >>> with ExcelWriter('path_to_file.xlsx', mode='a') as writer:
    ...     df.to_excel(writer, sheet_name='Sheet3')

    You can store Excel file in RAM:

    >>> import io
    >>> buffer = io.BytesIO()
    >>> with pd.ExcelWriter(buffer) as writer:
    ...     df.to_excel(writer)

    You can pack Excel file into zip archive:

    >>> import zipfile
    >>> with zipfile.ZipFile('path_to_file.zip', 'w') as zf:
    ...     with zf.open('filename.xlsx', 'w') as buffer:
    ...         with pd.ExcelWriter(buffer) as writer:
    ...             df.to_excel(writer)
    """

    # Defining an ExcelWriter implementation (see abstract methods for more...)

    # - Mandatory
    #   - ``write_cells(self, cells, sheet_name=None, startrow=0, startcol=0)``
    #     --> called to write additional DataFrames to disk
    #   - ``supported_extensions`` (tuple of supported extensions), used to
    #      check that engine supports the given extension.
    #   - ``engine`` - string that gives the engine name. Necessary to
    #     instantiate class directly and bypass ``ExcelWriterMeta`` engine
    #     lookup.
    #   - ``save(self)`` --> called to save file to disk
    # - Mostly mandatory (i.e. should at least exist)
    #   - book, cur_sheet, path

    # - Optional:
    #   - ``__init__(self, path, engine=None, **kwargs)`` --> always called
    #     with path as first argument.

    # You also need to register the class with ``register_writer()``.
    # Technically, ExcelWriter implementations don't need to subclass
    # ExcelWriter.
    def __new__(cls, path, engine=None, **kwargs):
        # only switch class if generic(ExcelWriter)

        if cls is ExcelWriter:
            if engine is None or (isinstance(engine, str)
                                  and engine == "auto"):
                if isinstance(path, str):
                    ext = os.path.splitext(path)[-1][1:]
                else:
                    ext = "xlsx"

                try:
                    engine = config.get_option(f"io.excel.{ext}.writer",
                                               silent=True)
                    if engine == "auto":
                        engine = get_default_writer(ext)
                except KeyError as err:
                    raise ValueError(
                        f"No engine for filetype: '{ext}'") from err

            if engine == "xlwt":
                xls_config_engine = config.get_option("io.excel.xls.writer",
                                                      silent=True)
                # Don't warn a 2nd time if user has changed the default engine for xls
                if xls_config_engine != "xlwt":
                    warnings.warn(
                        "As the xlwt package is no longer maintained, the xlwt "
                        "engine will be removed in a future version of pandas. "
                        "This is the only engine in pandas that supports writing "
                        "in the xls format. Install openpyxl and write to an xlsx "
                        "file instead. You can set the option io.excel.xls.writer "
                        "to 'xlwt' to silence this warning. While this option is "
                        "deprecated and will also raise a warning, it can "
                        "be globally set and the warning suppressed.",
                        FutureWarning,
                        stacklevel=4,
                    )

            cls = get_writer(engine)

        return object.__new__(cls)

    # declare external properties you can count on
    curr_sheet = None
    path = None

    @property
    @abc.abstractmethod
    def supported_extensions(self):
        """Extensions that writer engine supports."""
        pass

    @property
    @abc.abstractmethod
    def engine(self):
        """Name of engine."""
        pass

    @abc.abstractmethod
    def write_cells(self,
                    cells,
                    sheet_name=None,
                    startrow=0,
                    startcol=0,
                    freeze_panes=None):
        """
        Write given formatted cells into Excel an excel sheet

        Parameters
        ----------
        cells : generator
            cell of formatted data to save to Excel sheet
        sheet_name : str, default None
            Name of Excel sheet, if None, then use self.cur_sheet
        startrow : upper left cell row to dump data frame
        startcol : upper left cell column to dump data frame
        freeze_panes: int tuple of length 2
            contains the bottom-most row and right-most column to freeze
        """
        pass

    @abc.abstractmethod
    def save(self):
        """
        Save workbook to disk.
        """
        pass

    def __init__(
        self,
        path: Union[FilePathOrBuffer, "ExcelWriter"],
        engine=None,
        date_format=None,
        datetime_format=None,
        mode: str = "w",
        storage_options: StorageOptions = None,
        **engine_kwargs,
    ):
        # validate that this engine can handle the extension
        if isinstance(path, str):
            ext = os.path.splitext(path)[-1]
            self.check_extension(ext)

        # use mode to open the file
        if "b" not in mode:
            mode += "b"
        # use "a" for the user to append data to excel but internally use "r+" to let
        # the excel backend first read the existing file and then write any data to it
        mode = mode.replace("a", "r+")

        # cast ExcelWriter to avoid adding 'if self.handles is not None'
        self.handles = IOHandles(cast(Buffer, path),
                                 compression={"copression": None})
        if not isinstance(path, ExcelWriter):
            self.handles = get_handle(path,
                                      mode,
                                      storage_options=storage_options,
                                      is_text=False)
        self.sheets: Dict[str, Any] = {}
        self.cur_sheet = None

        if date_format is None:
            self.date_format = "YYYY-MM-DD"
        else:
            self.date_format = date_format
        if datetime_format is None:
            self.datetime_format = "YYYY-MM-DD HH:MM:SS"
        else:
            self.datetime_format = datetime_format

        self.mode = mode

    def __fspath__(self):
        return getattr(self.handles.handle, "name", "")

    def _get_sheet_name(self, sheet_name):
        if sheet_name is None:
            sheet_name = self.cur_sheet
        if sheet_name is None:  # pragma: no cover
            raise ValueError(
                "Must pass explicit sheet_name or set cur_sheet property")
        return sheet_name

    def _value_with_fmt(self, val):
        """
        Convert numpy types to Python types for the Excel writers.

        Parameters
        ----------
        val : object
            Value to be written into cells

        Returns
        -------
        Tuple with the first element being the converted value and the second
            being an optional format
        """
        fmt = None

        if is_integer(val):
            val = int(val)
        elif is_float(val):
            val = float(val)
        elif is_bool(val):
            val = bool(val)
        elif isinstance(val, datetime.datetime):
            fmt = self.datetime_format
        elif isinstance(val, datetime.date):
            fmt = self.date_format
        elif isinstance(val, datetime.timedelta):
            val = val.total_seconds() / float(86400)
            fmt = "0"
        else:
            val = str(val)

        return val, fmt

    @classmethod
    def check_extension(cls, ext: str):
        """
        checks that path's extension against the Writer's supported
        extensions.  If it isn't supported, raises UnsupportedFiletypeError.
        """
        if ext.startswith("."):
            ext = ext[1:]
        # error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__"
        #  (not iterable)  [attr-defined]
        if not any(ext in extension for extension in
                   cls.supported_extensions  # type: ignore[attr-defined]
                   ):
            raise ValueError(
                f"Invalid extension for engine '{cls.engine}': '{ext}'")
        else:
            return True

    # Allow use as a contextmanager
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """synonym for save, to make it more file-like"""
        content = self.save()
        self.handles.close()
        return content
コード例 #6
0
ファイル: _base.py プロジェクト: GetAlexed0/pythonProject
class BaseExcelReader(metaclass=abc.ABCMeta):
    def __init__(self,
                 filepath_or_buffer,
                 storage_options: StorageOptions = None):
        self.handles = IOHandles(handle=filepath_or_buffer,
                                 compression={"method": None})
        if not isinstance(filepath_or_buffer,
                          (ExcelFile, self._workbook_class)):
            self.handles = get_handle(filepath_or_buffer,
                                      "rb",
                                      storage_options=storage_options,
                                      is_text=False)

        if isinstance(self.handles.handle, self._workbook_class):
            self.book = self.handles.handle
        elif hasattr(self.handles.handle, "read"):
            # N.B. xlrd.Book has a read attribute too
            self.handles.handle.seek(0)
            self.book = self.load_workbook(self.handles.handle)
        elif isinstance(self.handles.handle, bytes):
            self.book = self.load_workbook(BytesIO(self.handles.handle))
        else:
            raise ValueError(
                "Must explicitly set engine if not passing in buffer or path for io."
            )

    @property
    @abc.abstractmethod
    def _workbook_class(self):
        pass

    @abc.abstractmethod
    def load_workbook(self, filepath_or_buffer):
        pass

    def close(self):
        self.handles.close()

    @property
    @abc.abstractmethod
    def sheet_names(self):
        pass

    @abc.abstractmethod
    def get_sheet_by_name(self, name):
        pass

    @abc.abstractmethod
    def get_sheet_by_index(self, index):
        pass

    @abc.abstractmethod
    def get_sheet_data(self, sheet, convert_float):
        pass

    def parse(
        self,
        sheet_name=0,
        header=0,
        names=None,
        index_col=None,
        usecols=None,
        squeeze=False,
        dtype=None,
        true_values=None,
        false_values=None,
        skiprows=None,
        nrows=None,
        na_values=None,
        verbose=False,
        parse_dates=False,
        date_parser=None,
        thousands=None,
        comment=None,
        skipfooter=0,
        convert_float=True,
        mangle_dupe_cols=True,
        **kwds,
    ):

        validate_header_arg(header)

        ret_dict = False

        # Keep sheetname to maintain backwards compatibility.
        if isinstance(sheet_name, list):
            sheets = sheet_name
            ret_dict = True
        elif sheet_name is None:
            sheets = self.sheet_names
            ret_dict = True
        else:
            sheets = [sheet_name]

        # handle same-type duplicates.
        sheets = list(dict.fromkeys(sheets).keys())

        output = {}

        for asheetname in sheets:
            if verbose:
                print(f"Reading sheet {asheetname}")

            if isinstance(asheetname, str):
                sheet = self.get_sheet_by_name(asheetname)
            else:  # assume an integer if not a string
                sheet = self.get_sheet_by_index(asheetname)

            data = self.get_sheet_data(sheet, convert_float)
            usecols = maybe_convert_usecols(usecols)

            if not data:
                output[asheetname] = DataFrame()
                continue

            if is_list_like(header) and len(header) == 1:
                header = header[0]

            # forward fill and pull out names for MultiIndex column
            header_names = None
            if header is not None and is_list_like(header):
                header_names = []
                control_row = [True] * len(data[0])

                for row in header:
                    if is_integer(skiprows):
                        row += skiprows

                    data[row], control_row = fill_mi_header(
                        data[row], control_row)

                    if index_col is not None:
                        header_name, _ = pop_header_name(data[row], index_col)
                        header_names.append(header_name)

            if is_list_like(index_col):
                # Forward fill values for MultiIndex index.
                if header is None:
                    offset = 0
                elif not is_list_like(header):
                    offset = 1 + header
                else:
                    offset = 1 + max(header)

                # Check if we have an empty dataset
                # before trying to collect data.
                if offset < len(data):
                    for col in index_col:
                        last = data[offset][col]

                        for row in range(offset + 1, len(data)):
                            if data[row][col] == "" or data[row][col] is None:
                                data[row][col] = last
                            else:
                                last = data[row][col]

            has_index_names = is_list_like(header) and len(header) > 1

            # GH 12292 : error when read one empty column from excel file
            try:
                parser = TextParser(
                    data,
                    names=names,
                    header=header,
                    index_col=index_col,
                    has_index_names=has_index_names,
                    squeeze=squeeze,
                    dtype=dtype,
                    true_values=true_values,
                    false_values=false_values,
                    skiprows=skiprows,
                    nrows=nrows,
                    na_values=na_values,
                    parse_dates=parse_dates,
                    date_parser=date_parser,
                    thousands=thousands,
                    comment=comment,
                    skipfooter=skipfooter,
                    usecols=usecols,
                    mangle_dupe_cols=mangle_dupe_cols,
                    **kwds,
                )

                output[asheetname] = parser.read(nrows=nrows)

                if not squeeze or isinstance(output[asheetname], DataFrame):
                    if header_names:
                        output[asheetname].columns = output[
                            asheetname].columns.set_names(header_names)

            except EmptyDataError:
                # No Data, return an empty DataFrame
                output[asheetname] = DataFrame()

        if ret_dict:
            return output
        else:
            return output[asheetname]
コード例 #7
0
class BaseExcelReader(metaclass=abc.ABCMeta):
    def __init__(self,
                 filepath_or_buffer,
                 storage_options: StorageOptions = None):
        self.handles = IOHandles(handle=filepath_or_buffer,
                                 compression={"method": None})
        if not isinstance(filepath_or_buffer,
                          (ExcelFile, self._workbook_class)):
            self.handles = get_handle(filepath_or_buffer,
                                      "rb",
                                      storage_options=storage_options,
                                      is_text=False)

        if isinstance(self.handles.handle, self._workbook_class):
            self.book = self.handles.handle
        elif hasattr(self.handles.handle, "read"):
            # N.B. xlrd.Book has a read attribute too
            self.handles.handle.seek(0)
            try:
                self.book = self.load_workbook(self.handles.handle)
            except Exception:
                self.close()
                raise
        elif isinstance(self.handles.handle, bytes):
            self.book = self.load_workbook(BytesIO(self.handles.handle))
        else:
            raise ValueError(
                "Must explicitly set engine if not passing in buffer or path for io."
            )

    @property
    @abc.abstractmethod
    def _workbook_class(self):
        pass

    @abc.abstractmethod
    def load_workbook(self, filepath_or_buffer):
        pass

    def close(self):
        if hasattr(self, "book") and hasattr(self.book, "close"):
            # pyxlsb: opens a TemporaryFile
            # openpyxl: https://stackoverflow.com/questions/31416842/
            #     openpyxl-does-not-close-excel-workbook-in-read-only-mode
            self.book.close()
        self.handles.close()

    @property
    @abc.abstractmethod
    def sheet_names(self):
        pass

    @abc.abstractmethod
    def get_sheet_by_name(self, name):
        pass

    @abc.abstractmethod
    def get_sheet_by_index(self, index):
        pass

    @abc.abstractmethod
    def get_sheet_data(self, sheet, convert_float):
        pass

    def raise_if_bad_sheet_by_index(self, index: int) -> None:
        n_sheets = len(self.sheet_names)
        if index >= n_sheets:
            raise ValueError(
                f"Worksheet index {index} is invalid, {n_sheets} worksheets found"
            )

    def raise_if_bad_sheet_by_name(self, name: str) -> None:
        if name not in self.sheet_names:
            raise ValueError(f"Worksheet named '{name}' not found")

    def parse(
        self,
        sheet_name=0,
        header=0,
        names=None,
        index_col=None,
        usecols=None,
        squeeze=False,
        dtype: DtypeArg | None = None,
        true_values=None,
        false_values=None,
        skiprows=None,
        nrows=None,
        na_values=None,
        verbose=False,
        parse_dates=False,
        date_parser=None,
        thousands=None,
        comment=None,
        skipfooter=0,
        convert_float=None,
        mangle_dupe_cols=True,
        **kwds,
    ):

        if convert_float is None:
            convert_float = True
        else:
            stacklevel = find_stack_level()
            warnings.warn(
                "convert_float is deprecated and will be removed in a future version.",
                FutureWarning,
                stacklevel=stacklevel,
            )

        validate_header_arg(header)

        ret_dict = False

        # Keep sheetname to maintain backwards compatibility.
        if isinstance(sheet_name, list):
            sheets = sheet_name
            ret_dict = True
        elif sheet_name is None:
            sheets = self.sheet_names
            ret_dict = True
        else:
            sheets = [sheet_name]

        # handle same-type duplicates.
        sheets = list(dict.fromkeys(sheets).keys())

        output = {}

        for asheetname in sheets:
            if verbose:
                print(f"Reading sheet {asheetname}")

            if isinstance(asheetname, str):
                sheet = self.get_sheet_by_name(asheetname)
            else:  # assume an integer if not a string
                sheet = self.get_sheet_by_index(asheetname)

            data = self.get_sheet_data(sheet, convert_float)
            if hasattr(sheet, "close"):
                # pyxlsb opens two TemporaryFiles
                sheet.close()
            usecols = maybe_convert_usecols(usecols)

            if not data:
                output[asheetname] = DataFrame()
                continue

            if is_list_like(header) and len(header) == 1:
                header = header[0]

            # forward fill and pull out names for MultiIndex column
            header_names = None
            if header is not None and is_list_like(header):
                header_names = []
                control_row = [True] * len(data[0])

                for row in header:
                    if is_integer(skiprows):
                        row += skiprows

                    data[row], control_row = fill_mi_header(
                        data[row], control_row)

                    if index_col is not None:
                        header_name, _ = pop_header_name(data[row], index_col)
                        header_names.append(header_name)

            # If there is a MultiIndex header and an index then there is also
            # a row containing just the index name(s)
            has_index_names = (is_list_like(header) and len(header) > 1
                               and index_col is not None)

            if is_list_like(index_col):
                # Forward fill values for MultiIndex index.
                if header is None:
                    offset = 0
                elif not is_list_like(header):
                    offset = 1 + header
                else:
                    offset = 1 + max(header)

                # GH34673: if MultiIndex names present and not defined in the header,
                # offset needs to be incremented so that forward filling starts
                # from the first MI value instead of the name
                if has_index_names:
                    offset += 1

                # Check if we have an empty dataset
                # before trying to collect data.
                if offset < len(data):
                    for col in index_col:
                        last = data[offset][col]

                        for row in range(offset + 1, len(data)):
                            if data[row][col] == "" or data[row][col] is None:
                                data[row][col] = last
                            else:
                                last = data[row][col]

            # GH 12292 : error when read one empty column from excel file
            try:
                parser = TextParser(
                    data,
                    names=names,
                    header=header,
                    index_col=index_col,
                    has_index_names=has_index_names,
                    squeeze=squeeze,
                    dtype=dtype,
                    true_values=true_values,
                    false_values=false_values,
                    skiprows=skiprows,
                    nrows=nrows,
                    na_values=na_values,
                    skip_blank_lines=False,  # GH 39808
                    parse_dates=parse_dates,
                    date_parser=date_parser,
                    thousands=thousands,
                    comment=comment,
                    skipfooter=skipfooter,
                    usecols=usecols,
                    mangle_dupe_cols=mangle_dupe_cols,
                    **kwds,
                )

                output[asheetname] = parser.read(nrows=nrows)

                if not squeeze or isinstance(output[asheetname], DataFrame):
                    if header_names:
                        output[asheetname].columns = output[
                            asheetname].columns.set_names(header_names)

            except EmptyDataError:
                # No Data, return an empty DataFrame
                output[asheetname] = DataFrame()

        if ret_dict:
            return output
        else:
            return output[asheetname]
コード例 #8
0
ファイル: _base.py プロジェクト: ukarroum/pandas
class ExcelWriter(metaclass=abc.ABCMeta):
    """
    Class for writing DataFrame objects into excel sheets.

    Default is to use :
    * xlwt for xls
    * xlsxwriter for xlsx if xlsxwriter is installed otherwise openpyxl
    * odf for ods.
    See DataFrame.to_excel for typical usage.

    The writer should be used as a context manager. Otherwise, call `close()` to save
    and close any opened file handles.

    Parameters
    ----------
    path : str or typing.BinaryIO
        Path to xls or xlsx or ods file.
    engine : str (optional)
        Engine to use for writing. If None, defaults to
        ``io.excel.<extension>.writer``.  NOTE: can only be passed as a keyword
        argument.

        .. deprecated:: 1.2.0

            As the `xlwt <https://pypi.org/project/xlwt/>`__ package is no longer
            maintained, the ``xlwt`` engine will be removed in a future
            version of pandas.

    date_format : str, default None
        Format string for dates written into Excel files (e.g. 'YYYY-MM-DD').
    datetime_format : str, default None
        Format string for datetime objects written into Excel files.
        (e.g. 'YYYY-MM-DD HH:MM:SS').
    mode : {'w', 'a'}, default 'w'
        File mode to use (write or append). Append does not work with fsspec URLs.
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection, e.g.
        host, port, username, password, etc., if using a URL that will
        be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".

        .. versionadded:: 1.2.0

    if_sheet_exists : {'error', 'new', 'replace', 'overlay'}, default 'error'
        How to behave when trying to write to a sheet that already
        exists (append mode only).

        * error: raise a ValueError.
        * new: Create a new sheet, with a name determined by the engine.
        * replace: Delete the contents of the sheet before writing to it.
        * overlay: Write contents to the existing sheet without removing the old
          contents.

        .. versionadded:: 1.3.0

        .. versionchanged:: 1.4.0

           Added ``overlay`` option

    engine_kwargs : dict, optional
        Keyword arguments to be passed into the engine. These will be passed to
        the following functions of the respective engines:

        * xlsxwriter: ``xlsxwriter.Workbook(file, **engine_kwargs)``
        * openpyxl (write mode): ``openpyxl.Workbook(**engine_kwargs)``
        * openpyxl (append mode): ``openpyxl.load_workbook(file, **engine_kwargs)``
        * odswriter: ``odf.opendocument.OpenDocumentSpreadsheet(**engine_kwargs)``

        .. versionadded:: 1.3.0
    **kwargs : dict, optional
        Keyword arguments to be passed into the engine.

        .. deprecated:: 1.3.0

            Use engine_kwargs instead.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Notes
    -----
    None of the methods and properties are considered public.

    For compatibility with CSV writers, ExcelWriter serializes lists
    and dicts to strings before writing.

    Examples
    --------
    Default usage:

    >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
    >>> with pd.ExcelWriter("path_to_file.xlsx") as writer:
    ...     df.to_excel(writer)

    To write to separate sheets in a single file:

    >>> df1 = pd.DataFrame([["AAA", "BBB"]], columns=["Spam", "Egg"])
    >>> df2 = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
    >>> with pd.ExcelWriter("path_to_file.xlsx") as writer:
    ...     df1.to_excel(writer, sheet_name="Sheet1")
    ...     df2.to_excel(writer, sheet_name="Sheet2")

    You can set the date format or datetime format:

    >>> from datetime import date, datetime
    >>> df = pd.DataFrame(
    ...     [
    ...         [date(2014, 1, 31), date(1999, 9, 24)],
    ...         [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],
    ...     ],
    ...     index=["Date", "Datetime"],
    ...     columns=["X", "Y"],
    ... )
    >>> with pd.ExcelWriter(
    ...     "path_to_file.xlsx",
    ...     date_format="YYYY-MM-DD",
    ...     datetime_format="YYYY-MM-DD HH:MM:SS"
    ... ) as writer:
    ...     df.to_excel(writer)

    You can also append to an existing Excel file:

    >>> with pd.ExcelWriter("path_to_file.xlsx", mode="a", engine="openpyxl") as writer:
    ...     df.to_excel(writer, sheet_name="Sheet3")

    Here, the `if_sheet_exists` parameter can be set to replace a sheet if it
    already exists:

    >>> with ExcelWriter(
    ...     "path_to_file.xlsx",
    ...     mode="a",
    ...     engine="openpyxl",
    ...     if_sheet_exists="replace",
    ... ) as writer:
    ...     df.to_excel(writer, sheet_name="Sheet1")

    You can also write multiple DataFrames to a single sheet. Note that the
    ``if_sheet_exists`` parameter needs to be set to ``overlay``:

    >>> with ExcelWriter("path_to_file.xlsx",
    ...     mode="a",
    ...     engine="openpyxl",
    ...     if_sheet_exists="overlay",
    ... ) as writer:
    ...     df1.to_excel(writer, sheet_name="Sheet1")
    ...     df2.to_excel(writer, sheet_name="Sheet1", startcol=3)

    You can store Excel file in RAM:

    >>> import io
    >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
    >>> buffer = io.BytesIO()
    >>> with pd.ExcelWriter(buffer) as writer:
    ...     df.to_excel(writer)

    You can pack Excel file into zip archive:

    >>> import zipfile
    >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
    >>> with zipfile.ZipFile("path_to_file.zip", "w") as zf:
    ...     with zf.open("filename.xlsx", "w") as buffer:
    ...         with pd.ExcelWriter(buffer) as writer:
    ...             df.to_excel(writer)

    You can specify additional arguments to the underlying engine:

    >>> with pd.ExcelWriter(
    ...     "path_to_file.xlsx",
    ...     engine="xlsxwriter",
    ...     engine_kwargs={"options": {"nan_inf_to_errors": True}}
    ... ) as writer:
    ...     df.to_excel(writer)

    In append mode, ``engine_kwargs`` are passed through to
    openpyxl's ``load_workbook``:

    >>> with pd.ExcelWriter(
    ...     "path_to_file.xlsx",
    ...     engine="openpyxl",
    ...     mode="a",
    ...     engine_kwargs={"keep_vba": True}
    ... ) as writer:
    ...     df.to_excel(writer, sheet_name="Sheet2")
    """

    # Defining an ExcelWriter implementation (see abstract methods for more...)

    # - Mandatory
    #   - ``write_cells(self, cells, sheet_name=None, startrow=0, startcol=0)``
    #     --> called to write additional DataFrames to disk
    #   - ``supported_extensions`` (tuple of supported extensions), used to
    #      check that engine supports the given extension.
    #   - ``engine`` - string that gives the engine name. Necessary to
    #     instantiate class directly and bypass ``ExcelWriterMeta`` engine
    #     lookup.
    #   - ``save(self)`` --> called to save file to disk
    # - Mostly mandatory (i.e. should at least exist)
    #   - book, cur_sheet, path

    # - Optional:
    #   - ``__init__(self, path, engine=None, **kwargs)`` --> always called
    #     with path as first argument.

    # You also need to register the class with ``register_writer()``.
    # Technically, ExcelWriter implementations don't need to subclass
    # ExcelWriter.
    def __new__(
        cls,
        path: FilePath | WriteExcelBuffer | ExcelWriter,
        engine=None,
        date_format=None,
        datetime_format=None,
        mode: str = "w",
        storage_options: StorageOptions = None,
        if_sheet_exists: str | None = None,
        engine_kwargs: dict | None = None,
        **kwargs,
    ):
        if kwargs:
            if engine_kwargs is not None:
                raise ValueError("Cannot use both engine_kwargs and **kwargs")
            warnings.warn(
                "Use of **kwargs is deprecated, use engine_kwargs instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        # only switch class if generic(ExcelWriter)

        if cls is ExcelWriter:
            if engine is None or (isinstance(engine, str)
                                  and engine == "auto"):
                if isinstance(path, str):
                    ext = os.path.splitext(path)[-1][1:]
                else:
                    ext = "xlsx"

                try:
                    engine = config.get_option(f"io.excel.{ext}.writer",
                                               silent=True)
                    if engine == "auto":
                        engine = get_default_engine(ext, mode="writer")
                except KeyError as err:
                    raise ValueError(
                        f"No engine for filetype: '{ext}'") from err

            if engine == "xlwt":
                xls_config_engine = config.get_option("io.excel.xls.writer",
                                                      silent=True)
                # Don't warn a 2nd time if user has changed the default engine for xls
                if xls_config_engine != "xlwt":
                    warnings.warn(
                        "As the xlwt package is no longer maintained, the xlwt "
                        "engine will be removed in a future version of pandas. "
                        "This is the only engine in pandas that supports writing "
                        "in the xls format. Install openpyxl and write to an xlsx "
                        "file instead. You can set the option io.excel.xls.writer "
                        "to 'xlwt' to silence this warning. While this option is "
                        "deprecated and will also raise a warning, it can "
                        "be globally set and the warning suppressed.",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )

            cls = get_writer(engine)

        return object.__new__(cls)

    # declare external properties you can count on
    path = None

    @property
    @abc.abstractmethod
    def supported_extensions(self):
        """Extensions that writer engine supports."""
        pass

    @property
    @abc.abstractmethod
    def engine(self):
        """Name of engine."""
        pass

    @abc.abstractmethod
    def write_cells(self,
                    cells,
                    sheet_name=None,
                    startrow=0,
                    startcol=0,
                    freeze_panes=None):
        """
        Write given formatted cells into Excel an excel sheet

        Parameters
        ----------
        cells : generator
            cell of formatted data to save to Excel sheet
        sheet_name : str, default None
            Name of Excel sheet, if None, then use self.cur_sheet
        startrow : upper left cell row to dump data frame
        startcol : upper left cell column to dump data frame
        freeze_panes: int tuple of length 2
            contains the bottom-most row and right-most column to freeze
        """
        pass

    @abc.abstractmethod
    def save(self):
        """
        Save workbook to disk.
        """
        pass

    def __init__(
        self,
        path: FilePath | WriteExcelBuffer | ExcelWriter,
        engine=None,
        date_format=None,
        datetime_format=None,
        mode: str = "w",
        storage_options: StorageOptions = None,
        if_sheet_exists: str | None = None,
        engine_kwargs: dict | None = None,
        **kwargs,
    ):
        # validate that this engine can handle the extension
        if isinstance(path, str):
            ext = os.path.splitext(path)[-1]
            self.check_extension(ext)

        # use mode to open the file
        if "b" not in mode:
            mode += "b"
        # use "a" for the user to append data to excel but internally use "r+" to let
        # the excel backend first read the existing file and then write any data to it
        mode = mode.replace("a", "r+")

        # cast ExcelWriter to avoid adding 'if self.handles is not None'
        self.handles = IOHandles(cast(IO[bytes], path),
                                 compression={"copression": None})
        if not isinstance(path, ExcelWriter):
            self.handles = get_handle(path,
                                      mode,
                                      storage_options=storage_options,
                                      is_text=False)
        self.sheets: dict[str, Any] = {}
        self.cur_sheet = None

        if date_format is None:
            self.date_format = "YYYY-MM-DD"
        else:
            self.date_format = date_format
        if datetime_format is None:
            self.datetime_format = "YYYY-MM-DD HH:MM:SS"
        else:
            self.datetime_format = datetime_format

        self.mode = mode

        if if_sheet_exists not in (None, "error", "new", "replace", "overlay"):
            raise ValueError(
                f"'{if_sheet_exists}' is not valid for if_sheet_exists. "
                "Valid options are 'error', 'new', 'replace' and 'overlay'.")
        if if_sheet_exists and "r+" not in mode:
            raise ValueError(
                "if_sheet_exists is only valid in append mode (mode='a')")
        if if_sheet_exists is None:
            if_sheet_exists = "error"
        self.if_sheet_exists = if_sheet_exists

    def __fspath__(self):
        return getattr(self.handles.handle, "name", "")

    def _get_sheet_name(self, sheet_name):
        if sheet_name is None:
            sheet_name = self.cur_sheet
        if sheet_name is None:  # pragma: no cover
            raise ValueError(
                "Must pass explicit sheet_name or set cur_sheet property")
        return sheet_name

    def _value_with_fmt(self, val):
        """
        Convert numpy types to Python types for the Excel writers.

        Parameters
        ----------
        val : object
            Value to be written into cells

        Returns
        -------
        Tuple with the first element being the converted value and the second
            being an optional format
        """
        fmt = None

        if is_integer(val):
            val = int(val)
        elif is_float(val):
            val = float(val)
        elif is_bool(val):
            val = bool(val)
        elif isinstance(val, datetime.datetime):
            fmt = self.datetime_format
        elif isinstance(val, datetime.date):
            fmt = self.date_format
        elif isinstance(val, datetime.timedelta):
            val = val.total_seconds() / 86400
            fmt = "0"
        else:
            val = str(val)

        return val, fmt

    @classmethod
    def check_extension(cls, ext: str):
        """
        checks that path's extension against the Writer's supported
        extensions.  If it isn't supported, raises UnsupportedFiletypeError.
        """
        if ext.startswith("."):
            ext = ext[1:]
        # error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__" (not
        #  iterable)
        if not any(ext in extension for extension in
                   cls.supported_extensions  # type: ignore[attr-defined]
                   ):
            raise ValueError(
                f"Invalid extension for engine '{cls.engine}': '{ext}'")
        else:
            return True

    # Allow use as a contextmanager
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """synonym for save, to make it more file-like"""
        content = self.save()
        self.handles.close()
        return content