def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
    """
    Resolve ``filepath_or_buffer`` to a handle and load ``self.book`` from it.

    Parameters
    ----------
    filepath_or_buffer : object
        An ``ExcelFile``, an instance of ``self._workbook_class``, or any
        other object, in which case it is handed to ``get_handle``.
    storage_options : StorageOptions, optional
        Forwarded unchanged to ``get_handle``.

    Raises
    ------
    ValueError
        If the resolved handle is not a workbook, has no ``read``
        attribute and is not ``bytes``.
    """
    # Start from a placeholder wrapping the input itself; it is replaced
    # below unless the input is already an ExcelFile or a workbook.
    self.handles = IOHandles(
        handle=filepath_or_buffer, compression={"method": None}
    )
    workbook_cls = self._workbook_class
    if not isinstance(filepath_or_buffer, (ExcelFile, workbook_cls)):
        self.handles = get_handle(
            filepath_or_buffer,
            "rb",
            storage_options=storage_options,
            is_text=False,
        )

    source = self.handles.handle

    # Already a workbook: use it directly.
    if isinstance(source, workbook_cls):
        self.book = source
        return

    # File-like object.
    # N.B. xlrd.Book has a read attribute too
    if hasattr(source, "read"):
        source.seek(0)
        try:
            self.book = self.load_workbook(source)
        except Exception:
            # Release the handle before propagating the load failure.
            self.close()
            raise
        return

    # Raw bytes: wrap them in an in-memory buffer.
    if isinstance(source, bytes):
        self.book = self.load_workbook(BytesIO(source))
        return

    raise ValueError(
        "Must explicitly set engine if not passing in buffer or path for io."
    )
def __init__(
    self,
    path: FilePathOrBuffer | ExcelWriter,
    engine=None,
    date_format=None,
    datetime_format=None,
    mode: str = "w",
    storage_options: StorageOptions = None,
    if_sheet_exists: str | None = None,
    engine_kwargs: dict | None = None,
    **kwargs,
):
    """
    Validate the arguments, open the target and set the writer's state.

    Parameters
    ----------
    path : str, buffer or ExcelWriter
        Target to write to. A ``str`` has its extension checked with
        ``check_extension``; anything that is not an ``ExcelWriter`` is
        opened with ``get_handle``.
    engine : optional
        Accepted for signature compatibility; not used in this body.
    date_format : str, optional
        Stored as ``self.date_format``; defaults to ``"YYYY-MM-DD"``.
    datetime_format : str, optional
        Stored as ``self.datetime_format``; defaults to
        ``"YYYY-MM-DD HH:MM:SS"``.
    mode : str, default "w"
        File mode. ``"b"`` is appended when missing and ``"a"`` is
        rewritten to ``"r+"``.
    storage_options : StorageOptions, optional
        Forwarded to ``get_handle``.
    if_sheet_exists : {"error", "new", "replace"}, optional
        Only allowed in append mode; ``None`` is stored as ``"error"``.
    engine_kwargs : dict, optional
        Accepted for signature compatibility; not used in this body.
    **kwargs
        Accepted for signature compatibility; not used in this body.

    Raises
    ------
    ValueError
        If ``if_sheet_exists`` is not a recognised value, or is given
        outside append mode.
    """
    # validate that this engine can handle the extension
    if isinstance(path, str):
        ext = os.path.splitext(path)[-1]
        self.check_extension(ext)

    # use mode to open the file
    if "b" not in mode:
        mode += "b"
    # use "a" for the user to append data to excel but internally use "r+" to let
    # the excel backend first read the existing file and then write any data to it
    mode = mode.replace("a", "r+")

    # Validate if_sheet_exists BEFORE opening anything: raising after
    # get_handle() would leave the handle open and, in write mode, would
    # already have created/truncated the target file.
    if if_sheet_exists not in [None, "error", "new", "replace"]:
        raise ValueError(
            f"'{if_sheet_exists}' is not valid for if_sheet_exists. "
            "Valid options are 'error', 'new' and 'replace'."
        )
    if if_sheet_exists and "r+" not in mode:
        raise ValueError("if_sheet_exists is only valid in append mode (mode='a')")
    if if_sheet_exists is None:
        if_sheet_exists = "error"
    self.if_sheet_exists = if_sheet_exists

    # cast ExcelWriter to avoid adding 'if self.handles is not None'
    # The placeholder uses the "method" key, matching the placeholder built
    # by BaseExcelReader (the previous "copression" key was a typo).
    self.handles = IOHandles(cast(Buffer, path), compression={"method": None})
    if not isinstance(path, ExcelWriter):
        self.handles = get_handle(
            path, mode, storage_options=storage_options, is_text=False
        )
    self.sheets: dict[str, Any] = {}
    self.cur_sheet = None

    if date_format is None:
        self.date_format = "YYYY-MM-DD"
    else:
        self.date_format = date_format
    if datetime_format is None:
        self.datetime_format = "YYYY-MM-DD HH:MM:SS"
    else:
        self.datetime_format = datetime_format

    self.mode = mode
def __init__(
    self,
    path: Union[FilePathOrBuffer, ExcelWriter],
    engine=None,
    date_format=None,
    datetime_format=None,
    mode: str = "w",
    storage_options: StorageOptions = None,
    engine_kwargs: Optional[Dict] = None,
    **kwargs,
):
    """
    Open the target and set the writer's state.

    Parameters
    ----------
    path : str, buffer or ExcelWriter
        Target to write to. A ``str`` has its extension checked with
        ``check_extension``; anything that is not an ``ExcelWriter`` is
        opened with ``get_handle``.
    engine : optional
        Accepted for signature compatibility; not used in this body.
    date_format : str, optional
        Stored as ``self.date_format``; defaults to ``"YYYY-MM-DD"``.
    datetime_format : str, optional
        Stored as ``self.datetime_format``; defaults to
        ``"YYYY-MM-DD HH:MM:SS"``.
    mode : str, default "w"
        File mode. ``"b"`` is appended when missing and ``"a"`` is
        rewritten to ``"r+"``.
    storage_options : StorageOptions, optional
        Forwarded to ``get_handle``.
    engine_kwargs : dict, optional
        Accepted for signature compatibility; not used in this body.
    **kwargs
        Accepted for signature compatibility; not used in this body.
    """
    # validate that this engine can handle the extension
    if isinstance(path, str):
        ext = os.path.splitext(path)[-1]
        self.check_extension(ext)

    # use mode to open the file
    if "b" not in mode:
        mode += "b"
    # use "a" for the user to append data to excel but internally use "r+" to let
    # the excel backend first read the existing file and then write any data to it
    mode = mode.replace("a", "r+")

    # cast ExcelWriter to avoid adding 'if self.handles is not None'
    # The placeholder uses the "method" key, matching the placeholder built
    # by BaseExcelReader (the previous "copression" key was a typo).
    self.handles = IOHandles(cast(Buffer, path), compression={"method": None})
    if not isinstance(path, ExcelWriter):
        self.handles = get_handle(
            path, mode, storage_options=storage_options, is_text=False
        )
    self.sheets: Dict[str, Any] = {}
    self.cur_sheet = None

    if date_format is None:
        self.date_format = "YYYY-MM-DD"
    else:
        self.date_format = date_format
    if datetime_format is None:
        self.datetime_format = "YYYY-MM-DD HH:MM:SS"
    else:
        self.datetime_format = datetime_format

    self.mode = mode
def __init__(
    self,
    path,
    engine=None,
    date_format=None,
    datetime_format=None,
    mode="w",
    **engine_kwargs,
):
    """
    Open the target and set the writer's state.

    Parameters
    ----------
    path : str, buffer or ExcelWriter
        Target to write to. A ``str`` has its extension checked with
        ``check_extension``; anything that is not an ``ExcelWriter`` is
        opened with ``get_handle``.
    engine : optional
        Accepted for signature compatibility; not used in this body.
    date_format : str, optional
        Stored as ``self.date_format``; defaults to ``"YYYY-MM-DD"``.
    datetime_format : str, optional
        Stored as ``self.datetime_format``; defaults to
        ``"YYYY-MM-DD HH:MM:SS"``.
    mode : str, default "w"
        File mode. ``"b"`` is appended when missing and ``"a"`` is
        rewritten to ``"r+"``.
    **engine_kwargs
        Accepted for signature compatibility; not used in this body.
    """
    # validate that this engine can handle the extension
    if isinstance(path, str):
        ext = os.path.splitext(path)[-1]
        self.check_extension(ext)

    # use mode to open the file
    if "b" not in mode:
        mode += "b"
    # use "a" for the user to append data to excel but internally use "r+" to let
    # the excel backend first read the existing file and then write any data to it
    mode = mode.replace("a", "r+")

    # Placeholder handles wrapping the input itself; replaced below unless
    # the input is already an ExcelWriter. The "method" key matches the
    # placeholder built by BaseExcelReader ("copression" was a typo).
    self.handles = IOHandles(path, compression={"method": None})
    if not isinstance(path, ExcelWriter):
        self.handles = get_handle(path, mode, is_text=False)
    self.sheets = {}
    self.cur_sheet = None

    if date_format is None:
        self.date_format = "YYYY-MM-DD"
    else:
        self.date_format = date_format
    if datetime_format is None:
        self.datetime_format = "YYYY-MM-DD HH:MM:SS"
    else:
        self.datetime_format = datetime_format

    self.mode = mode
class ExcelWriter(metaclass=abc.ABCMeta):
    """
    Class for writing DataFrame objects into excel sheets.

    Default is to use xlwt for xls, openpyxl for xlsx, odf for ods.
    See DataFrame.to_excel for typical usage.

    The writer should be used as a context manager. Otherwise, call `close()` to save
    and close any opened file handles.

    Parameters
    ----------
    path : str or typing.BinaryIO
        Path to xls or xlsx or ods file.
    engine : str (optional)
        Engine to use for writing. If None, defaults to
        ``io.excel.<extension>.writer``.  NOTE: can only be passed as a keyword
        argument.

        .. deprecated:: 1.2.0

            As the `xlwt <https://pypi.org/project/xlwt/>`__ package is no longer
            maintained, the ``xlwt`` engine will be removed in a future
            version of pandas.

    date_format : str, default None
        Format string for dates written into Excel files (e.g. 'YYYY-MM-DD').
    datetime_format : str, default None
        Format string for datetime objects written into Excel files.
        (e.g. 'YYYY-MM-DD HH:MM:SS').
    mode : {'w', 'a'}, default 'w'
        File mode to use (write or append). Append does not work with fsspec URLs.

        .. versionadded:: 0.24.0
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection, e.g.
        host, port, username, password, etc., if using a URL that will
        be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".

        .. versionadded:: 1.2.0

    Attributes
    ----------
    None

    Methods
    -------
    None

    Notes
    -----
    None of the methods and properties are considered public.

    For compatibility with CSV writers, ExcelWriter serializes lists
    and dicts to strings before writing.

    Examples
    --------
    Default usage:

    >>> with ExcelWriter('path_to_file.xlsx') as writer:
    ...     df.to_excel(writer)

    To write to separate sheets in a single file:

    >>> with ExcelWriter('path_to_file.xlsx') as writer:
    ...     df1.to_excel(writer, sheet_name='Sheet1')
    ...     df2.to_excel(writer, sheet_name='Sheet2')

    You can set the date format or datetime format:

    >>> with ExcelWriter('path_to_file.xlsx',
    ...                  date_format='YYYY-MM-DD',
    ...                  datetime_format='YYYY-MM-DD HH:MM:SS') as writer:
    ...     df.to_excel(writer)

    You can also append to an existing Excel file:

    >>> with ExcelWriter('path_to_file.xlsx', mode='a') as writer:
    ...     df.to_excel(writer, sheet_name='Sheet3')

    You can store Excel file in RAM:

    >>> import io
    >>> buffer = io.BytesIO()
    >>> with pd.ExcelWriter(buffer) as writer:
    ...     df.to_excel(writer)

    You can pack Excel file into zip archive:

    >>> import zipfile
    >>> with zipfile.ZipFile('path_to_file.zip', 'w') as zf:
    ...     with zf.open('filename.xlsx', 'w') as buffer:
    ...         with pd.ExcelWriter(buffer) as writer:
    ...             df.to_excel(writer)
    """

    # Defining an ExcelWriter implementation (see abstract methods for more...)

    # - Mandatory
    #   - ``write_cells(self, cells, sheet_name=None, startrow=0, startcol=0)``
    #     --> called to write additional DataFrames to disk
    #   - ``supported_extensions`` (tuple of supported extensions), used to
    #      check that engine supports the given extension.
    #   - ``engine`` - string that gives the engine name. Necessary to
    #     instantiate class directly and bypass ``ExcelWriterMeta`` engine
    #     lookup.
    #   - ``save(self)`` --> called to save file to disk
    # - Mostly mandatory (i.e. should at least exist)
    #   - book, cur_sheet, path

    # - Optional:
    #   - ``__init__(self, path, engine=None, **kwargs)`` --> always called
    #     with path as first argument.

    # You also need to register the class with ``register_writer()``.
    # Technically, ExcelWriter implementations don't need to subclass
    # ExcelWriter.
    def __new__(cls, path, engine=None, **kwargs):
        """
        Create the writer, dispatching to an engine-specific class.

        When called on ``ExcelWriter`` itself, the engine is resolved from
        ``engine`` or, if that is ``None``/``"auto"``, from the
        ``io.excel.<ext>.writer`` option (``ext`` defaults to ``"xlsx"``
        for non-string paths), and the class returned by ``get_writer``
        is instantiated. Subclasses are instantiated as-is.

        Raises
        ------
        ValueError
            If no ``io.excel.<ext>.writer`` option exists for the extension.
        """
        # only switch class if generic(ExcelWriter)
        if cls is ExcelWriter:
            if engine is None or (isinstance(engine, str) and engine == "auto"):
                if isinstance(path, str):
                    ext = os.path.splitext(path)[-1][1:]
                else:
                    ext = "xlsx"

                try:
                    engine = config.get_option(f"io.excel.{ext}.writer", silent=True)
                    if engine == "auto":
                        engine = get_default_writer(ext)
                except KeyError as err:
                    raise ValueError(f"No engine for filetype: '{ext}'") from err

            if engine == "xlwt":
                xls_config_engine = config.get_option(
                    "io.excel.xls.writer", silent=True
                )
                # Don't warn a 2nd time if user has changed the default engine for xls
                if xls_config_engine != "xlwt":
                    warnings.warn(
                        "As the xlwt package is no longer maintained, the xlwt "
                        "engine will be removed in a future version of pandas. "
                        "This is the only engine in pandas that supports writing "
                        "in the xls format. Install openpyxl and write to an xlsx "
                        "file instead. You can set the option io.excel.xls.writer "
                        "to 'xlwt' to silence this warning. While this option is "
                        "deprecated and will also raise a warning, it can "
                        "be globally set and the warning suppressed.",
                        FutureWarning,
                        stacklevel=4,
                    )

            cls = get_writer(engine)

        return object.__new__(cls)

    # declare external properties you can count on
    # NOTE(review): spelled ``curr_sheet`` here while __init__ sets
    # ``cur_sheet`` on instances; kept unchanged for backward compatibility.
    curr_sheet = None
    path = None

    @property
    @abc.abstractmethod
    def supported_extensions(self):
        """Extensions that writer engine supports."""
        pass

    @property
    @abc.abstractmethod
    def engine(self):
        """Name of engine."""
        pass

    @abc.abstractmethod
    def write_cells(
        self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None
    ):
        """
        Write given formatted cells into an Excel sheet.

        Parameters
        ----------
        cells : generator
            cell of formatted data to save to Excel sheet
        sheet_name : str, default None
            Name of Excel sheet, if None, then use self.cur_sheet
        startrow : upper left cell row to dump data frame
        startcol : upper left cell column to dump data frame
        freeze_panes: int tuple of length 2
            contains the bottom-most row and right-most column to freeze
        """
        pass

    @abc.abstractmethod
    def save(self):
        """
        Save workbook to disk.
        """
        pass

    def __init__(
        self,
        path: Union[FilePathOrBuffer, "ExcelWriter"],
        engine=None,
        date_format=None,
        datetime_format=None,
        mode: str = "w",
        storage_options: StorageOptions = None,
        **engine_kwargs,
    ):
        """
        Open the target and set the writer's state.

        ``engine`` and ``**engine_kwargs`` are accepted but not used in
        this body. ``mode`` gets ``"b"`` appended when missing and has
        ``"a"`` rewritten to ``"r+"`` before the handle is opened.
        """
        # validate that this engine can handle the extension
        if isinstance(path, str):
            ext = os.path.splitext(path)[-1]
            self.check_extension(ext)

        # use mode to open the file
        if "b" not in mode:
            mode += "b"
        # use "a" for the user to append data to excel but internally use "r+" to let
        # the excel backend first read the existing file and then write any data to it
        mode = mode.replace("a", "r+")

        # cast ExcelWriter to avoid adding 'if self.handles is not None'
        # The placeholder uses the "method" key, matching the placeholder
        # built by BaseExcelReader (the previous "copression" key was a typo).
        self.handles = IOHandles(cast(Buffer, path), compression={"method": None})
        if not isinstance(path, ExcelWriter):
            self.handles = get_handle(
                path, mode, storage_options=storage_options, is_text=False
            )
        self.sheets: Dict[str, Any] = {}
        self.cur_sheet = None

        if date_format is None:
            self.date_format = "YYYY-MM-DD"
        else:
            self.date_format = date_format
        if datetime_format is None:
            self.datetime_format = "YYYY-MM-DD HH:MM:SS"
        else:
            self.datetime_format = datetime_format

        self.mode = mode

    def __fspath__(self):
        """Return the ``name`` of the underlying handle, or ``""`` if it has none."""
        return getattr(self.handles.handle, "name", "")

    def _get_sheet_name(self, sheet_name):
        """
        Return ``sheet_name``, falling back to ``self.cur_sheet`` when it is None.

        Raises
        ------
        ValueError
            If both ``sheet_name`` and ``self.cur_sheet`` are None.
        """
        if sheet_name is None:
            sheet_name = self.cur_sheet
        if sheet_name is None:  # pragma: no cover
            raise ValueError("Must pass explicit sheet_name or set cur_sheet property")
        return sheet_name

    def _value_with_fmt(self, val):
        """
        Convert numpy types to Python types for the Excel writers.

        Parameters
        ----------
        val : object
            Value to be written into cells

        Returns
        -------
        Tuple with the first element being the converted value and the second
            being an optional format
        """
        fmt = None

        if is_integer(val):
            val = int(val)
        elif is_float(val):
            val = float(val)
        elif is_bool(val):
            val = bool(val)
        elif isinstance(val, datetime.datetime):
            # datetime must be tested before date: it is a date subclass
            fmt = self.datetime_format
        elif isinstance(val, datetime.date):
            fmt = self.date_format
        elif isinstance(val, datetime.timedelta):
            # express the duration as a number of days (86400 seconds per day)
            val = val.total_seconds() / 86400
            fmt = "0"
        else:
            val = str(val)

        return val, fmt

    @classmethod
    def check_extension(cls, ext: str):
        """
        Check the path's extension against the Writer's supported extensions.

        A leading ``"."`` is stripped, then ``ext`` is accepted if it is a
        substring of any entry in ``cls.supported_extensions``.

        Returns
        -------
        bool
            Always ``True`` when the extension is accepted.

        Raises
        ------
        ValueError
            If the extension is not supported.
        """
        if ext.startswith("."):
            ext = ext[1:]
        # error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__"
        #  (not iterable)  [attr-defined]
        if not any(
            ext in extension
            for extension in cls.supported_extensions  # type: ignore[attr-defined]
        ):
            raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'")
        return True

    # Allow use as a contextmanager
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """synonym for save, to make it more file-like"""
        try:
            content = self.save()
        finally:
            # Release the file handles even when save() raises, so a failed
            # save does not leak an open file.
            self.handles.close()
        return content
class BaseExcelReader(metaclass=abc.ABCMeta):
    """
    Abstract base for engine-specific Excel readers.

    Subclasses supply the workbook type and the sheet accessors; this class
    opens the input, holds the loaded workbook in ``self.book`` and turns
    sheet data into parsed results in :meth:`parse`.
    """

    def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
        """
        Resolve ``filepath_or_buffer`` to a handle and load ``self.book``.

        Parameters
        ----------
        filepath_or_buffer : object
            An ``ExcelFile``, an instance of ``self._workbook_class``, or any
            other object, in which case it is handed to ``get_handle``.
        storage_options : StorageOptions, optional
            Forwarded unchanged to ``get_handle``.

        Raises
        ------
        ValueError
            If the resolved handle is not a workbook, has no ``read``
            attribute and is not ``bytes``.
        """
        self.handles = IOHandles(
            handle=filepath_or_buffer, compression={"method": None}
        )
        if not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)):
            self.handles = get_handle(
                filepath_or_buffer, "rb", storage_options=storage_options, is_text=False
            )

        if isinstance(self.handles.handle, self._workbook_class):
            self.book = self.handles.handle
        elif hasattr(self.handles.handle, "read"):
            # N.B. xlrd.Book has a read attribute too
            self.handles.handle.seek(0)
            try:
                self.book = self.load_workbook(self.handles.handle)
            except Exception:
                # Do not leak the handle opened above when the workbook
                # cannot be loaded; release it, then propagate the error.
                self.close()
                raise
        elif isinstance(self.handles.handle, bytes):
            self.book = self.load_workbook(BytesIO(self.handles.handle))
        else:
            raise ValueError(
                "Must explicitly set engine if not passing in buffer or path for io."
            )

    @property
    @abc.abstractmethod
    def _workbook_class(self):
        """Workbook type of the engine; instances are used as ``self.book`` directly."""
        pass

    @abc.abstractmethod
    def load_workbook(self, filepath_or_buffer):
        """Load and return a workbook from a readable buffer."""
        pass

    def close(self):
        """Close the file handles held by this reader."""
        self.handles.close()

    @property
    @abc.abstractmethod
    def sheet_names(self):
        """Names of the sheets in the workbook."""
        pass

    @abc.abstractmethod
    def get_sheet_by_name(self, name):
        """Return the sheet called ``name``."""
        pass

    @abc.abstractmethod
    def get_sheet_by_index(self, index):
        """Return the sheet at position ``index``."""
        pass

    @abc.abstractmethod
    def get_sheet_data(self, sheet, convert_float):
        """Return the rows of ``sheet``; :meth:`parse` indexes the result as ``data[row][col]``."""
        pass

    def parse(
        self,
        sheet_name=0,
        header=0,
        names=None,
        index_col=None,
        usecols=None,
        squeeze=False,
        dtype=None,
        true_values=None,
        false_values=None,
        skiprows=None,
        nrows=None,
        na_values=None,
        verbose=False,
        parse_dates=False,
        date_parser=None,
        thousands=None,
        comment=None,
        skipfooter=0,
        convert_float=True,
        mangle_dupe_cols=True,
        **kwds,
    ):
        """
        Parse one or more sheets of the workbook.

        Parameters
        ----------
        sheet_name : str, int, list or None, default 0
            A string selects a sheet by name, any other scalar by index.
            A list selects several sheets; ``None`` selects every sheet in
            ``self.sheet_names``.
        header, index_col, skiprows
            Used here to forward-fill MultiIndex headers/index values, and
            also forwarded to ``TextParser``.
        verbose : bool, default False
            Print the name of each sheet as it is read.
        convert_float
            Forwarded to ``get_sheet_data``.
        names, usecols, squeeze, dtype, true_values, false_values, nrows, \
na_values, parse_dates, date_parser, thousands, comment, skipfooter, \
mangle_dupe_cols, **kwds
            Forwarded to ``TextParser`` (``usecols`` after
            ``maybe_convert_usecols``).

        Returns
        -------
        dict or parsed result
            A dict keyed by the requested sheet names/indices when
            ``sheet_name`` is a list or ``None``; otherwise the result for
            the single requested sheet. Sheets with no data yield an empty
            ``DataFrame``.
        """
        validate_header_arg(header)

        ret_dict = False

        # Keep sheetname to maintain backwards compatibility.
        if isinstance(sheet_name, list):
            sheets = sheet_name
            ret_dict = True
        elif sheet_name is None:
            sheets = self.sheet_names
            ret_dict = True
        else:
            sheets = [sheet_name]

        # handle same-type duplicates.
        sheets = list(dict.fromkeys(sheets).keys())

        output = {}

        for asheetname in sheets:
            if verbose:
                print(f"Reading sheet {asheetname}")

            if isinstance(asheetname, str):
                sheet = self.get_sheet_by_name(asheetname)
            else:  # assume an integer if not a string
                sheet = self.get_sheet_by_index(asheetname)

            data = self.get_sheet_data(sheet, convert_float)
            usecols = maybe_convert_usecols(usecols)

            if not data:
                output[asheetname] = DataFrame()
                continue

            # A one-element header list is treated like a scalar header.
            if is_list_like(header) and len(header) == 1:
                header = header[0]

            # forward fill and pull out names for MultiIndex column
            header_names = None
            if header is not None and is_list_like(header):
                header_names = []
                control_row = [True] * len(data[0])

                for row in header:
                    # header rows are relative to the rows left after skipping
                    if is_integer(skiprows):
                        row += skiprows

                    data[row], control_row = fill_mi_header(data[row], control_row)

                    if index_col is not None:
                        header_name, _ = pop_header_name(data[row], index_col)
                        header_names.append(header_name)

            if is_list_like(index_col):
                # Forward fill values for MultiIndex index.
                if header is None:
                    offset = 0
                elif not is_list_like(header):
                    offset = 1 + header
                else:
                    offset = 1 + max(header)

                # Check if we have an empty dataset
                # before trying to collect data.
                if offset < len(data):
                    for col in index_col:
                        last = data[offset][col]

                        for row in range(offset + 1, len(data)):
                            if data[row][col] == "" or data[row][col] is None:
                                data[row][col] = last
                            else:
                                last = data[row][col]

            has_index_names = is_list_like(header) and len(header) > 1

            # GH 12292 : error when read one empty column from excel file
            try:
                parser = TextParser(
                    data,
                    names=names,
                    header=header,
                    index_col=index_col,
                    has_index_names=has_index_names,
                    squeeze=squeeze,
                    dtype=dtype,
                    true_values=true_values,
                    false_values=false_values,
                    skiprows=skiprows,
                    nrows=nrows,
                    na_values=na_values,
                    parse_dates=parse_dates,
                    date_parser=date_parser,
                    thousands=thousands,
                    comment=comment,
                    skipfooter=skipfooter,
                    usecols=usecols,
                    mangle_dupe_cols=mangle_dupe_cols,
                    **kwds,
                )

                output[asheetname] = parser.read(nrows=nrows)

                if not squeeze or isinstance(output[asheetname], DataFrame):
                    if header_names:
                        output[asheetname].columns = output[
                            asheetname
                        ].columns.set_names(header_names)

            except EmptyDataError:
                # No Data, return an empty DataFrame
                output[asheetname] = DataFrame()

        if ret_dict:
            return output
        else:
            return output[asheetname]
class BaseExcelReader(metaclass=abc.ABCMeta):
    """
    Abstract base for engine-specific Excel readers.

    Subclasses supply the workbook type and the sheet accessors; this class
    opens the input, holds the loaded workbook in ``self.book`` and turns
    sheet data into parsed results in :meth:`parse`.
    """

    def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None):
        """
        Resolve ``filepath_or_buffer`` to a handle and load ``self.book``.

        Raises
        ------
        ValueError
            If the resolved handle is not a workbook, has no ``read``
            attribute and is not ``bytes``.
        """
        # Placeholder wrapping the input itself; replaced below unless the
        # input is already an ExcelFile or a workbook.
        self.handles = IOHandles(
            handle=filepath_or_buffer, compression={"method": None}
        )
        workbook_cls = self._workbook_class
        if not isinstance(filepath_or_buffer, (ExcelFile, workbook_cls)):
            self.handles = get_handle(
                filepath_or_buffer,
                "rb",
                storage_options=storage_options,
                is_text=False,
            )

        source = self.handles.handle

        if isinstance(source, workbook_cls):
            self.book = source
            return

        # N.B. xlrd.Book has a read attribute too
        if hasattr(source, "read"):
            source.seek(0)
            try:
                self.book = self.load_workbook(source)
            except Exception:
                # release the handle before propagating the load failure
                self.close()
                raise
            return

        if isinstance(source, bytes):
            self.book = self.load_workbook(BytesIO(source))
            return

        raise ValueError(
            "Must explicitly set engine if not passing in buffer or path for io."
        )

    @property
    @abc.abstractmethod
    def _workbook_class(self):
        """Workbook type of the engine; instances are used as ``self.book`` directly."""

    @abc.abstractmethod
    def load_workbook(self, filepath_or_buffer):
        """Load and return a workbook from a readable buffer."""

    def close(self):
        """Close the workbook (when it exists and can be closed), then the handles."""
        if hasattr(self, "book"):
            workbook = self.book
            if hasattr(workbook, "close"):
                # pyxlsb: opens a TemporaryFile
                # openpyxl: https://stackoverflow.com/questions/31416842/
                #     openpyxl-does-not-close-excel-workbook-in-read-only-mode
                workbook.close()
        self.handles.close()

    @property
    @abc.abstractmethod
    def sheet_names(self):
        """Names of the sheets in the workbook."""

    @abc.abstractmethod
    def get_sheet_by_name(self, name):
        """Return the sheet called ``name``."""

    @abc.abstractmethod
    def get_sheet_by_index(self, index):
        """Return the sheet at position ``index``."""

    @abc.abstractmethod
    def get_sheet_data(self, sheet, convert_float):
        """Return the rows of ``sheet``; :meth:`parse` indexes the result as ``data[row][col]``."""

    def raise_if_bad_sheet_by_index(self, index: int) -> None:
        """Raise ``ValueError`` when ``index`` is at or past the number of sheets."""
        sheet_count = len(self.sheet_names)
        if index >= sheet_count:
            raise ValueError(
                f"Worksheet index {index} is invalid, {sheet_count} worksheets found"
            )

    def raise_if_bad_sheet_by_name(self, name: str) -> None:
        """Raise ``ValueError`` when no sheet is called ``name``."""
        known_names = self.sheet_names
        if name not in known_names:
            raise ValueError(f"Worksheet named '{name}' not found")

    def parse(
        self,
        sheet_name=0,
        header=0,
        names=None,
        index_col=None,
        usecols=None,
        squeeze=False,
        dtype: DtypeArg | None = None,
        true_values=None,
        false_values=None,
        skiprows=None,
        nrows=None,
        na_values=None,
        verbose=False,
        parse_dates=False,
        date_parser=None,
        thousands=None,
        comment=None,
        skipfooter=0,
        convert_float=None,
        mangle_dupe_cols=True,
        **kwds,
    ):
        """
        Parse one or more sheets of the workbook.

        ``sheet_name`` may be a string (by name), another scalar (by index),
        a list of those, or ``None`` for every sheet. A list or ``None``
        yields a dict keyed by the requested names/indices; otherwise the
        result for the single sheet is returned. Sheets with no data yield
        an empty ``DataFrame``. Passing any non-None ``convert_float`` emits
        a ``FutureWarning``. The remaining arguments are forwarded to
        ``TextParser``.
        """
        if convert_float is not None:
            warnings.warn(
                "convert_float is deprecated and will be removed in a future version.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        else:
            convert_float = True

        validate_header_arg(header)

        # Keep sheetname to maintain backwards compatibility.
        as_mapping = isinstance(sheet_name, list) or sheet_name is None
        if isinstance(sheet_name, list):
            requested = sheet_name
        elif sheet_name is None:
            requested = self.sheet_names
        else:
            requested = [sheet_name]

        # handle same-type duplicates.
        requested = list(dict.fromkeys(requested))

        output = {}

        for sheet_key in requested:
            if verbose:
                print(f"Reading sheet {sheet_key}")

            # assume an integer if not a string
            if isinstance(sheet_key, str):
                sheet = self.get_sheet_by_name(sheet_key)
            else:
                sheet = self.get_sheet_by_index(sheet_key)

            data = self.get_sheet_data(sheet, convert_float)
            if hasattr(sheet, "close"):
                # pyxlsb opens two TemporaryFiles
                sheet.close()
            usecols = maybe_convert_usecols(usecols)

            if not data:
                output[sheet_key] = DataFrame()
                continue

            # A one-element header list is treated like a scalar header.
            if is_list_like(header) and len(header) == 1:
                header = header[0]

            # forward fill and pull out names for MultiIndex column
            header_names = None
            if header is not None and is_list_like(header):
                header_names = []
                control_row = [True] * len(data[0])

                for header_row in header:
                    if is_integer(skiprows):
                        header_row += skiprows

                    filled, control_row = fill_mi_header(
                        data[header_row], control_row
                    )
                    data[header_row] = filled

                    if index_col is not None:
                        level_name, _ = pop_header_name(data[header_row], index_col)
                        header_names.append(level_name)

            # If there is a MultiIndex header and an index then there is also
            # a row containing just the index name(s)
            has_index_names = (
                is_list_like(header) and len(header) > 1 and index_col is not None
            )

            if is_list_like(index_col):
                # Forward fill values for MultiIndex index.
                if header is None:
                    fill_start = 0
                elif is_list_like(header):
                    fill_start = 1 + max(header)
                else:
                    fill_start = 1 + header

                # GH34673: if MultiIndex names present and not defined in the header,
                # offset needs to be incremented so that forward filling starts
                # from the first MI value instead of the name
                if has_index_names:
                    fill_start += 1

                # Check if we have an empty dataset
                # before trying to collect data.
                if fill_start < len(data):
                    for col in index_col:
                        carried = data[fill_start][col]

                        for record in data[fill_start + 1 :]:
                            cell = record[col]
                            if cell == "" or cell is None:
                                record[col] = carried
                            else:
                                carried = cell

            # GH 12292 : error when read one empty column from excel file
            try:
                parser = TextParser(
                    data,
                    names=names,
                    header=header,
                    index_col=index_col,
                    has_index_names=has_index_names,
                    squeeze=squeeze,
                    dtype=dtype,
                    true_values=true_values,
                    false_values=false_values,
                    skiprows=skiprows,
                    nrows=nrows,
                    na_values=na_values,
                    skip_blank_lines=False,  # GH 39808
                    parse_dates=parse_dates,
                    date_parser=date_parser,
                    thousands=thousands,
                    comment=comment,
                    skipfooter=skipfooter,
                    usecols=usecols,
                    mangle_dupe_cols=mangle_dupe_cols,
                    **kwds,
                )

                parsed = parser.read(nrows=nrows)
                output[sheet_key] = parsed

                if header_names and (not squeeze or isinstance(parsed, DataFrame)):
                    parsed.columns = parsed.columns.set_names(header_names)

            except EmptyDataError:
                # No Data, return an empty DataFrame
                output[sheet_key] = DataFrame()

        if as_mapping:
            return output
        return output[sheet_key]
class ExcelWriter(metaclass=abc.ABCMeta):
    """
    Class for writing DataFrame objects into excel sheets.

    Default is to use :
    * xlwt for xls
    * xlsxwriter for xlsx if xlsxwriter is installed otherwise openpyxl
    * odf for ods.
    See DataFrame.to_excel for typical usage.

    The writer should be used as a context manager. Otherwise, call `close()` to save
    and close any opened file handles.

    Parameters
    ----------
    path : str or typing.BinaryIO
        Path to xls or xlsx or ods file.
    engine : str (optional)
        Engine to use for writing. If None, defaults to
        ``io.excel.<extension>.writer``.  NOTE: can only be passed as a keyword
        argument.

        .. deprecated:: 1.2.0

            As the `xlwt <https://pypi.org/project/xlwt/>`__ package is no longer
            maintained, the ``xlwt`` engine will be removed in a future
            version of pandas.

    date_format : str, default None
        Format string for dates written into Excel files (e.g. 'YYYY-MM-DD').
    datetime_format : str, default None
        Format string for datetime objects written into Excel files.
        (e.g. 'YYYY-MM-DD HH:MM:SS').
    mode : {'w', 'a'}, default 'w'
        File mode to use (write or append). Append does not work with fsspec URLs.
    storage_options : dict, optional
        Extra options that make sense for a particular storage connection, e.g.
        host, port, username, password, etc., if using a URL that will
        be parsed by ``fsspec``, e.g., starting "s3://", "gcs://".

        .. versionadded:: 1.2.0

    if_sheet_exists : {'error', 'new', 'replace', 'overlay'}, default 'error'
        How to behave when trying to write to a sheet that already
        exists (append mode only).

        * error: raise a ValueError.
        * new: Create a new sheet, with a name determined by the engine.
        * replace: Delete the contents of the sheet before writing to it.
        * overlay: Write contents to the existing sheet without removing the old
          contents.

        .. versionadded:: 1.3.0

        .. versionchanged:: 1.4.0

           Added ``overlay`` option

    engine_kwargs : dict, optional
        Keyword arguments to be passed into the engine. These will be passed to
        the following functions of the respective engines:

        * xlsxwriter: ``xlsxwriter.Workbook(file, **engine_kwargs)``
        * openpyxl (write mode): ``openpyxl.Workbook(**engine_kwargs)``
        * openpyxl (append mode): ``openpyxl.load_workbook(file, **engine_kwargs)``
        * odswriter: ``odf.opendocument.OpenDocumentSpreadsheet(**engine_kwargs)``

        .. versionadded:: 1.3.0
    **kwargs : dict, optional
        Keyword arguments to be passed into the engine.

        .. deprecated:: 1.3.0

            Use engine_kwargs instead.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Notes
    -----
    None of the methods and properties are considered public.

    For compatibility with CSV writers, ExcelWriter serializes lists
    and dicts to strings before writing.

    Examples
    --------
    Default usage:

    >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
    >>> with pd.ExcelWriter("path_to_file.xlsx") as writer:
    ...     df.to_excel(writer)

    To write to separate sheets in a single file:

    >>> df1 = pd.DataFrame([["AAA", "BBB"]], columns=["Spam", "Egg"])
    >>> df2 = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
    >>> with pd.ExcelWriter("path_to_file.xlsx") as writer:
    ...     df1.to_excel(writer, sheet_name="Sheet1")
    ...     df2.to_excel(writer, sheet_name="Sheet2")

    You can set the date format or datetime format:

    >>> from datetime import date, datetime
    >>> df = pd.DataFrame(
    ...     [
    ...         [date(2014, 1, 31), date(1999, 9, 24)],
    ...         [datetime(1998, 5, 26, 23, 33, 4), datetime(2014, 2, 28, 13, 5, 13)],
    ...     ],
    ...     index=["Date", "Datetime"],
    ...     columns=["X", "Y"],
    ... )
    >>> with pd.ExcelWriter(
    ...     "path_to_file.xlsx",
    ...     date_format="YYYY-MM-DD",
    ...     datetime_format="YYYY-MM-DD HH:MM:SS"
    ... ) as writer:
    ...     df.to_excel(writer)

    You can also append to an existing Excel file:

    >>> with pd.ExcelWriter("path_to_file.xlsx", mode="a", engine="openpyxl") as writer:
    ...     df.to_excel(writer, sheet_name="Sheet3")

    Here, the `if_sheet_exists` parameter can be set to replace a sheet if it
    already exists:

    >>> with pd.ExcelWriter(
    ...     "path_to_file.xlsx",
    ...     mode="a",
    ...     engine="openpyxl",
    ...     if_sheet_exists="replace",
    ... ) as writer:
    ...     df.to_excel(writer, sheet_name="Sheet1")

    You can also write multiple DataFrames to a single sheet. Note that the
    ``if_sheet_exists`` parameter needs to be set to ``overlay``:

    >>> with pd.ExcelWriter(
    ...     "path_to_file.xlsx",
    ...     mode="a",
    ...     engine="openpyxl",
    ...     if_sheet_exists="overlay",
    ... ) as writer:
    ...     df1.to_excel(writer, sheet_name="Sheet1")
    ...     df2.to_excel(writer, sheet_name="Sheet1", startcol=3)

    You can store Excel file in RAM:

    >>> import io
    >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
    >>> buffer = io.BytesIO()
    >>> with pd.ExcelWriter(buffer) as writer:
    ...     df.to_excel(writer)

    You can pack Excel file into zip archive:

    >>> import zipfile
    >>> df = pd.DataFrame([["ABC", "XYZ"]], columns=["Foo", "Bar"])
    >>> with zipfile.ZipFile("path_to_file.zip", "w") as zf:
    ...     with zf.open("filename.xlsx", "w") as buffer:
    ...         with pd.ExcelWriter(buffer) as writer:
    ...             df.to_excel(writer)

    You can specify additional arguments to the underlying engine:

    >>> with pd.ExcelWriter(
    ...     "path_to_file.xlsx",
    ...     engine="xlsxwriter",
    ...     engine_kwargs={"options": {"nan_inf_to_errors": True}}
    ... ) as writer:
    ...     df.to_excel(writer)

    In append mode, ``engine_kwargs`` are passed through to
    openpyxl's ``load_workbook``:

    >>> with pd.ExcelWriter(
    ...     "path_to_file.xlsx",
    ...     engine="openpyxl",
    ...     mode="a",
    ...     engine_kwargs={"keep_vba": True}
    ... ) as writer:
    ...     df.to_excel(writer, sheet_name="Sheet2")
    """

    # Defining an ExcelWriter implementation (see abstract methods for more...)

    # - Mandatory
    #   - ``write_cells(self, cells, sheet_name=None, startrow=0, startcol=0)``
    #     --> called to write additional DataFrames to disk
    #   - ``supported_extensions`` (tuple of supported extensions), used to
    #      check that engine supports the given extension.
    #   - ``engine`` - string that gives the engine name. Necessary to
    #     instantiate class directly and bypass ``ExcelWriterMeta`` engine
    #     lookup.
    #   - ``save(self)`` --> called to save file to disk
    # - Mostly mandatory (i.e. should at least exist)
    #   - book, cur_sheet, path

    # - Optional:
    #   - ``__init__(self, path, engine=None, **kwargs)`` --> always called
    #     with path as first argument.

    # You also need to register the class with ``register_writer()``.
    # Technically, ExcelWriter implementations don't need to subclass
    # ExcelWriter.
    def __new__(
        cls,
        path: FilePath | WriteExcelBuffer | ExcelWriter,
        engine=None,
        date_format=None,
        datetime_format=None,
        mode: str = "w",
        storage_options: StorageOptions = None,
        if_sheet_exists: str | None = None,
        engine_kwargs: dict | None = None,
        **kwargs,
    ):
        """
        Create the writer, dispatching to an engine-specific class.

        When called on ``ExcelWriter`` itself, the engine is resolved from
        ``engine`` or, if that is ``None``/``"auto"``, from the
        ``io.excel.<ext>.writer`` option (``ext`` defaults to ``"xlsx"``
        for non-string paths), and the class returned by ``get_writer``
        is instantiated. Subclasses are instantiated as-is.

        Raises
        ------
        ValueError
            If both ``engine_kwargs`` and ``**kwargs`` are given, or if no
            ``io.excel.<ext>.writer`` option exists for the extension.
        """
        if kwargs:
            if engine_kwargs is not None:
                raise ValueError("Cannot use both engine_kwargs and **kwargs")
            warnings.warn(
                "Use of **kwargs is deprecated, use engine_kwargs instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        # only switch class if generic(ExcelWriter)

        if cls is ExcelWriter:
            if engine is None or (isinstance(engine, str) and engine == "auto"):
                if isinstance(path, str):
                    ext = os.path.splitext(path)[-1][1:]
                else:
                    ext = "xlsx"

                try:
                    engine = config.get_option(f"io.excel.{ext}.writer", silent=True)
                    if engine == "auto":
                        engine = get_default_engine(ext, mode="writer")
                except KeyError as err:
                    raise ValueError(f"No engine for filetype: '{ext}'") from err

            if engine == "xlwt":
                xls_config_engine = config.get_option(
                    "io.excel.xls.writer", silent=True
                )
                # Don't warn a 2nd time if user has changed the default engine for xls
                if xls_config_engine != "xlwt":
                    warnings.warn(
                        "As the xlwt package is no longer maintained, the xlwt "
                        "engine will be removed in a future version of pandas. "
                        "This is the only engine in pandas that supports writing "
                        "in the xls format. Install openpyxl and write to an xlsx "
                        "file instead. You can set the option io.excel.xls.writer "
                        "to 'xlwt' to silence this warning. While this option is "
                        "deprecated and will also raise a warning, it can "
                        "be globally set and the warning suppressed.",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )

            cls = get_writer(engine)

        return object.__new__(cls)

    # declare external properties you can count on
    path = None

    @property
    @abc.abstractmethod
    def supported_extensions(self):
        """Extensions that writer engine supports."""
        pass

    @property
    @abc.abstractmethod
    def engine(self):
        """Name of engine."""
        pass

    @abc.abstractmethod
    def write_cells(
        self, cells, sheet_name=None, startrow=0, startcol=0, freeze_panes=None
    ):
        """
        Write given formatted cells into an Excel sheet.

        Parameters
        ----------
        cells : generator
            cell of formatted data to save to Excel sheet
        sheet_name : str, default None
            Name of Excel sheet, if None, then use self.cur_sheet
        startrow : upper left cell row to dump data frame
        startcol : upper left cell column to dump data frame
        freeze_panes: int tuple of length 2
            contains the bottom-most row and right-most column to freeze
        """
        pass

    @abc.abstractmethod
    def save(self):
        """
        Save workbook to disk.
        """
        pass

    def __init__(
        self,
        path: FilePath | WriteExcelBuffer | ExcelWriter,
        engine=None,
        date_format=None,
        datetime_format=None,
        mode: str = "w",
        storage_options: StorageOptions = None,
        if_sheet_exists: str | None = None,
        engine_kwargs: dict | None = None,
        **kwargs,
    ):
        """
        Validate the arguments, open the target and set the writer's state.

        ``engine``, ``engine_kwargs`` and ``**kwargs`` are accepted but not
        used in this body. ``mode`` gets ``"b"`` appended when missing and
        has ``"a"`` rewritten to ``"r+"`` before the handle is opened.

        Raises
        ------
        ValueError
            If ``if_sheet_exists`` is not a recognised value, or is given
            outside append mode.
        """
        # validate that this engine can handle the extension
        if isinstance(path, str):
            ext = os.path.splitext(path)[-1]
            self.check_extension(ext)

        # use mode to open the file
        if "b" not in mode:
            mode += "b"
        # use "a" for the user to append data to excel but internally use "r+" to let
        # the excel backend first read the existing file and then write any data to it
        mode = mode.replace("a", "r+")

        # Validate if_sheet_exists BEFORE opening anything: raising after
        # get_handle() would leave the handle open and, in write mode, would
        # already have created/truncated the target file.
        if if_sheet_exists not in (None, "error", "new", "replace", "overlay"):
            raise ValueError(
                f"'{if_sheet_exists}' is not valid for if_sheet_exists. "
                "Valid options are 'error', 'new', 'replace' and 'overlay'."
            )
        if if_sheet_exists and "r+" not in mode:
            raise ValueError("if_sheet_exists is only valid in append mode (mode='a')")
        if if_sheet_exists is None:
            if_sheet_exists = "error"
        self.if_sheet_exists = if_sheet_exists

        # cast ExcelWriter to avoid adding 'if self.handles is not None'
        # The placeholder uses the "method" key, matching the placeholder
        # built by BaseExcelReader (the previous "copression" key was a typo).
        self.handles = IOHandles(cast(IO[bytes], path), compression={"method": None})
        if not isinstance(path, ExcelWriter):
            self.handles = get_handle(
                path, mode, storage_options=storage_options, is_text=False
            )
        self.sheets: dict[str, Any] = {}
        self.cur_sheet = None

        if date_format is None:
            self.date_format = "YYYY-MM-DD"
        else:
            self.date_format = date_format
        if datetime_format is None:
            self.datetime_format = "YYYY-MM-DD HH:MM:SS"
        else:
            self.datetime_format = datetime_format

        self.mode = mode

    def __fspath__(self):
        """Return the ``name`` of the underlying handle, or ``""`` if it has none."""
        return getattr(self.handles.handle, "name", "")

    def _get_sheet_name(self, sheet_name):
        """
        Return ``sheet_name``, falling back to ``self.cur_sheet`` when it is None.

        Raises
        ------
        ValueError
            If both ``sheet_name`` and ``self.cur_sheet`` are None.
        """
        if sheet_name is None:
            sheet_name = self.cur_sheet
        if sheet_name is None:  # pragma: no cover
            raise ValueError("Must pass explicit sheet_name or set cur_sheet property")
        return sheet_name

    def _value_with_fmt(self, val):
        """
        Convert numpy types to Python types for the Excel writers.

        Parameters
        ----------
        val : object
            Value to be written into cells

        Returns
        -------
        Tuple with the first element being the converted value and the second
            being an optional format
        """
        fmt = None

        if is_integer(val):
            val = int(val)
        elif is_float(val):
            val = float(val)
        elif is_bool(val):
            val = bool(val)
        elif isinstance(val, datetime.datetime):
            # datetime must be tested before date: it is a date subclass
            fmt = self.datetime_format
        elif isinstance(val, datetime.date):
            fmt = self.date_format
        elif isinstance(val, datetime.timedelta):
            # express the duration as a number of days (86400 seconds per day)
            val = val.total_seconds() / 86400
            fmt = "0"
        else:
            val = str(val)

        return val, fmt

    @classmethod
    def check_extension(cls, ext: str):
        """
        Check the path's extension against the Writer's supported extensions.

        A leading ``"."`` is stripped, then ``ext`` is accepted if it is a
        substring of any entry in ``cls.supported_extensions``.

        Returns
        -------
        bool
            Always ``True`` when the extension is accepted.

        Raises
        ------
        ValueError
            If the extension is not supported.
        """
        if ext.startswith("."):
            ext = ext[1:]
        # error: "Callable[[ExcelWriter], Any]" has no attribute "__iter__" (not
        #  iterable)
        if not any(
            ext in extension
            for extension in cls.supported_extensions  # type: ignore[attr-defined]
        ):
            raise ValueError(f"Invalid extension for engine '{cls.engine}': '{ext}'")
        return True

    # Allow use as a contextmanager
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """synonym for save, to make it more file-like"""
        try:
            content = self.save()
        finally:
            # Release the file handles even when save() raises, so a failed
            # save does not leak an open file.
            self.handles.close()
        return content