def sheet_to_df(self, index=1, header_rows=1, start_row=1, sheet=None):
    """
    Read the open worksheet and return its contents as a DataFrame.

    Parameters
    ----------
    index : int
        col number of index column, 0 or None for no index (default 1)
    header_rows : int
        number of rows that represent headers (default 1)
    start_row : int
        row number for first row of headers or data (default 1)
    sheet : str,int
        optional, if you want to open a different sheet first,
        see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
        (default None)

    Returns
    -------
    DataFrame
        DataFrame with the data from the Worksheet

    Raises
    ------
    NoWorksheetException
        if there is no open worksheet to read from
    MissMatchException
        if there is data and the number of header columns differs from the
        number of data columns
    """
    if sheet is not None:
        self.open_sheet(sheet)

    if not self.sheet:
        raise NoWorksheetException("No open worksheet")

    raw_rows = self._retry_func(self.sheet.get_all_values)
    # start_row is 1-based, so shift it to a 0-based slice offset
    rows = self._fix_merge_values(raw_rows)[start_row - 1 :]

    headers = parse_sheet_headers(rows, header_rows)

    # Blank cells become NaN so that fully blank rows can be dropped; the
    # blanks that remain are then turned back into empty strings.
    frame = pd.DataFrame(rows[header_rows or 0 :])
    frame = frame.replace("", np.nan)
    frame = frame.dropna(how="all")
    frame = frame.fillna("")

    if headers is None:
        return parse_sheet_index(frame, index)

    if len(frame.columns) != len(headers):
        if len(frame) != 0:
            raise MissMatchException(
                "Column headers don't match number of data columns"
            )
        # headers but no data: put the column headers on the empty frame
        frame = frame.reindex(columns=headers)
    else:
        frame.columns = headers

    return parse_sheet_index(frame, index)
def sheet_to_df(self, index=1, headers=1, header_rows=1, start_row=1, sheet=None):
    """
    Pull a worksheet into a DataFrame.

    :param int index: col number of index column, 0 or None for no index
        (default 1)
    :param int headers: (DEPRECATED - use `header_rows`) number of rows that
        represent headers (default 1)
    :param int header_rows: number of rows that represent headers (default 1)
    :param int start_row: row number for first row of headers or data
        (default 1)
    :param str,int sheet: optional, if you want to open a different sheet
        first, see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
        (default None)
    :returns: a DataFrame with the data from the Worksheet
    :raises Exception: if no worksheet is open, or if there is data and the
        number of header columns differs from the number of data columns
    """
    # Compare against None rather than truthiness so that a sheet index of 0
    # still opens that sheet instead of being silently ignored.
    if sheet is not None:
        self.open_sheet(sheet)

    if not self.sheet:
        raise Exception("No open worksheet")

    # A non-default `headers` means the caller used the legacy argument; it
    # overrides `header_rows` after emitting the deprecation notice.
    if headers != 1:
        deprecate("headers has been deprecated, use header_rows instead")
        header_rows = headers

    vals = self._retry_get_all_values()
    # start_row is 1-based, so shift it to a 0-based slice offset
    vals = self._fix_merge_values(vals)[start_row - 1:]

    col_names = parse_sheet_headers(vals, header_rows)

    # remove rows where everything is null, then replace nulls with ''
    df = (
        pd.DataFrame(vals[header_rows or 0:])
        .replace('', np.nan)
        .dropna(how='all')
        .fillna('')
    )

    if col_names is not None:
        if len(df.columns) == len(col_names):
            df.columns = col_names
        elif len(df) == 0:
            # if we have headers but no data, set column headers on empty DF
            df = df.reindex(columns=col_names)
        else:
            raise Exception(
                "Column headers don't match number of data columns")

    return parse_sheet_index(df, index)
def test_multiheader_blank_bottom(self, df_multiheader_blank_bottom):
    """Indexing the blank-bottom multiheader fixture by column 1 names the index "col1"."""
    indexed = util.parse_sheet_index(df_multiheader_blank_bottom, 1)
    assert indexed.index.name == "col1"
def test_multiheader_blank_top(self, df_multiheader_blank_top):
    """Indexing the blank-top multiheader fixture by column 1 names the index "subcol1"."""
    indexed = util.parse_sheet_index(df_multiheader_blank_top, 1)
    assert indexed.index.name == "subcol1"
def test_multiheader2(self, df_multiheader):
    """Indexing the multiheader fixture by column 2 names the index "subcol2"."""
    indexed = util.parse_sheet_index(df_multiheader, 2)
    assert indexed.index.name == "subcol2"
def test_noop(self, df):
    """An index argument of 0 leaves the fixture's index name as "test_index"."""
    untouched = util.parse_sheet_index(df, 0)
    assert untouched.index.name == "test_index"
def test_normal(self, df):
    """Indexing the plain fixture by column 1 names the index "col1"."""
    indexed = util.parse_sheet_index(df, 1)
    assert indexed.index.name == "col1"
def sheet_to_df(
    self,
    index=1,
    header_rows=1,
    start_row=1,
    unformatted_columns=None,
    formula_columns=None,
    sheet=None,
):
    """
    Pull a worksheet into a DataFrame.

    Parameters
    ----------
    index : int
        col number of index column, 0 or None for no index (default 1)
    header_rows : int
        number of rows that represent headers; a falsy value (0 or None)
        means the data starts at ``start_row`` (default 1)
    start_row : int
        row number for first row of headers or data (default 1)
    unformatted_columns : list
        column numbers or names for columns you'd like to pull in as
        unformatted values (default None)
    formula_columns : list
        column numbers or names for columns you'd like to pull in as
        actual formulas (default None)
    sheet : str,int
        optional, if you want to open a different sheet first,
        see :meth:`open_sheet <gspread_pandas.spread.Spread.open_sheet>`
        (default None)

    Returns
    -------
    DataFrame
        DataFrame with the data from the Worksheet
    """
    self._ensure_sheet(sheet)

    vals = self.sheet.get_all_values()
    # start_row is 1-based, so shift it to a 0-based slice offset
    vals = self._fix_merge_values(vals)[start_row - 1:]

    col_names = parse_sheet_headers(vals, header_rows)

    # Normalize once so a None header_rows behaves like 0 both in the slice
    # below and in the row offset handed to _fix_value_render.
    header_count = header_rows or 0

    # remove rows where everything is null, then replace nulls with ''
    df = (
        pd.DataFrame(vals[header_count:])
        .replace("", np.nan)
        .dropna(how="all")
        .fillna("")
    )

    # replace values with a different value render option before we set the
    # index in set_col_names
    first_data_offset = header_count + start_row - 1
    if unformatted_columns:
        self._fix_value_render(
            df,
            first_data_offset,
            col_names,
            unformatted_columns,
            "UNFORMATTED_VALUE",
        )

    if formula_columns:
        self._fix_value_render(
            df,
            first_data_offset,
            col_names,
            formula_columns,
            "FORMULA",
        )

    df = set_col_names(df, col_names)

    return parse_sheet_index(df, index)
def test_parse_sheet_index_multiheader2(df_multiheader):
    """With multiple header rows, the index name should come from the lower heading."""
    indexed = util.parse_sheet_index(df_multiheader, 2)
    assert indexed.index.name == 'subcol2'
def test_parse_sheet_index_noop(df):
    """An index argument of 0 leaves the fixture's index name as 'test_index'."""
    untouched = util.parse_sheet_index(df, 0)
    assert untouched.index.name == 'test_index'
def test_parse_sheet_index(df):
    """Indexing the plain fixture by column 1 names the index 'col1'."""
    indexed = util.parse_sheet_index(df, 1)
    assert indexed.index.name == 'col1'