def sheet_to_df(self, index=1, header_rows=1, start_row=1, sheet=None):
    """
    Read the values of a worksheet and return them as a DataFrame.

    Parameters
    ----------
    index : int
        col number of index column, 0 or None for no index (default 1)
    header_rows : int
        number of rows that represent headers (default 1)
    start_row : int
        row number for first row of headers or data (default 1)
    sheet : str,int
        optional, if you want to open a different sheet first,
        see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
        (default None)

    Returns
    -------
    DataFrame
        DataFrame with the data from the Worksheet

    Raises
    ------
    NoWorksheetException
        if no worksheet is open
    MissMatchException
        if there is data and the number of header columns differs from
        the number of data columns
    """
    if sheet is not None:
        self.open_sheet(sheet)

    if not self.sheet:
        raise NoWorksheetException("No open worksheet")

    raw_rows = self._retry_func(self.sheet.get_all_values)
    rows = self._fix_merge_values(raw_rows)[start_row - 1:]

    col_names = parse_sheet_headers(rows, header_rows)

    # Blank cells become NaN so rows that are entirely empty can be dropped;
    # whatever NaNs remain are turned back into empty strings.
    data_rows = rows[header_rows or 0:]
    frame = pd.DataFrame(data_rows)
    frame = frame.replace("", np.nan)
    frame = frame.dropna(how="all")
    frame = frame.fillna("")

    # Without headers there is nothing to label; go straight to the index.
    if col_names is None:
        return parse_sheet_index(frame, index)

    if len(frame.columns) == len(col_names):
        frame.columns = col_names
    elif len(frame) == 0:
        # headers but no data: build the columns on the empty frame
        frame = frame.reindex(columns=col_names)
    else:
        raise MissMatchException(
            "Column headers don't match number of data columns"
        )

    return parse_sheet_index(frame, index)
def sheet_to_df(self, index=1, headers=1, header_rows=1, start_row=1, sheet=None):
    """
    Pull a worksheet into a DataFrame.

    :param int index: col number of index column, 0 or None for no index
        (default 1)
    :param int headers: (DEPRECATED - use `header_rows`) number of rows that
        represent headers (default 1)
    :param int header_rows: number of rows that represent headers (default 1)
    :param int start_row: row number for first row of headers or data
        (default 1)
    :param str,int sheet: optional, if you want to open a different sheet
        first, see
        :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
        (default None)
    :returns: a DataFrame with the data from the Worksheet
    :raises Exception: if no worksheet is open, or if there is data and the
        number of header columns differs from the number of data columns
    """
    # Compare against None rather than truthiness so that an integer sheet
    # reference of 0 is still passed on to open_sheet.
    if sheet is not None:
        self.open_sheet(sheet)

    if not self.sheet:
        raise Exception("No open worksheet")

    # Any non-default value of the deprecated argument overrides header_rows.
    if headers != 1:
        deprecate("headers has been deprecated, use header_rows instead")
        header_rows = headers

    vals = self._retry_get_all_values()
    vals = self._fix_merge_values(vals)[start_row - 1:]

    col_names = parse_sheet_headers(vals, header_rows)

    # remove rows where everything is null, then replace nulls with ''
    df = (
        pd.DataFrame(vals[header_rows or 0:])
        .replace('', np.nan)
        .dropna(how='all')
        .fillna('')
    )

    if col_names is not None:
        if len(df.columns) == len(col_names):
            df.columns = col_names
        elif len(df) == 0:
            # if we have headers but no data, set column headers on empty DF
            df = df.reindex(columns=col_names)
        else:
            raise Exception(
                "Column headers don't match number of data columns")

    return parse_sheet_index(df, index)
def test_multiheader3(self, data_multiheader):
    """With three header rows, 'test_index' and 1 should be shifted up."""
    top_level = ["test_index", "col1", "col1"]
    middle_level = [1, "subcol1", "subcol2"]
    bottom_level = ["", 2, 3]
    expected = pd.MultiIndex.from_arrays(
        [top_level, middle_level, bottom_level]
    )

    parsed = util.parse_sheet_headers(data_multiheader, 3)

    assert parsed.equals(expected)
def test_normal(self, data_multiheader):
    """With one header row, the parsed headers should equal a flat Index."""
    labels = ["", "col1", "col1"]
    expected = pd.Index(labels)

    parsed = util.parse_sheet_headers(data_multiheader, 1)

    assert parsed.equals(expected)
def test_empty(self, data_empty):
    """With the empty fixture and zero header rows, the result is None."""
    parsed = util.parse_sheet_headers(data_empty, 0)

    assert parsed is None
def sheet_to_df(
    self,
    index=1,
    header_rows=1,
    start_row=1,
    unformatted_columns=None,
    formula_columns=None,
    sheet=None,
):
    """
    Pull a worksheet into a DataFrame.

    Parameters
    ----------
    index : int
        col number of index column, 0 or None for no index (default 1)
    header_rows : int
        number of rows that represent headers; a falsy value (0 or None)
        is treated as no header rows when slicing the data (default 1)
    start_row : int
        row number for first row of headers or data (default 1)
    unformatted_columns : list
        column numbers or names for columns you'd like to pull in as
        unformatted values (default None)
    formula_columns : list
        column numbers or names for columns you'd like to pull in as
        actual formulas (default None)
    sheet : str,int
        optional, if you want to open a different sheet first,
        see :meth:`open_sheet <gspread_pandas.spread.Spread.open_sheet>`
        (default None)

    Returns
    -------
    DataFrame
        DataFrame with the data from the Worksheet
    """
    self._ensure_sheet(sheet)

    vals = self.sheet.get_all_values()
    vals = self._fix_merge_values(vals)[start_row - 1:]

    col_names = parse_sheet_headers(vals, header_rows)

    # Normalize once so the data slice and the render offset below agree;
    # previously only the slice tolerated header_rows=None and the offset
    # arithmetic raised TypeError.
    header_count = header_rows or 0

    # remove rows where everything is null, then replace nulls with ''
    df = (
        pd.DataFrame(vals[header_count:])
        .replace("", np.nan)
        .dropna(how="all")
        .fillna("")
    )

    # replace values with a different value render option before we set the
    # index in set_col_names
    if unformatted_columns or formula_columns:
        # number of sheet rows that precede the data held in df: the header
        # rows plus everything skipped before start_row
        rows_before_data = header_count + start_row - 1

        if unformatted_columns:
            self._fix_value_render(
                df,
                rows_before_data,
                col_names,
                unformatted_columns,
                "UNFORMATTED_VALUE",
            )

        if formula_columns:
            self._fix_value_render(
                df,
                rows_before_data,
                col_names,
                formula_columns,
                "FORMULA",
            )

    df = set_col_names(df, col_names)

    return parse_sheet_index(df, index)
def test_parse_sheet_headers_multiheader3(data_multiheader):
    """With three header rows, 'test_index' and 1 should be shifted up."""
    level_arrays = [
        ['test_index', 'col1', 'col1'],
        [1, 'subcol1', 'subcol2'],
        ['', 2, 3],
    ]
    expected = pd.MultiIndex.from_arrays(level_arrays)

    parsed = util.parse_sheet_headers(data_multiheader, 3)

    assert parsed.equals(expected)
def test_parse_sheet_headers_normal(data_multiheader):
    """With one header row, the parsed headers should equal a flat Index."""
    labels = ['', 'col1', 'col1']
    expected = pd.Index(labels)

    parsed = util.parse_sheet_headers(data_multiheader, 1)

    assert parsed.equals(expected)
def test_parse_sheet_headers_multiheader(data_multiheader):
    """With two header rows, 'test_index' should be shifted up."""
    top_level = ["test_index", "col1", "col1"]
    bottom_level = ["", "subcol1", "subcol2"]
    expected = pd.MultiIndex.from_arrays([top_level, bottom_level])

    parsed = util.parse_sheet_headers(data_multiheader, 2)

    assert parsed.equals(expected)