def _get_update_chunks(self, start, end, vals):
    """Split a rectangular range update into row-aligned chunks.

    The range is cut into consecutive bands of whole rows so that each band
    holds at most ``self._max_range_chunk_size`` cells. If a single row is
    already wider than that limit, one full row is sent per chunk instead of
    computing a zero-sized chunk.

    Parameters
    ----------
    start : tuple,str
        top-left cell, anything accepted by ``get_cell_as_tuple``
    end : tuple,str
        bottom-right cell, anything accepted by ``get_cell_as_tuple``
    vals : list
        flat list of values, ordered left to right across rows

    Yields
    ------
    tuple
        ``(start_cell, end_cell, val_chunk)`` where the two cells are
        ``(row, col)`` tuples delimiting the band and ``val_chunk`` is the
        matching piece of ``vals`` as produced by ``chunks``

    Raises
    ------
    MissMatchException
        if ``len(vals)`` differs from the number of cells in the range
        (raised when the generator is first advanced)
    """
    start = get_cell_as_tuple(start)
    end = get_cell_as_tuple(end)

    num_cols = end[COL] - start[COL] + 1
    num_rows = end[ROW] - start[ROW] + 1

    if num_cols * num_rows != len(vals):
        raise MissMatchException("Number of values needs to match number of cells")

    # Chunks are always made of whole rows. Floor division yields 0 when one
    # row is wider than the limit, which would request zero-sized chunks, so
    # fall back to a single row per chunk in that case.
    chunk_rows = max(1, self._max_range_chunk_size // num_cols)
    chunk_size = chunk_rows * num_cols
    last_row = start[ROW] + num_rows - 1

    # Seed with the row just above the range so the first band starts on
    # the range's first row.
    end_cell = (start[ROW] - 1, 0)

    for val_chunks in chunks(vals, int(chunk_size)):
        start_cell = (end_cell[ROW] + 1, start[COL])
        # The last band may be shorter than chunk_rows; never run past the
        # bottom of the requested range.
        end_cell = (min(start_cell[ROW] + chunk_rows - 1, last_row), end[COL])
        yield start_cell, end_cell, val_chunks
def update_cells(self, start, end, vals, sheet=None):
    """Update the values in a given range.

    The values should be listed in order from left to right across rows.

    Parameters
    ----------
    start : tuple,str
        tuple indicating (row, col) or string like 'A1'
    end : tuple,str
        tuple indicating (row, col) or string like 'Z20'
    vals : list
        array of values to populate
    sheet : str,int,Worksheet
        optional, if you want to open a different sheet first,
        see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
        (default None)

    Returns
    -------
    None

    Raises
    ------
    NoWorksheetException
        if no worksheet is open
    MissMatchException
        if a chunk of values does not line up with the cells fetched for it
    """
    # Compare against None explicitly: 0 is a valid sheet index and must
    # not be treated as "no sheet given".
    if sheet is not None:
        self.open_sheet(sheet)

    if not self.sheet:
        raise NoWorksheetException("No open worksheet")

    # NOTE(review): a range whose start and end arguments compare equal is
    # skipped entirely, so nothing is written for a single-cell range. The
    # comparison is on the raw arguments, so 'A1' vs (1, 1) is not caught.
    # Kept as-is because callers may rely on it -- confirm intent.
    if start == end:
        return

    for start_cell, end_cell, val_chunks in self._get_update_chunks(
        start, end, vals
    ):
        rng = get_range(start_cell, end_cell)
        cells = self._retry_range(rng)

        if len(val_chunks) != len(cells):
            raise MissMatchException(
                "Number of chunked values doesn't match number of cells")

        for val, cell in zip(val_chunks, cells):
            cell.value = val

        self._retry_update(cells)
def sheet_to_df(self, index=1, header_rows=1, start_row=1, sheet=None):
    """Read the open worksheet and return its contents as a DataFrame.

    Parameters
    ----------
    index : int
        col number of index column, 0 or None for no index (default 1)
    header_rows : int
        number of rows that represent headers (default 1)
    start_row : int
        row number for first row of headers or data (default 1)
    sheet : str,int
        optional, if you want to open a different sheet first,
        see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
        (default None)

    Returns
    -------
    DataFrame
        DataFrame with the data from the Worksheet

    Raises
    ------
    NoWorksheetException
        if no worksheet is open
    MissMatchException
        if there is data and the parsed headers don't match its column count
    """
    if sheet is not None:
        self.open_sheet(sheet)

    if not self.sheet:
        raise NoWorksheetException("No open worksheet")

    fetched = self._retry_func(self.sheet.get_all_values)
    # Post-process with _fix_merge_values, then drop everything above
    # the (1-based) start_row.
    rows = self._fix_merge_values(fetched)[start_row - 1 :]
    headers = parse_sheet_headers(rows, header_rows)

    # Blank cells become NaN so fully-empty rows can be dropped; the
    # remaining NaNs are then turned back into empty strings.
    frame = pd.DataFrame(rows[header_rows or 0 :])
    frame = frame.replace("", np.nan)
    frame = frame.dropna(how="all")
    frame = frame.fillna("")

    if headers is None:
        return parse_sheet_index(frame, index)

    if len(frame.columns) == len(headers):
        frame.columns = headers
    elif len(frame) == 0:
        # headers but no data: still expose the header names as columns
        frame = frame.reindex(columns=headers)
    else:
        raise MissMatchException(
            "Column headers don't match number of data columns"
        )

    return parse_sheet_index(frame, index)
def update_cells(self, start, end, vals, sheet=None, raw_columns=None):
    """Update the values in a given range.

    The values should be listed in order from left to right across rows.

    Parameters
    ----------
    start : tuple,str
        tuple indicating (row, col) or string like 'A1'
    end : tuple,str
        tuple indicating (row, col) or string like 'Z20'
    vals : list
        array of values to populate
    sheet : str,int,Worksheet
        optional, if you want to open a different sheet first,
        see :meth:`open_sheet <gspread_pandas.spread.Spread.open_sheet>`
        (default None)
    raw_columns : list, int
        optional, column number or list of column numbers in the google
        sheet that should be interpreted as "RAW" input

    Returns
    -------
    None

    Raises
    ------
    TypeError
        if ``raw_columns`` is neither an int nor a list/tuple/set of ints
    MissMatchException
        if a chunk of values does not line up with the cells fetched for it
    """
    self._ensure_sheet(sheet)

    # Validate once, up front, with a real exception (asserts vanish under
    # ``python -O``), and normalise to a set for O(1) membership tests.
    if not raw_columns:
        raw_cols = frozenset()
    elif isinstance(raw_columns, int):
        raw_cols = frozenset((raw_columns,))
    elif isinstance(raw_columns, (list, tuple, set, frozenset)):
        raw_cols = frozenset(raw_columns)
    else:
        raise TypeError("raw_columns must be a list of ints")

    for start_cell, end_cell, val_chunks in self._get_update_chunks(
        start, end, vals
    ):
        rng = get_range(start_cell, end_cell)
        cells = self.sheet.range(rng)

        if len(val_chunks) != len(cells):
            raise MissMatchException(
                "Number of chunked values doesn't match number of cells")

        for val, cell in zip(val_chunks, cells):
            cell.value = val

        # Split the chunk by column in a single pass instead of scanning
        # the raw list once per cell.
        raw_cells = []
        user_cells = []
        for cell in cells:
            if cell.col in raw_cols:
                raw_cells.append(cell)
            else:
                user_cells.append(cell)

        # Skip either request when it would carry no cells.
        if raw_cells:
            self.sheet.update_cells(raw_cells, ValueInputOption.raw)
        if user_cells:
            self.sheet.update_cells(user_cells, ValueInputOption.user_entered)
def set_col_names(df, col_names):
    """Apply ``col_names`` to ``df``, including when ``df`` has no rows.

    Parameters
    ----------
    df : DataFrame
        frame whose columns should be renamed; modified in place when the
        number of names matches its current number of columns
    col_names : list-like or None
        new column labels, or None to leave ``df`` untouched

    Returns
    -------
    DataFrame
        ``df`` itself, or a reindexed frame carrying ``col_names`` when
        ``df`` has no rows and a different number of columns

    Raises
    ------
    MissMatchException
        if ``df`` has rows and the number of names differs from its columns
    """
    if col_names is None:
        return df

    if len(df.columns) == len(col_names):
        df.columns = col_names
        return df

    if len(df) != 0:
        raise MissMatchException(
            "Column headers don't match number of data columns")

    # headers but no data: build the columns on the empty frame
    return df.reindex(columns=col_names)
def update_cells(self, start, end, vals, sheet=None):
    """Write a flat list of values into a rectangular range of the sheet.

    The values should be listed in order from left to right across rows.

    Parameters
    ----------
    start : tuple,str
        tuple indicating (row, col) or string like 'A1'
    end : tuple,str
        tuple indicating (row, col) or string like 'Z20'
    vals : list
        array of values to populate
    sheet : str,int,Worksheet
        optional, if you want to open a different sheet first,
        see :meth:`open_sheet <gspread_pandas.client.Spread.open_sheet>`
        (default None)

    Returns
    -------
    None

    Raises
    ------
    NoWorksheetException
        if no worksheet is open
    MissMatchException
        if a chunk of values does not line up with the cells fetched for it
    """
    if sheet is not None:
        self.open_sheet(sheet)

    if not self.sheet:
        raise NoWorksheetException("No open worksheet")

    pending = self._get_update_chunks(start, end, vals)
    for first_cell, last_cell, chunk_vals in pending:
        cell_range = get_range(first_cell, last_cell)
        fetch_cells = partial(self.sheet.range, cell_range)
        cells = self._retry_func(fetch_cells)

        if len(chunk_vals) != len(cells):
            raise MissMatchException(
                "Number of chunked values doesn't match number of cells"
            )

        # Lengths are known to match here, so pairing is one-to-one.
        for cell, value in zip(cells, chunk_vals):
            cell.value = value

        push_cells = partial(self.sheet.update_cells, cells, "USER_ENTERED")
        self._retry_func(push_cells)