def get_or_create_sheet(wks: gspread.Spreadsheet, name, *, rows=0, cols=0) -> gspread.Worksheet:
    """Return the worksheet titled ``name``, adding it when it is missing.

    Args:
        wks: Spreadsheet to look the worksheet up in.
        name: Title of the worksheet to fetch or create.
        rows: Row count forwarded to ``add_worksheet`` on creation.
        cols: Column count forwarded to ``add_worksheet`` on creation.

    Returns:
        The worksheet that already existed, or the one just added.
    """
    try:
        found = wks.worksheet(name)
    except gspread.WorksheetNotFound:
        logger.info("worksheet %r is not found in %r, creating it", name, wks)
        return wks.add_worksheet(title=name, rows=rows, cols=cols)
    return found
def get_worksheet(sheet: Spreadsheet, worksheet_name: str) -> Worksheet:
    """Return the first worksheet whose title equals ``worksheet_name``.

    Args:
        sheet: Spreadsheet whose ``worksheets()`` are searched in order.
        worksheet_name: Exact title to look for.

    Returns:
        The first worksheet with a matching title.

    Raises:
        Exception: If no worksheet carries that title.
    """
    not_found = object()
    candidates = (tab for tab in sheet.worksheets() if tab.title == worksheet_name)
    hit = next(candidates, not_found)
    if hit is not_found:
        # The message reads "the worksheet does not exist".
        raise Exception('worksheetが存在しません')
    return hit
def from_spreadsheet(cls, sheet: Spreadsheet) -> TrackingPlan:
    """Build a :class:`~gaunit.TrackingPlan` from a Google Spreadsheet.

    Every worksheet of the spreadsheet becomes one test case: the worksheet
    title is the test case name and its records (read through gspread and
    passed through ``format_events``) are the expected events.
    See Documentation for the spreadsheet format.

    Examples:
        >>> import gspread
        >>> from gaunit import TrackingPlan
        >>> gc = gspread.service_account()  # authentication
        >>> sh = gc.open("Example spreadsheet")  # open spreadsheet
        >>> tp = TrackingPlan.from_spreadsheet(sh)  # import tracking plan

    Args:
        sheet (gspread.Spreadsheet): gspread instance of the spreadsheet to import

    Returns:
        :class:`TrackingPlan` instance.
    """
    plan = TrackingPlan()
    for tab in sheet.worksheets():
        expected_events = format_events(tab.get_all_records())
        plan.add_test_case(tab.title, expected_events)
    return plan
def open(self, title):
    """
    Opens a spreadsheet by its title.

    :param title: A title of a spreadsheet.
    :type title: str

    :returns: a :class:`~gspread.spreadsheet.Spreadsheet` instance.

    If there's more than one spreadsheet with same title the first one
    will be opened.

    :raises gspread.SpreadsheetNotFound: if no spreadsheet with
                                         specified `title` is found.

    >>> c = gspread.authorize(credentials)
    >>> c.open('My fancy spreadsheet')
    """
    def _named_like_title(entry):
        return entry["name"] == title

    try:
        file_props = finditem(_named_like_title, self.list_spreadsheet_files(title))
        # Drive uses different terminology: copy its "name" into "title".
        file_props["title"] = file_props["name"]
        return Spreadsheet(self, file_props)
    except StopIteration:
        raise SpreadsheetNotFound
def get_sheet(spreadsheet: Spreadsheet, title: str, rows: int = None, cols: int = None) -> Worksheet:
    """Fetch the worksheet called ``title``, optionally creating it.

    Args:
        spreadsheet: Spreadsheet to look the worksheet up in.
        title: Title of the wanted worksheet.
        rows: Row count for a newly created worksheet.
        cols: Column count for a newly created worksheet.

    Returns:
        The existing worksheet; or a newly added one when it was missing and
        both ``rows`` and ``cols`` are truthy; otherwise ``None``.
    """
    try:
        return spreadsheet.worksheet(title)
    except WorksheetNotFound:
        # Creation needs both dimensions; without them report "no sheet".
        if not (rows and cols):
            return None
        return spreadsheet.add_worksheet(title=title, rows=rows, cols=cols)
def create_or_replace_worksheet(spreadsheet: gspread.Spreadsheet, worksheet_name: str) -> gspread.Worksheet:
    """Creates or replaces a worksheet with name `worksheet_name`.

    Note(chris): The worksheet is deleted and re-added rather than cleared so
    that its state is totally clean. Other methods of clearing a worksheet
    using gspread did not clear conditional formatting rules.

    Args:
        spreadsheet: Spreadsheet
        worksheet_name: Name of worksheet.

    Returns:
        Newly created Worksheet.
    """
    def _delete_or_swap(stale):
        # Returns the replacement worksheet when a swap was needed, else None.
        try:
            spreadsheet.del_worksheet(stale)
        except Exception:
            # If worksheet name exists but is the only worksheet, a temporary
            # sheet has to be added first, then the old one deleted.
            placeholder = spreadsheet.add_worksheet("tmp", 100, 100)
            spreadsheet.del_worksheet(stale)
            placeholder.update_title(worksheet_name)
            return placeholder
        return None

    try:
        replacement = _delete_or_swap(spreadsheet.worksheet(worksheet_name))
    except gspread.WorksheetNotFound:
        replacement = None

    if replacement is not None:
        return replacement
    return spreadsheet.add_worksheet(worksheet_name, 100, 100)
def get_dataframe_from_gsheet(sheet: gspread.Spreadsheet, columns: list) -> pd.DataFrame:
    """
    Read every value from the google sheet into a DataFrame.

    NOTE: the first row returned by the sheet (its headers) is dropped and
    the supplied ``columns`` are used as column labels instead.

    :param sheet: (gspread.Spreadsheet) object exposing ``get_all_values()``
    :param columns: (list of str) labels for the resulting columns
    :return: (pd.DataFrame)
    """
    every_row = sheet.get_all_values()
    rows_without_header = every_row[1:]
    return pd.DataFrame(data=rows_without_header, columns=columns)
def create_or_clear_worksheet(sheet: gspread.Spreadsheet, worksheet_name: str) -> gspread.Worksheet:
    """Creates or clears a worksheet with name `worksheet_name`.

    An existing worksheet is cleared and returned; a missing one is added
    with 100 rows and 100 columns.

    Args:
        sheet: Spreadsheet
        worksheet_name: Name of worksheet.

    Returns:
        Worksheet with name `worksheet_name`.
    """
    try:
        existing = sheet.worksheet(worksheet_name)
        existing.clear()
    except gspread.WorksheetNotFound:
        existing = None

    if existing is None:
        return sheet.add_worksheet(worksheet_name, 100, 100)
    return existing
def get_genre_colors(genre_sheet: Spreadsheet) -> Dict[str, Tuple[str, str]]:
    """Map each genre name to its ``(background, foreground)`` hex colors.

    The background color is the lower-cased text of the "Color (#Hex)" column.
    The foreground color is derived from the effective text format of the
    first cell of the row the background was found on.

    Genres that share a color have an empty "Color (#Hex)" value on all but
    one of their rows (presumably because the cell is merged), so the search
    climbs upwards from the genre's own row until a non-empty value is found.
    The climb stops at the first record: it never wraps around to the bottom
    of the sheet.

    Args:
        genre_sheet: Spreadsheet containing the ``GENRE_INFO_SHEET_NAME`` tab.

    Returns:
        Mapping of genre name to ``(background_hex, foreground_hex)``. Rows
        named "?", "Spare Color" or "Total" are skipped.

    Raises:
        ValueError: If no row at or above a genre's row carries a color.
    """
    genre_info_tab: Worksheet = genre_sheet.worksheet(GENRE_INFO_SHEET_NAME)
    genre_to_color: Dict[str, Tuple[str, str]] = {}
    all_records: List[Dict] = genre_info_tab.get_all_records()
    # Formats start at row 2 (below the header), so all_formats[i] lines up
    # with all_records[i].
    all_formats: List[List[CellFormat]] = list(
        get_effective_formats(genre_info_tab,
                              row_start=2,
                              col_start=1,
                              col_end=genre_info_tab.col_count,
                              row_end=genre_info_tab.row_count))
    ignored_names = {"?", "Spare Color", "Total"}

    for row_num, record in enumerate(all_records):
        name: str = record["Genre"]
        if name in ignored_names:
            continue

        # Keep climbing until the row that contains the color is found.
        # Done for genres that share colors and thus have a merged cell,
        # e.g. Ambient and Atmospheric, Pop and Disco.
        # For unmerged cells the first candidate already matches.
        for color_row in range(row_num, -1, -1):
            background_hex_color: str = all_records[color_row]["Color (#Hex)"].lower()
            if background_hex_color:
                break
        else:
            raise ValueError(
                f"no color found at or above the row of genre {name!r}")

        foreground: CellFormatComponent = all_formats[color_row][0].textFormat.foregroundColor
        # Missing or None components count as 0.
        fr, fg, fb = (
            round((getattr(foreground, color_component, 0) or 0) * 255)
            for color_component in ("red", "green", "blue")
        )
        foreground_hex_color: str = f"#{fr:02x}{fg:02x}{fb:02x}"
        genre_to_color[name] = (background_hex_color, foreground_hex_color)

    return genre_to_color
def update_all_small_cap_sheet(sh: Spreadsheet):
    """Rewrite the "All Small Cap Stocks" worksheet from fresh ticker data.

    Fetches the small-cap tickers (``min_volume=1``), converts their
    ``volume`` column to numbers, clears the worksheet, writes a header row
    followed by the data rows, and applies a ``###,###`` number format to
    the range ``F2:F``.

    Args:
        sh: Spreadsheet that contains the "All Small Cap Stocks" worksheet.
    """
    small_caps: DataFrame = get_small_cap_tickers(min_volume=1)
    small_caps['volume'] = pandas.to_numeric(small_caps['volume'])

    target: Worksheet = sh.worksheet("All Small Cap Stocks")
    target.clear()

    header_row = small_caps.columns.values.tolist()
    data_rows = small_caps.values.tolist()
    target.update([header_row] + data_rows)

    thousands_format = {'numberFormat': {"pattern": "###,###", "type": "NUMBER"}}
    target.format("F2:F", thousands_format)
    # NOTE(review): looks like a leftover progress marker - confirm whether
    # anything relies on this output before removing it.
    print('c')
def safe_open_sheet(book: gspread.Spreadsheet, sheet_name: str, rows=1000):
    """Open the worksheet ``sheet_name``; add it when it does not exist.

    A newly added worksheet gets ``rows`` rows and 20 columns.
    """
    try:
        existing = book.worksheet(sheet_name)
    except gspread.exceptions.WorksheetNotFound:
        return book.add_worksheet(sheet_name, rows=rows, cols=20)
    return existing
def build_up_track_information(genre_sheet: Spreadsheet,
                               subgenre_sheet: Spreadsheet, start: date,
                               end: date) -> List[Track]:
    """Collect the tracks released between ``end`` and ``start`` from both sheets.

    Reads the catalog tab of the Genre Sheet plus every Subgenre Sheet tab
    whose year range overlaps the requested years, converts each record to a
    track, and keeps the slice of each catalog that the bisect calls select
    for the ``start``/``end`` dates.

    NOTE(review): ``start`` appears to be the *newest* date and ``end`` the
    *oldest* (see ``range(end.year, start.year + 1)`` and the
    ``newest_index_inclusive`` naming) - confirm against the callers.

    Args:
        genre_sheet: Spreadsheet holding the ``GENRE_SHEET_CATALOG_SHEET_NAME`` tab.
        subgenre_sheet: Spreadsheet whose tabs are titled by year range.
        start: Date bound passed to ``bisect_right``.
        end: Date bound passed to ``bisect_left``.

    Returns:
        One flat list with the selected tracks of every catalog, the Genre
        Sheet catalog first, then the relevant Subgenre Sheet tabs in order.

    Raises:
        ValueError: If ``GENRE_SHEET_CATALOG_SHEET_NAME`` is ``None``.
    """
    if GENRE_SHEET_CATALOG_SHEET_NAME is None:
        raise ValueError(
            "the GENRE_SHEET_CATALOG_SHEET_NAME environment variable needs a value, like Main"
        )
    genre_sheet_catalog = genre_sheet.worksheet(GENRE_SHEET_CATALOG_SHEET_NAME)
    subgenre_sheet_tabs = subgenre_sheet.worksheets()
    # The naming scheme of tabs on the Subgenre Sheet is 2020-2024, 2015-2019, ..., Pre-2010s
    relevant_tabs: List[Worksheet] = []
    for tab in subgenre_sheet_tabs:
        years = parse("{min}-{max}", tab.title)
        # Tabs whose title does not follow the "<min>-<max>" scheme are ignored
        if years is None:
            continue
        if years["min"] == "Pre":
            # Since the tab is called Pre-2010s, we have to fix that by chopping off the last character
            # We are also treating year 0 as the earliest someone would ever make music
            year_range = range(0, int(years["max"][:-1]))
        else:
            year_range = range(int(years["min"]), int(years["max"]) + 1)
        # For instance, 2013-2020 needs 2010-2014, 2015-2019, and 2020-2024
        if any(year in year_range
               for year in range(end.year, start.year + 1)):
            relevant_tabs.append(tab)
    print(
        f"about to start hunting tracks from {start} to {end} down (this could take a while)"
    )
    # 1 for skipping the header row, + 1 for arrays starting at 0 = 2
    genre_sheet_tracks = [
        genre_sheet_record_to_track(record=record,
                                    row=row + 2,
                                    source_tab=GENRE_SHEET_CATALOG_SHEET_NAME,
                                    source_tab_id=genre_sheet_catalog.id)
        for row, record in enumerate(genre_sheet_catalog.get_all_records())
    ]
    # One list of tracks per relevant Subgenre Sheet tab
    subgenre_sheet_tracks = [[
        subgenre_sheet_record_to_track(record=record,
                                       row=row + 2,
                                       source_tab=subgenre_sheet_tab.title,
                                       source_tab_id=subgenre_sheet_tab.id)
        for row, record in enumerate(subgenre_sheet_tab.get_all_records())
    ] for subgenre_sheet_tab in relevant_tabs]
    chunks_of_rows = []
    for track_catalog in [genre_sheet_tracks, *subgenre_sheet_tracks]:
        track_list = list(track_catalog)
        # Presumably each catalog is ordered newest-first, which is why the
        # bisectable view is reversed - TODO confirm against the sheets.
        searchable_track_list = LazyBisectable(
            track_list,
            reversed=True,
            key=lambda track: datetime.strptime(track["release_date"],
                                                "%Y-%m-%d").date())
        # Double reverse: bisect positions refer to the reversed view, so they
        # are subtracted from the length to index into track_list
        newest_index_inclusive = len(track_list) - bisect_right(
            searchable_track_list, start)
        oldest_index_exclusive = len(track_list) - bisect_left(
            searchable_track_list, end)
        oldest_index_inclusive = oldest_index_exclusive - 1
        # NOTE(review): the prints below index track_list directly; if the
        # selected slice is empty these lookups may raise IndexError or show
        # an unrelated track (index -1) - confirm whether that can happen.
        print(
            f"--- {track_list[newest_index_inclusive]['source_name']}: {track_list[newest_index_inclusive]['source_tab']} ---"
        )
        print(
            f"{track_list[newest_index_inclusive]['title']} ({track_list[newest_index_inclusive]['release_date']}) тАФ {track_list[oldest_index_inclusive]['title']} ({track_list[oldest_index_inclusive]['release_date']})"
        )
        print()
        chunks_of_rows.append(
            track_list[newest_index_inclusive:oldest_index_exclusive])
    return list(chain(*chunks_of_rows))
def build_up_subgenre_information(
    genre_sheet: Spreadsheet
) -> Tuple[Dict[str, Dict[str, Any]], Dict[str, str]]:
    """Parse the genres tab into per-subgenre records and an alias lookup.

    Each row of the ``GENRES_SHEET_NAME`` tab contributes its first non-empty
    cell. The column of that cell gives the nesting depth: the left-most
    column holds genres, and a cell further right is a subgenre of the most
    recent entry one column to its left. Cell formats and notes are read for
    rows 2 to ``row_count`` and columns 1 to 8.

    Under ``__debug__`` several consistency checks drop into ``breakpoint()``
    instead of raising.

    Args:
        genre_sheet: Spreadsheet containing the genres tab (and the tab read
            by ``get_genre_colors``).

    Returns:
        A tuple ``(full_data, aliases)``. ``full_data`` maps every genre or
        subgenre name to a dict with the keys ``name``, ``alternative_names``,
        ``origins``, ``genre``, ``is_genre``, ``color`` and ``subgenres``.
        ``aliases`` maps each alternative name to its subgenre.

    Raises:
        ValueError: If ``GENRES_SHEET_NAME`` is ``None``.
    """
    if GENRES_SHEET_NAME is None:
        raise ValueError(
            "the environment variable GENRES_SHEET_NAME is missing and needs to refer to the tab name where genre information is stored on the Google Sheet")
    genres_tab: Worksheet = genre_sheet.worksheet(GENRES_SHEET_NAME)
    entries: Iterator[List[str]] = iter(genres_tab.get_all_values())
    # Not necessary for now, but it does advance the iterator which is important
    header: List[str] = [label.lower() for label in next(entries)]
    # 1-based sheet coordinates of the region whose formats and notes are read
    row_start: int = 2
    col_start: int = 1
    row_end: int = genres_tab.row_count
    col_end: int = 8
    all_formats = list(
        get_effective_formats(genres_tab,
                              row_start=row_start,
                              col_start=col_start,
                              row_end=row_end,
                              col_end=col_end))
    all_notes = list(
        get_notes(genres_tab,
                  row_start=row_start,
                  col_start=col_start,
                  row_end=row_end,
                  col_end=col_end))
    # All genres (not subgenres)
    genres: Set[str] = set()
    # All subgenres (including genres)
    subgenres: Set[str] = set()
    # Used to track the parents in the moment (very frequently overwritten)
    # It's a mapping of the column number to the row number and subgenre / genre
    hierarchy: Dict[int, Tuple[int, str]] = {}
    # The set of the parents to the subgenre or genre in question
    # A simplistic example is "Hard Trap": {"Rawstyle", "Hybrid Trap"}
    # Or "Future Bass": {"Trap (EDM)", "Wonky", "Electronic Dance Music", "Purple Sound"}
    origins: DefaultDict[str, Set[str]] = defaultdict(set)
    # Map of subgenre to the genre whose color it inherits
    # E.g. "Ambient Pop": "Ambient"
    # E.g. "Vaportrap": "Vaporwave"
    # E.g. "Future Bass": "Future Bass"
    subgenre_to_genre: Dict[str, str] = {}
    subgenre_to_alternative_names: DefaultDict[str, Set[str]] = defaultdict(
        set)
    row_num: int
    row: List[str]
    col_num: int
    subgenre: str
    for row_num, row in enumerate(entries, start=row_start):
        for col_num, subgenre in enumerate(row, start=col_start):
            # Skip empty cells
            if not subgenre:
                continue
            # We found a genre (as opposed to a subgenre)
            if col_num == col_start:
                # Be confident that all genres are bold
                if __debug__ and not any(
                        cell_format.textFormat.bold
                        for cell_format in all_formats[row_num - row_start]):
                    # Manually intervene to find the problem
                    breakpoint()
                # Add it to the genres set
                genres.add(subgenre)
            # Update the hierarchy information
            hierarchy[col_num] = (row_num, subgenre)
            # Add origin information for subgenres only (because there will be an index error for genres at the left-most column)
            if col_num > col_start:
                parent_row, parent_subgenre = hierarchy[col_num - 1]
                # Last entry in the format list for this row (this assumes the cells are properly merged)
                parent_format = all_formats[parent_row - row_start][-1]
                # Be confident that there are no subgenres of strikethroughed or italicized subgenres / genres
                if __debug__ and (parent_format.textFormat.strikethrough
                                  or parent_format.textFormat.italic):
                    # Manually intervene to see the problem
                    breakpoint()
                # Add the parent to this subgenre's origins
                # Because we're working with a defaultdict, this will find or create the set
                origins[subgenre].add(parent_subgenre)
            # If this subgenre belongs to this genre without being italicized or strikethroughed,
            # then it takes on its color
            subgenre_format = all_formats[row_num - row_start][-1]
            if not subgenre_format.textFormat.strikethrough and not subgenre_format.textFormat.italic:
                # hierarchy[1] is the most recent entry of the left-most (genre) column
                subgenre_to_genre[subgenre] = hierarchy[1][1]
            try:
                # This can raise an IndexError because the function just cuts off rows (at the end)
                # that don't have any notes
                this_rows_notes = all_notes[row_num - row_start]
            except IndexError:
                # The note above tells us that this row doesn't have any notes
                pass
            else:
                # Find the note in the row (the first non-empty one)
                try:
                    note: str = next(filter(bool, this_rows_notes))
                except StopIteration:
                    # There are no notes
                    pass
                else:
                    subgenre_to_alternative_names[subgenre].update(
                        parse_alternative_names(note))
            # Finally, add this to the list of subgenres
            subgenres.add(subgenre)
            # And move onto the next row (only the first non-empty cell of a row is used)
            break
    # Skip checks outside of debug mode (aka where assertions don't mean anything)
    if __debug__:
        # Be confident that every subgenre has origins and belongs to a genre / has a color
        for subgenre in subgenres:
            if (subgenre not in genres) and (subgenre not in subgenre_to_genre
                                             or subgenre not in origins):
                # Manually intervene to see the problem
                breakpoint()
        # Indexing the defaultdict here creates an empty origin set for any entry that lacks one
        for subgenre in subgenre_to_genre:
            if not len(origins[subgenre]):
                print(subgenre, "has no origin, if you were curious")
    # Finally, create a composite piece of data (for loading into Redis)
    full_data: DefaultDict[str, Dict[str, Any]] = defaultdict(dict)
    genre_to_color = get_genre_colors(genre_sheet)
    # Make sure the genres found in the genres tab matches the list
    # of genres from the genre stats tab
    if __debug__ and set(genre_to_color) != set(genres):
        # Manually intervene to see the problem
        breakpoint()
    aliases: Dict[str, str] = {}
    for subgenre in subgenres:
        full_data[subgenre]["name"] = subgenre
        full_data[subgenre]["alternative_names"] = tuple(
            sorted(subgenre_to_alternative_names[subgenre]))
        full_data[subgenre]["origins"] = tuple(origins[subgenre])
        # NOTE(review): raises KeyError for an entry that never received a
        # genre above (italic / strikethrough cells) - confirm that is intended
        full_data[subgenre]["genre"] = subgenre_to_genre[subgenre]
        for parent in origins[subgenre]:
            # Includes derivatives (the only difference is that derivatives are genres)
            full_data[parent].setdefault("subgenres", set()).add(subgenre)
        for alias in subgenre_to_alternative_names[subgenre]:
            aliases[alias] = subgenre
        is_genre: bool = subgenre in genres
        full_data[subgenre]["is_genre"] = is_genre
        full_data[subgenre]["color"] = genre_to_color[subgenre if is_genre else subgenre_to_genre[subgenre]]
    # Now that the loop above has completed, "freeze" the alternative names,
    # origins and subgenres of every entry into tuples (nothing is JSON-dumped
    # inside this function)
    for subgenre, data in full_data.items():
        data["alternative_names"] = tuple(data.get("alternative_names", ()))
        data["origins"] = tuple(data.get("origins", ()))
        data["subgenres"] = tuple(data.get("subgenres", ()))
    return full_data, aliases
def get_google_sheet(self, spreadsheet: gspread.Spreadsheet, sheet: int):
    """Load one worksheet into a DataFrame, using its first row as headers.

    Args:
        spreadsheet: Spreadsheet that contains the worksheet.
        sheet: Index handed to ``spreadsheet.get_worksheet``.

    Returns:
        A ``pd.DataFrame`` whose column labels are the worksheet's first row
        and whose rows are all remaining values. When the worksheet yields no
        rows at all, an empty DataFrame is returned instead of raising
        ``IndexError``.
    """
    worksheet = spreadsheet.get_worksheet(sheet)
    data = worksheet.get_all_values()
    if not data:
        # Nothing to use as a header row.
        return pd.DataFrame()
    headers, *rows = data
    return pd.DataFrame(rows, columns=headers)
def get_specific_tab_url(sheet: gspread.Spreadsheet, tab_name: str):
    """Build the url that points at one specific tab of the sheet.

    The result is the sheet's url followed by ``/edit#gid=<tab_id>``, where
    ``<tab_id>`` is the id of the worksheet named ``tab_name``.
    """
    base_url = sheet.url
    tab_id = sheet.worksheet(tab_name).id
    return "".join((base_url, '/edit#gid=', str(tab_id)))
def __format_sheet(self, wb: gspread.Spreadsheet, ws: gspread.Worksheet) -> tuple:
    """
    This *private* method formats the worksheet.

    Every format is expressed as a native Google Sheets API request and all
    of them are sent in one ``wb.batch_update()`` call. (``ws.format()``
    could do the cell formats too, but each call would be a separate API
    round trip.)

    A sheet id of ``0`` is a valid id and is formatted like any other sheet.

    :param wb: the Google Spreadsheet file
    :param ws: the one Google Worksheet in the file
    :return: ``(True, '')`` on success; ``(False, message)`` when the sheet
        id is missing or the batch update raises
    """
    error_message = 'There was an error formatting the Google Sheet.'

    num_rows = ws.row_count
    num_cols = ws.col_count

    # See the Google Sheets API and gspread documentation for help
    sheet_id = ws._properties['sheetId']
    if sheet_id is None:
        return False, error_message

    def repeat_cell(row_span, col_span, cell_format):
        # The field mask lists exactly the keys present in cell_format.
        return {
            'repeatCell': {
                'range': {
                    'sheetId': sheet_id,
                    'startRowIndex': row_span[0],
                    'endRowIndex': row_span[1],
                    'startColumnIndex': col_span[0],
                    'endColumnIndex': col_span[1]
                },
                'cell': {
                    'userEnteredFormat': cell_format
                },
                'fields': 'userEnteredFormat(' + ','.join(cell_format) + ')'
            }
        }

    def resize(dimension, start_index, end_index, pixel_size):
        return {
            'updateDimensionProperties': {
                'range': {
                    'sheetId': sheet_id,
                    'dimension': dimension,
                    'startIndex': start_index,
                    'endIndex': end_index
                },
                'properties': {
                    'pixelSize': pixel_size
                },
                'fields': 'pixelSize'
            }
        }

    def freeze(grid_property, count):
        return {
            'updateSheetProperties': {
                'properties': {
                    'sheetId': sheet_id,
                    'gridProperties': {
                        grid_property: count
                    }
                },
                'fields': 'gridProperties(' + grid_property + ')'
            }
        }

    requests = []

    if num_rows > 0 and num_cols > 3:
        # A1:D1 - Set the cell background color and text to bold
        requests.append(repeat_cell((0, 1), (0, 4), {
            'backgroundColor': {
                'red': 217 / 255,
                'green': 210 / 255,
                'blue': 233 / 255
            },
            'textFormat': {
                'bold': True
            }
        }))
        # A1:C? - Set the number format to TEXT and horizontal alignment to LEFT
        requests.append(repeat_cell((0, num_rows), (0, 3), {
            'numberFormat': {
                'type': 'TEXT'
            },
            'horizontalAlignment': 'LEFT'
        }))
        # D1:D? - Set the horizontal alignment to RIGHT
        requests.append(repeat_cell((0, num_rows), (3, 4), {
            'horizontalAlignment': 'RIGHT'
        }))

    if num_rows > 1 and num_cols > 3:
        # D2:D? - Set number format to NUMBER with 2 decimal places
        requests.append(repeat_cell((1, num_rows), (3, 4), {
            'numberFormat': {
                'type': 'NUMBER',
                'pattern': '0.00'
            }
        }))

    if num_rows > 0 and num_cols > 4:
        # E1:?1 - Set background color, horizontal alignment, number format, and bold.
        requests.append(repeat_cell((0, 1), (4, num_cols), {
            'backgroundColor': {
                'red': 201 / 255,
                'green': 218 / 255,
                'blue': 248 / 255
            },
            'horizontalAlignment': 'CENTER',
            'numberFormat': {
                'type': 'DATE',
                'pattern': 'm"/"d'
            },
            'textFormat': {
                'bold': True
            }
        }))

    if num_rows > 1 and num_cols > 4:
        # E2:?? - Set horizontal alignment to CENTER and number format to NUMBER with 2 decimals.
        requests.append(repeat_cell((1, num_rows), (4, num_cols), {
            'horizontalAlignment': 'CENTER',
            'numberFormat': {
                'type': 'NUMBER',
                'pattern': '0.00'
            }
        }))

    if num_cols > 3:
        # Columns A:D - Set the width to 100 pixels (default)
        requests.append(resize('COLUMNS', 0, 4, 100))

    if num_cols > 4:
        # Columns E:? - Set the width to 50 pixels
        requests.append(resize('COLUMNS', 4, num_cols, 50))

    if num_rows > 0:
        # Rows 1:? - Set the height to 21 pixels (default)
        requests.append(resize('ROWS', 0, num_rows, 21))

    if num_rows > 1:
        # Row 1 - Set to frozen
        requests.append(freeze('frozenRowCount', 1))

    if num_cols > 4:
        # Columns A:D - Set to frozen
        requests.append(freeze('frozenColumnCount', 4))

    if not requests:
        return True, ''

    try:
        wb.batch_update({'requests': requests})
    except Exception:
        # Best effort: report the failure to the caller instead of raising.
        return False, error_message
    return True, ''
def __remove_data_and_formatting(self, wb: gspread.Spreadsheet, ws: gspread.Worksheet) -> tuple:
    """
    This *private* method removes all data and formatting from the sheet.
    Without this, when new rows and columns are added, the format from the
    existing cells is used.

    A sheet id of ``0`` is a valid id and is reset like any other sheet.

    :param wb: the Google Spreadsheet file
    :param ws: the one Google Worksheet in the file
    :return: ``(True, '')`` on success; ``(False, message)`` when the sheet
        id is missing or clearing / the batch update raises
    """
    error_message = 'There was an error removing the previous data and formatting from the Google Sheet.'

    # See the Google Sheets API and gspread documentation for help
    sheet_id = ws._properties['sheetId']
    if sheet_id is None:
        return False, error_message

    def resize(dimension, end_index, pixel_size):
        return {
            'updateDimensionProperties': {
                'range': {
                    'sheetId': sheet_id,
                    'dimension': dimension,
                    'startIndex': 0,
                    'endIndex': end_index
                },
                'properties': {
                    'pixelSize': pixel_size
                },
                'fields': 'pixelSize'
            }
        }

    # Clear all formatting on the sheet
    requests = [{
        'updateCells': {
            'range': {
                'sheetId': sheet_id
            },
            'fields': 'userEnteredFormat'
        }
    }]

    col_count = ws.col_count
    if col_count > 0:
        # Reset the column width to 100 pixels (default) so that new columns are added with this default size.
        requests.append(resize('COLUMNS', col_count, 100))

    row_count = ws.row_count
    if row_count > 0:
        # Reset the row height to 21 pixels (default) so that new rows are added with this default size.
        requests.append(resize('ROWS', row_count, 21))

    # Unfreeze all rows and columns
    requests.append({
        'updateSheetProperties': {
            'properties': {
                'sheetId': sheet_id,
                'gridProperties': {
                    'frozenRowCount': 0,
                    'frozenColumnCount': 0
                }
            },
            'fields': 'gridProperties(frozenRowCount, frozenColumnCount)'
        }
    })

    try:
        # Clear all data on the sheet
        # ws.clear() cannot be part of the batch_update() call, so it is done separately
        ws.clear()
        wb.batch_update({'requests': requests})
    except Exception:
        # Best effort: report the failure to the caller instead of raising.
        return False, error_message
    return True, ''
def update_sheet_from_df(sheet: gspread.Spreadsheet, df: pd.DataFrame):
    """Write the dataframe to the google sheet, column labels first.

    The payload passed to ``sheet.update`` is one list of rows: the column
    labels of ``df`` followed by each of its data rows.
    """
    header_row = df.columns.values.tolist()
    data_rows = df.values.tolist()
    sheet.update([header_row, *data_rows])