def apply_data_validation(cogs_dir, data_validation_tables):
    """Apply one or more data validation rules to the sheets."""
    tracked_sheets = get_tracked_sheets(cogs_dir)
    add_dv_rules = {}
    for data_validation_table in data_validation_tables:
        add_rows = []
        for row in data_validation_table:
            sheet_title = row["table"]
            if sheet_title not in tracked_sheets.keys():
                raise ApplyError(f"'{sheet_title}' is not a tracked sheet")
            rule = clean_rule(row["table"], row["range"], row["condition"], row["value"])
            add_rows.append(rule)
        for row in add_rows:
            sheet_title = row["Sheet Title"]
            del row["Sheet Title"]
            if sheet_title in add_dv_rules:
                dv_rules = add_dv_rules[sheet_title]
            else:
                dv_rules = []
            dv_rules.append(row)
            add_dv_rules[sheet_title] = dv_rules
    update_data_validation(cogs_dir, add_dv_rules, [])

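# Illustrative sketch (not part of the original module): the shape of a data
# validation table accepted by apply_data_validation(). Each row is a dict
# (e.g. from csv.DictReader) with the keys read above ("table", "range",
# "condition", "value"); the sheet title and values shown are made up.
_example_data_validation_table = [
    {
        "table": "example",
        "range": "A2:A20",
        "condition": "ONE_OF_LIST",
        "value": "alpha, beta, gamma",
    }
]
# apply_data_validation(cogs_dir, [_example_data_validation_table])
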
def status(use_screen=True, verbose=False):
    """Return a dict containing:
    - changes (dict of new_version, added_cols, removed_cols, added_lines,
      removed_lines, changed_lines built from daff)
    - added local (sheet names)
    - removed local (sheet names)
    - added remote (sheet names)
    - removed remote (sheet names)
    If use_screen, print the status of local sheets vs. remote sheets."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    # Get the sets of changes
    tracked_sheets = get_tracked_sheets(cogs_dir)
    # Get rid of ignored sheets
    tracked_sheets = {
        x: y for x, y in tracked_sheets.items() if y.get("Ignore") != "True"
    }
    renamed = get_renamed_sheets(cogs_dir)
    changes = get_changes(cogs_dir, tracked_sheets, renamed)

    # Get a count of all changes
    change_count = set(
        list(changes["diffs"].keys())
        + changes["added local"]
        + changes["added remote"]
        + changes["removed local"]
        + changes["removed remote"]
    )
    if len(change_count) == 0 and len(renamed) == 0:
        return None
    if use_screen:
        print_status(changes, renamed, tracked_sheets)
    return changes

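# Illustrative sketch (not part of the original module): one way a caller might
# consume the dict returned by status(). Assumes it runs inside a valid COGS
# project; the helper name below is hypothetical.
def _summarize_status():
    changes = status(use_screen=False)
    if changes is None:
        print("Everything is up to date")
        return
    for sheet_title in changes["diffs"]:
        print(f"modified: {sheet_title}")
    for sheet_title in changes["added local"]:
        print(f"added locally: {sheet_title}")
    for sheet_title in changes["removed remote"]:
        print(f"removed remotely: {sheet_title}")
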
def clear(keyword, on_sheets=None, verbose=False):
    """Remove formats, notes, and/or data validation rules from one or more sheets."""
    cogs_dir = validate_cogs_project()
    set_logging(verbose)

    # Validate sheets
    tracked_sheets = get_tracked_sheets(cogs_dir)
    ignore = [x for x, y in tracked_sheets.items() if y.get("Ignore") == "True"]
    if not on_sheets:
        # If no sheet was supplied, clear from all
        on_sheets = [x for x in tracked_sheets.keys() if x not in ignore]
    untracked = []
    # Iterate over a copy so removing ignored sheets does not skip entries
    for st in list(on_sheets):
        if st not in tracked_sheets.keys():
            untracked.append(st)
        if st in ignore:
            logging.error(
                f"Cannot clear from ignored sheet '{st}' - this will be ignored"
            )
            on_sheets.remove(st)
    if untracked:
        raise ClearError(
            "The following sheet(s) are not part of this project: " + ", ".join(untracked)
        )

    if keyword == "formats":
        for st in on_sheets:
            clear_formats(cogs_dir, st)
    elif keyword == "notes":
        for st in on_sheets:
            clear_notes(cogs_dir, st)
    elif keyword == "validation":
        for st in on_sheets:
            clear_data_validation(cogs_dir, st)
    elif keyword == "all":
        for st in on_sheets:
            clear_formats(cogs_dir, st)
            clear_notes(cogs_dir, st)
            clear_data_validation(cogs_dir, st)
    else:
        raise ClearError("Unknown keyword: " + keyword)

def ls(verbose=False):
    """Return a list of [sheet, path] pairs."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    tracked_sheets = get_tracked_sheets(cogs_dir)
    ignore = [x for x, y in tracked_sheets.items() if y.get("Ignore") == "True"]
    sheet_details = [["Tracked:"]]
    for sheet, details in tracked_sheets.items():
        if sheet in ignore:
            continue
        sheet_details.append([" - " + sheet, "(" + details["Path"] + ")"])
    if ignore:
        sheet_details.append([])
        sheet_details.append(["Ignored:"])
        for i in ignore:
            sheet_details.append([" - " + i])
    return sheet_details

def diff(paths=None, use_screen=True, verbose=False):
    """Return a dict of sheet title to daff diff lines. If no paths are provided,
    diff over all sheets in the project.
    If use_screen, display an interactive curses screen with the diffs."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    sheets = get_tracked_sheets(cogs_dir)
    tracked_paths = [details["Path"] for details in sheets.values()]
    if paths:
        # Update sheets to diff
        for p in paths:
            if p not in tracked_paths:
                raise DiffError(f"sheet '{p}' is not part of the current project")
        sheets = {
            sheet_title: details
            for sheet_title, details in sheets.items()
            if details["Path"] in paths
        }

    diffs = {}
    for sheet_title, details in sheets.items():
        cached = get_cached_path(cogs_dir, sheet_title)
        local = details["Path"]
        if os.path.exists(local) and os.path.exists(cached):
            # Consider remote (cached) the old version to diff off of
            sheet_diff = get_diff(cached, local)
            diffs[sheet_title] = sheet_diff
    if not diffs:
        return None
    if use_screen:
        return display_diff(cogs_dir, diffs, sheets)
    return diffs

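# Illustrative sketch (not part of the original module): printing the daff diff
# lines returned by diff() without the interactive curses screen. Assumes a
# valid COGS project; the helper name below is hypothetical.
def _print_diffs(paths=None):
    diffs = diff(paths=paths, use_screen=False)
    if not diffs:
        print("No changes")
        return
    for sheet_title, diff_lines in diffs.items():
        print(f"=== {sheet_title} ===")
        for line in diff_lines:
            print(line)
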
def add_all(verbose=False):
    """Add all ignored sheets to the project as tracked sheets, assigning each
    a local TSV path, then update sheet.tsv."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    sheets = get_tracked_sheets(cogs_dir)
    sheet_lines = []
    for sheet_title, details in sheets.items():
        ignored = details.get("Ignore")
        if ignored:
            path = re.sub(r"[^A-Za-z0-9]+", "_", sheet_title.lower()) + ".tsv"
            if os.path.exists(path):
                now = datetime.now().strftime("%Y%m%d_%H%M%S")
                path = re.sub(r"[^A-Za-z0-9]+", "_", sheet_title.lower()) + f"_{now}.tsv"
            details["Path"] = path
            logging.info(
                f"Adding ignored sheet '{sheet_title}' to tracked sheets with path '{path}'"
            )
            del details["Ignore"]
        details["Title"] = sheet_title
        sheet_lines.append(details)
    update_sheet(cogs_dir, sheet_lines, [])

def rm(paths, keep=False, verbose=False):
    """Remove a table (TSV or CSV) from the COGS project.
    This updates sheet.tsv and deletes the corresponding cached file."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    # Make sure the sheets exist
    sheets = get_tracked_sheets(cogs_dir)
    path_to_sheet = {
        os.path.abspath(details["Path"]): sheet_title
        for sheet_title, details in sheets.items()
    }

    # Check for either untracked or ignored sheets in provided paths
    ignore = [x for x, y in sheets.items() if y.get("Ignore") == "True"]
    untracked = []
    ignored = []
    for p in paths:
        abspath = os.path.abspath(p)
        if abspath not in path_to_sheet:
            untracked.append(p)
        elif path_to_sheet[abspath] in ignore:
            ignored.append(p)
    if untracked:
        raise RmError(f"unable to remove untracked file(s): {', '.join(untracked)}.")
    if ignored:
        raise RmError(f"unable to remove ignored file(s): {', '.join(ignored)}")

    sheets_to_remove = {
        title: sheet for title, sheet in sheets.items() if sheet["Path"] in paths
    }

    # Make sure we are not deleting the last sheet, as Google Sheets would refuse to do so
    if len(sheets) - len(sheets_to_remove) == 0:
        raise RmError(
            f"unable to remove {len(sheets_to_remove)} tracked sheet(s) - "
            "the spreadsheet must have at least one sheet."
        )

    # Maybe remove local copies
    if not keep:
        for p in paths:
            if os.path.exists(p):
                os.remove(p)

    # Remove the cached copies
    for sheet_title in sheets_to_remove.keys():
        cached_path = get_cached_path(cogs_dir, sheet_title)
        if os.path.exists(cached_path):
            os.remove(cached_path)

    # Update sheet.tsv
    with open(f"{cogs_dir}/sheet.tsv", "w") as f:
        writer = csv.DictWriter(
            f,
            delimiter="\t",
            lineterminator="\n",
            fieldnames=[
                "ID",
                "Title",
                "Path",
                "Description",
                "Frozen Rows",
                "Frozen Columns",
                "Ignore",
            ],
        )
        writer.writeheader()
        for title, sheet in sheets.items():
            if title not in sheets_to_remove.keys():
                sheet["Title"] = title
                writer.writerow(sheet)

    # Update formats and notes
    sheet_formats = get_sheet_formats(cogs_dir)
    update_format(cogs_dir, sheet_formats, sheets_to_remove.keys())
    sheet_notes = get_sheet_notes(cogs_dir)
    update_note(cogs_dir, sheet_notes, sheets_to_remove.keys())

def apply_messages(cogs_dir, message_tables):
    """Apply one or more message tables (from dict reader) to the sheets as formats and notes."""
    tracked_sheets = get_tracked_sheets(cogs_dir)

    # Get existing formats
    sheet_to_formats = get_sheet_formats(cogs_dir)

    # Remove any formats that are "applied" (format ID 0, 1, or 2)
    sheet_to_manual_formats = {}
    for sheet_title, cell_to_formats in sheet_to_formats.items():
        manual_formats = {}
        for cell, fmt in cell_to_formats.items():
            if int(fmt) > 2:
                manual_formats[cell] = fmt
        sheet_to_manual_formats[sheet_title] = manual_formats
    sheet_to_formats = sheet_to_manual_formats

    # Remove any notes that are "applied" (starts with ERROR, WARN, or INFO)
    sheet_to_notes = get_sheet_notes(cogs_dir)
    sheet_to_manual_notes = {}
    for sheet_title, cell_to_notes in sheet_to_notes.items():
        manual_notes = {}
        for cell, note in cell_to_notes.items():
            if (
                not note.startswith("ERROR: ")
                and not note.startswith("WARN: ")
                and not note.startswith("INFO: ")
            ):
                manual_notes[cell] = note
        sheet_to_manual_notes[sheet_title] = manual_notes
    sheet_to_notes = sheet_to_manual_notes

    # Read the message table to get the formats & notes to add
    for message_table in message_tables:
        for row in message_table:
            # Check for cell location - skip if none
            cell = row.get("cell")
            if not cell or cell.strip() == "":
                continue
            cell = cell.upper()
            table = os.path.splitext(os.path.basename(row["table"]))[0]
            if table not in tracked_sheets:
                # TODO - error? warning?
                logging.warning(f"'{table}' is not a tracked sheet")
                continue
            if table in sheet_to_formats:
                cell_to_formats = sheet_to_formats[table]
            else:
                cell_to_formats = {}
            if table in sheet_to_notes:
                cell_to_notes = sheet_to_notes[table]
            else:
                cell_to_notes = {}

            # Check for current applied formats and/or notes
            current_fmt = -1
            current_note = None
            if cell in cell_to_formats and int(cell_to_formats[cell]) <= 2:
                # Store as an int so it can be compared against the levels below
                current_fmt = int(cell_to_formats[cell])
            if cell in cell_to_notes:
                current_note = cell_to_notes[cell]
                if (
                    not current_note.startswith("ERROR")
                    and not current_note.startswith("WARN")
                    and not current_note.startswith("INFO")
                ):
                    # Not an applied note
                    current_note = None

            # Set formatting based on level of issue
            if "level" in row:
                level = row["level"].lower().strip()
            else:
                level = "error"
            if level == "error":
                cell_to_formats[cell] = 0
            elif level == "warn" or level == "warning":
                level = "warn"
                if current_fmt != 0:
                    cell_to_formats[cell] = 1
            elif level == "info":
                if current_fmt < 1:
                    cell_to_formats[cell] = 2

            message = None
            if "message" in row:
                message = row["message"]
                if message == "":
                    message = None
            suggest = None
            if "suggestion" in row:
                suggest = row["suggestion"]
                if suggest == "":
                    suggest = None
            rule_id = None
            if "rule id" in row:
                rule_id = row["rule id"]

            # Add the note
            rule_name = None
            if "rule" in row:
                rule_name = row["rule"]
                logging.info(f'Adding "{rule_name}" to {cell} as a(n) {level}')
            else:
                logging.info(f"Adding message to {cell} as a(n) {level}")

            # Format the note
            if rule_name:
                note = f"{level.upper()}: {rule_name}"
            else:
                note = level.upper()
            if message:
                note += f"\n{message}"
            if suggest:
                note += f'\nSuggested Fix: "{suggest}"'
            if rule_id:
                note += f"\nFor more details, see {rule_id}"

            # Add to dict
            if current_note:
                cell_to_notes[cell] = f"{current_note}\n\n{note}"
            else:
                cell_to_notes[cell] = note

            sheet_to_formats[table] = cell_to_formats
            sheet_to_notes[table] = cell_to_notes

    # Update formats & notes TSVs
    update_note(cogs_dir, sheet_to_notes, [])
    update_format(cogs_dir, sheet_to_formats, [])

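# Illustrative sketch (not part of the original module): the shape of a message
# table accepted by apply_messages(). Each row is a dict (e.g. from
# csv.DictReader); only the keys read above ("table", "cell", "level", "rule",
# "message", "suggestion", "rule id") are used, and the values shown are made up.
_example_message_table = [
    {
        "table": "example.tsv",
        "cell": "B2",
        "level": "error",
        "rule": "blank required value",
        "message": "a value is required in this column",
        "suggestion": "",
        "rule id": "https://example.com/rules#blank",
    }
]
# apply_messages(cogs_dir, [_example_message_table])
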
def merge(verbose=False):
    """Copy cached sheets to their local paths."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    cached_sheets = get_cached_sheets(cogs_dir)
    tracked_sheets = get_tracked_sheets(cogs_dir)
    tracked_cached = [
        re.sub(r"[^A-Za-z0-9]+", "_", x.lower()) for x in tracked_sheets.keys()
    ]

    # Get the list of ignored sheet titles
    ignore = [x for x, y in tracked_sheets.items() if y.get("Ignore") == "True"]

    renamed_sheets = get_renamed_sheets(cogs_dir)
    renamed_local = {
        old: details
        for old, details in renamed_sheets.items()
        if details["where"] == "local"
    }
    renamed_remote = {
        old: details
        for old, details in renamed_sheets.items()
        if old not in renamed_local
    }

    # Add new remotes to tracked cached
    for details in renamed_remote.values():
        tracked_cached.append(re.sub(r"[^A-Za-z0-9]+", "_", details["new"].lower()))

    remove_sheets = [s for s in cached_sheets if s not in tracked_cached]

    for sheet_title, details in tracked_sheets.items():
        if sheet_title in ignore or sheet_title in renamed_remote:
            continue
        cached_path = get_cached_path(cogs_dir, sheet_title)
        local_sheet = details["Path"]
        if os.path.exists(cached_path):
            logging.info(f"Writing '{sheet_title}' to {local_sheet}")
            if local_sheet.endswith(".csv"):
                copy_to_csv(cached_path, local_sheet)
            else:
                shutil.copyfile(cached_path, local_sheet)

    # Handle renamed remote files by replacing their cached copies and adding to sheet.tsv
    for old_title, details in renamed_remote.items():
        new_title = details["new"]
        cached_path = get_cached_path(cogs_dir, old_title)
        logging.info(
            f"Removing '{old_title}' from cached sheets and replacing with '{new_title}'"
        )
        if os.path.exists(cached_path):
            os.remove(cached_path)

        # Write new copy
        local_sheet = details["path"]
        cached_path = get_cached_path(cogs_dir, new_title)
        if os.path.exists(cached_path):
            logging.info(f"Writing '{new_title}' to {local_sheet}")
            if local_sheet.endswith(".csv"):
                copy_to_csv(cached_path, local_sheet)
            else:
                shutil.copyfile(cached_path, local_sheet)

        # Update sheet.tsv
        sheet_details = tracked_sheets[old_title]
        del tracked_sheets[old_title]
        sheet_details["Path"] = local_sheet
        tracked_sheets[new_title] = sheet_details

    for sheet_title in remove_sheets:
        logging.info(f"Removing '{sheet_title}' from cached sheets")
        os.remove(f"{cogs_dir}/tracked/{sheet_title}.tsv")

    with open(f"{cogs_dir}/renamed.tsv", "w") as f:
        for old_title, details in renamed_local.items():
            new_title = details["new"]
            path = details["path"]
            f.write(f"{old_title}\t{new_title}\t{path}\tlocal\n")

    if renamed_remote:
        # We need to update sheet.tsv if anything was renamed remotely
        sheet_details = []
        for title, details in tracked_sheets.items():
            details["Title"] = title
            sheet_details.append(details)
        update_sheet(cogs_dir, sheet_details, [])

def fetch(verbose=False):
    """Fetch all sheets from project spreadsheet to .cogs/ directory."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    config = get_config(cogs_dir)
    gc = get_client_from_config(config)
    spreadsheet = gc.open_by_key(config["Spreadsheet ID"])

    # Get the remote sheets from spreadsheet
    sheets = spreadsheet.worksheets()
    remote_sheets = get_remote_sheets(sheets)
    tracked_sheets = get_tracked_sheets(cogs_dir, include_no_id=True)
    id_to_title = {
        int(details["ID"]): sheet_title
        for sheet_title, details in tracked_sheets.items()
        if details.get("ID")
    }

    # Get details about renamed sheets
    renamed_local = get_renamed_sheets(cogs_dir)
    renamed_remote = {}

    # Format ID to format for cell formatting
    id_to_format = get_format_dict(cogs_dir)
    if id_to_format:
        # Format to format ID
        format_to_id = {
            json.dumps(v, sort_keys=True): k for k, v in id_to_format.items()
        }
        # Next ID for new formats
        format_ids = list(id_to_format.keys())
        format_ids.sort()
        next_fmt_id = int(format_ids[-1]) + 1
    else:
        format_to_id = {}
        next_fmt_id = 1

    # Export the sheets as TSV to .cogs/
    # We also collect the formatting and note data for each sheet during this step
    sheet_formats = {}
    sheet_notes = {}
    sheet_dv_rules = {}
    sheet_frozen = {}

    # Lines to add to sheet.tsv of sheets to ignore
    new_ignore = []
    for sheet in sheets:
        remote_title = sheet.title
        if (
            remote_title in tracked_sheets
            and tracked_sheets[remote_title].get("Ignore") == "True"
        ):
            logging.info(f"Skipping ignored sheet '{remote_title}'...")
            continue

        # Download the sheet as the renamed sheet if necessary
        if remote_title in renamed_local:
            st = renamed_local[remote_title]["new"]
            logging.info(
                f"Downloading sheet '{remote_title}' as {st} (renamed locally)"
            )
        else:
            st = remote_title
            if sheet.id in id_to_title:
                local_title = id_to_title[sheet.id]
                if local_title != remote_title:
                    # The sheet title has been changed remotely
                    # This will be updated in tracking but the local sheet will remain
                    old_path = tracked_sheets[local_title]["Path"]
                    logging.warning(
                        f"Local sheet '{local_title}' has been renamed to '{st}' remotely:"
                        f"\n - '{local_title}' is removed from tracking and replaced with '{st}'"
                        f"\n - {old_path} will not be updated when running `cogs pull` "
                        f"\n - changes to {old_path} will not be pushed to the remote spreadsheet"
                    )
                    renamed_remote[local_title] = {
                        "new": st,
                        "path": re.sub(r"[^A-Za-z0-9]+", "_", st.lower()) + ".tsv",
                    }
                    logging.info(
                        f"Downloading sheet '{local_title}' as '{st}' (renamed remotely)"
                    )
                else:
                    logging.info(f"Downloading sheet '{st}'")
            else:
                # Add as an ignored sheet
                new_ignore.append({"ID": sheet.id, "Title": st, "Ignore": True})
                logging.info(f"Ignoring new remote sheet '{st}'")
                continue

        # Get frozen rows & columns
        sheet_frozen[st] = {
            "row": sheet.frozen_row_count,
            "col": sheet.frozen_col_count,
        }

        # Get the cells with format, value, and note from remote sheet
        cells = get_cell_data(cogs_dir, sheet)

        # Create a map of rule -> locs for data validation
        dv_rules = {}
        str_to_rule = {}
        for loc, cell_data in cells.items():
            if "data_validation" not in cell_data:
                continue
            data_validation = cell_data["data_validation"]
            condition = data_validation["condition"]
            bc = gf.BooleanCondition(condition, data_validation["value"])
            dv = gf.DataValidationRule(bc)
            if str(dv) not in str_to_rule:
                str_to_rule[str(dv)] = dv
            if str(dv) in dv_rules:
                locs = dv_rules[str(dv)]
            else:
                locs = []
            locs.append(loc)
            dv_rules[str(dv)] = locs

        # Aggregate by location and format for validate.tsv
        dv_rows = clean_data_validation_rules(dv_rules, str_to_rule)
        sheet_dv_rules[st] = dv_rows

        # Get the ending row & col that have values
        # Otherwise we end up with a bunch of empty rows/columns
        # Also get the data validation
        max_row = 0
        max_col = 0
        for c in cells.keys():
            row, col = gspread.utils.a1_to_rowcol(c)
            if row > max_row:
                max_row = row
            if col > max_col:
                max_col = col

        # Cell label to format dict
        cell_to_format = {
            cell: data["format"] for cell, data in cells.items() if "format" in data
        }

        # Create a cell to format ID dict based on the format dict for each cell
        cell_to_format_id = {}
        last_fmt = None
        cell_range_start = None
        cell_range_end = None
        for cell, fmt in cell_to_format.items():
            if not fmt:
                if last_fmt:
                    if not cell_range_end or cell_range_start == cell_range_end:
                        cell_to_format_id[cell_range_start] = last_fmt
                    else:
                        cell_to_format_id[
                            f"{cell_range_start}:{cell_range_end}"
                        ] = last_fmt
                last_fmt = None
                cell_range_start = None
                cell_range_end = None
                continue
            key = json.dumps(fmt, sort_keys=True)
            if key in format_to_id:
                # Format already exists, assign that ID
                fmt_id = format_to_id[key]
            else:
                # Assign new ID
                fmt_id = next_fmt_id
                format_to_id[key] = fmt_id
                id_to_format[fmt_id] = fmt
                next_fmt_id += 1
            if last_fmt and fmt_id == last_fmt:
                # The last cell had a format and this cell's format is the same as the last,
                # so we increase the range
                cell_range_end = cell
            elif last_fmt and fmt_id != last_fmt:
                # The last cell had a format but it was different than the current format
                if cell_range_start == cell_range_end or not cell_range_end:
                    # Not a range, just a single cell (the previous cell)
                    cell_to_format_id[cell_range_start] = last_fmt
                else:
                    cell_to_format_id[
                        f"{cell_range_start}:{cell_range_end}"
                    ] = last_fmt
                # Restarting a new range at this cell
                cell_range_start = cell
                cell_range_end = None
            else:
                # No last formatting to compare to, start a new range
                cell_range_start = cell
                cell_range_end = cell
            last_fmt = fmt_id
        if cell_to_format_id:
            sheet_formats[st] = cell_to_format_id

        # Add the cell to note
        cell_to_note = {
            cell: data["note"] for cell, data in cells.items() if "note" in data
        }
        if cell_to_note:
            sheet_notes[st] = cell_to_note

        # Write values to .cogs/tracked/{sheet title}.tsv
        cached_path = get_cached_path(cogs_dir, st)
        with open(cached_path, "w") as f:
            lines = sheet.get_all_values()
            writer = csv.writer(f, delimiter="\t", lineterminator="\n")
            writer.writerows(lines)

    # Write or rewrite formats JSON with new dict
    with open(f"{cogs_dir}/formats.json", "w") as f:
        f.write(json.dumps(id_to_format, sort_keys=True, indent=4))

    # Update local sheets details in sheet.tsv with new IDs & details for current tracked sheets
    all_sheets = get_updated_sheet_details(tracked_sheets, remote_sheets, sheet_frozen)
    removed_titles = remove_sheets(
        cogs_dir, sheets, tracked_sheets, renamed_local, renamed_remote
    )

    # Add renamed-remote
    for old_title, details in renamed_remote.items():
        with open(f"{cogs_dir}/renamed.tsv", "a") as f:
            new_title = details["new"]
            new_path = details["path"]
            f.write(f"{old_title}\t{new_title}\t{new_path}\tremote\n")

    # Rewrite format.tsv and note.tsv with current remote formats & notes
    update_format(cogs_dir, sheet_formats, removed_titles)
    update_note(cogs_dir, sheet_notes, removed_titles)

    # Remove old data validation rules and rewrite with new
    with open(f"{cogs_dir}/validation.tsv", "w") as f:
        f.write("Sheet\tRange\tCondition\tValue\n")
    update_data_validation(cogs_dir, sheet_dv_rules, removed_titles)

    # Then update sheet.tsv
    all_sheets.extend(new_ignore)
    update_sheet(cogs_dir, all_sheets, removed_titles)

def add(path, title=None, description=None, freeze_row=0, freeze_column=0, verbose=False):
    """Add a table (TSV or CSV) to the COGS project. This updates sheet.tsv."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    sheets = get_tracked_sheets(cogs_dir)
    # Ignore is stored as the string "True" in sheet.tsv
    if path in sheets and sheets[path].get("Ignore") == "True":
        # The provided path is just the title of an ignored sheet
        add_ignored(cogs_dir, sheets, path, None, description=description)
        return
    if title in sheets and sheets[title].get("Ignore") == "True":
        # A path to write the ignored sheet was provided
        add_ignored(cogs_dir, sheets, title, path, description=description)
        return

    if not os.path.exists(path):
        # Create an empty file if the file doesn't exist
        f = open(path, "w")
        f.close()

    if not title:
        # Create the sheet title from file basename
        title = ntpath.basename(path).split(".")[0]

    # Make sure we aren't duplicating a table
    local_sheets = get_tracked_sheets(cogs_dir)
    if title in local_sheets:
        raise AddError(f"'{title}' sheet already exists in this project")

    # Make sure we aren't duplicating a path
    local_paths = {x["Path"]: t for t, x in local_sheets.items()}
    if path in local_paths.keys():
        other_title = local_paths[path]
        raise AddError(f"Local table {path} already exists as '{other_title}'")

    # Maybe get a description
    if not description:
        description = ""

    # Finally, add this TSV to sheet.tsv
    with open(f"{cogs_dir}/sheet.tsv", "a") as f:
        writer = csv.DictWriter(
            f,
            delimiter="\t",
            lineterminator="\n",
            fieldnames=[
                "ID",
                "Title",
                "Path",
                "Description",
                "Frozen Rows",
                "Frozen Columns",
                "Ignore",
            ],
        )
        # ID gets filled in when we add it to the Sheet
        writer.writerow(
            {
                "ID": "",
                "Title": title,
                "Path": path,
                "Description": description,
                "Frozen Rows": freeze_row,
                "Frozen Columns": freeze_column,
                "Ignore": False,
            }
        )
    logging.info(f"{title} successfully added to project")

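# Illustrative sketch (not part of the original module): a minimal end-to-end
# flow using the functions above. Assumes `cogs init` has already created a
# valid .cogs/ project with working Google credentials; the file name and
# title below are made up.
if __name__ == "__main__":
    add("example.tsv", title="example", description="demo sheet")
    fetch(verbose=True)
    merge(verbose=True)
    print(ls())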