Example #1
def status(use_screen=True, verbose=False):
    """Return a dict containing:
    - changes (dict of new_version, added_cols, removed_cols, added_lines, removed_lines,
      changed_lines built from daff)
    - added local (sheet names)
    - removed local (sheet names)
    - added remote (sheet names)
    - removed remote (sheet names)
    If use_screen, print the status of local sheets vs. remote sheets."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    # Get the sets of changes
    tracked_sheets = get_tracked_sheets(cogs_dir)
    # Get rid of ignored sheets
    tracked_sheets = {
        x: y
        for x, y in tracked_sheets.items() if y.get("Ignore") != "True"
    }
    renamed = get_renamed_sheets(cogs_dir)
    changes = get_changes(cogs_dir, tracked_sheets, renamed)

    # Collect every changed item (diffed sheets plus added/removed sheets)
    changed_items = set(
        list(changes["diffs"].keys()) + changes["added local"] +
        changes["added remote"] + changes["removed local"] +
        changes["removed remote"])
    if not changed_items and not renamed:
        return None

    if use_screen:
        print_status(changes, renamed, tracked_sheets)
    return changes
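
A minimal usage sketch, assuming status is importable from its module (the import path below is illustrative, not from the source):

from cogs.status import status  # hypothetical import path

changes = status(use_screen=False, verbose=True)
if changes is None:
    print("No changes between local and remote sheets")
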
Example #2
File: clear.py Project: ontodev/cogs
def clear(keyword, on_sheets=None, verbose=False):
    """Remove formats, notes, and/or data validation rules from one or more sheets."""
    cogs_dir = validate_cogs_project()
    set_logging(verbose)

    # Validate sheets
    tracked_sheets = get_tracked_sheets(cogs_dir)
    ignore = [
        x for x, y in tracked_sheets.items() if y.get("Ignore") == "True"
    ]

    if not on_sheets:
        # If no sheet was supplied, clear from all
        on_sheets = [x for x in tracked_sheets.keys() if x not in ignore]

    untracked = []
    # Iterate over a copy so we can safely remove ignored sheets from on_sheets
    for st in list(on_sheets):
        if st not in tracked_sheets.keys():
            untracked.append(st)
        if st in ignore:
            logging.error(
                f"Cannot clear from ignored sheet '{st}' - skipping this sheet"
            )
            on_sheets.remove(st)
    if untracked:
        raise ClearError(
            "The following sheet(s) are not part of this project: " +
            ", ".join(untracked))

    if keyword == "formats":
        for st in on_sheets:
            clear_formats(cogs_dir, st)
    elif keyword == "notes":
        for st in on_sheets:
            clear_notes(cogs_dir, st)
    elif keyword == "validation":
        for st in on_sheets:
            clear_data_validation(cogs_dir, st)
    elif keyword == "all":
        for st in on_sheets:
            clear_formats(cogs_dir, st)
            clear_notes(cogs_dir, st)
            clear_data_validation(cogs_dir, st)
    else:
        raise ClearError("Unknown keyword: " + keyword)
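
For illustration, a call that clears notes from two tracked sheets (the sheet names are hypothetical; the import path mirrors the file name above but is not confirmed by the source):

from cogs.clear import clear  # hypothetical import path

clear("notes", on_sheets=["my_sheet", "other_sheet"], verbose=True)
# clear("all") with no on_sheets would clear formats, notes, and validation from every non-ignored sheet
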
Example #3
def share(email, role, verbose=False):
    """Share the project spreadsheet with email addresses as reader, writer, or owner."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    config = get_config(cogs_dir)
    gc = get_client_from_config(config)

    spreadsheet = gc.open_by_key(config["Spreadsheet ID"])
    title = spreadsheet.title

    if role == "owner":
        share_spreadsheet(title, spreadsheet, email, "owner")
    elif role == "reader":
        share_spreadsheet(title, spreadsheet, email, "reader")
    elif role == "writer":
        share_spreadsheet(title, spreadsheet, email, "writer")
    else:
        raise RuntimeError("Unknown role passed to `share`: " + str(role))
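
A usage sketch; the email address and import path are placeholders:

from cogs.share import share  # hypothetical import path

share("collaborator@example.com", "writer", verbose=True)
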
Example #4
def ls(verbose=False):
    """Return a list of [sheet, path] pairs."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    tracked_sheets = get_tracked_sheets(cogs_dir)
    ignore = [
        x for x, y in tracked_sheets.items() if y.get("Ignore") == "True"
    ]
    sheet_details = [["Tracked:"]]
    for sheet, details in tracked_sheets.items():
        if sheet in ignore:
            continue
        sheet_details.append([" - " + sheet, "(" + details["Path"] + ")"])
    if ignore:
        sheet_details.append([])
        sheet_details.append(["Ignored:"])
        for i in ignore:
            sheet_details.append([" - " + i])

    return sheet_details
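
A short sketch of consuming the returned rows (the import path is illustrative):

from cogs.ls import ls  # hypothetical import path

for row in ls():
    print("\t".join(row))
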
Example #5
def apply(paths, verbose=False):
    """Apply a table to the spreadsheet. The type of table to 'apply' is based on the headers:
    standardized messages or data validation."""
    cogs_dir = validate_cogs_project()
    set_logging(verbose)

    message_tables = []
    data_validation_tables = []
    for p in paths:
        if p.endswith("csv"):
            sep = ","
        else:
            sep = "\t"
        with open(p, "r") as f:
            # Get headers and rows
            reader = csv.DictReader(f, delimiter=sep)
            headers = [x.lower() for x in reader.fieldnames]
            rows = []
            for r in reader:
                rows.append({k.lower(): v for k, v in r.items()})

            # Determine type of table
            if headers == data_validation_headers:
                data_validation_tables.append(rows)
            elif "table" in headers and "cell" in headers:
                for h in headers:
                    if h not in message_headers:
                        raise ApplyError(
                            f"The headers in table {p} are not valid for apply"
                        )
                message_tables.append(rows)
            else:
                raise ApplyError(
                    f"The headers in table {p} are not valid for apply")

    if message_tables:
        apply_messages(cogs_dir, message_tables)

    if data_validation_tables:
        apply_data_validation(cogs_dir, data_validation_tables)
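
A usage sketch, assuming a standardized message table and a data validation table exist at these hypothetical paths:

from cogs.apply import apply  # hypothetical import path

apply(["problems.tsv", "validation.csv"], verbose=True)
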
Example #6
def diff(paths=None, use_screen=True, verbose=False):
    """Return a dict of sheet title to daff diff lines. If no paths are provided, diff over all
    sheets in the project. If use_screen, display an interactive curses screen with the diffs."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    sheets = get_tracked_sheets(cogs_dir)
    tracked_paths = [details["Path"] for details in sheets.values()]
    if paths:
        # Update sheets to diff
        for p in paths:
            if p not in tracked_paths:
                raise DiffError(
                    f"sheet '{p}' is not part of the current project")
        sheets = {
            sheet_title: details
            for sheet_title, details in sheets.items()
            if details["Path"] in paths
        }

    diffs = {}
    for sheet_title, details in sheets.items():
        cached = get_cached_path(cogs_dir, sheet_title)
        local = details["Path"]
        if os.path.exists(local) and os.path.exists(cached):
            # Consider remote (cached) the old version to diff off of
            sheet_diff = get_diff(cached, local)
            diffs[sheet_title] = sheet_diff

    if not diffs:
        return None

    if use_screen:
        return display_diff(cogs_dir, diffs, sheets)

    return diffs
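
A sketch of collecting the raw daff diff lines without the interactive screen (the path is hypothetical and must be tracked by the project):

from cogs.diff import diff  # hypothetical import path

diffs = diff(paths=["my_sheet.tsv"], use_screen=False)
if diffs:
    for sheet_title, lines in diffs.items():
        print(sheet_title, len(lines), "diff lines")
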
Example #7
def delete(verbose=False):
    """Read COGS configuration and delete the spreadsheet corresponding to the spreadsheet ID.
    Remove .cogs directory."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    config = get_config(cogs_dir)

    # Get a client to perform Sheet actions
    gc = get_client_from_config(config)

    # Delete the Sheet
    title = config["Title"]
    cwd = os.getcwd()
    logging.info(f"Removing COGS project '{title}' from {cwd}")
    try:
        ssid = config["Spreadsheet ID"]
        gc.del_spreadsheet(ssid)
    except gspread.exceptions.APIError as e:
        raise DeleteError(f"Unable to delete spreadsheet '{title}'\n" f"CAUSE: {e.response.text}")
    logging.info(f"successfully deleted Google Sheet '{title}' ({ssid})")

    # Remove the COGS data
    if os.path.exists(cogs_dir):
        shutil.rmtree(cogs_dir)
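
A usage sketch; note that this permanently deletes the remote spreadsheet and the local .cogs directory (the import path is illustrative):

from cogs.delete import delete  # hypothetical import path

delete(verbose=True)
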
Example #8
File: add.py Project: ontodev/cogs
def add_all(verbose=False):
    """Start tracking all ignored sheets: assign each a local path, remove its Ignore flag,
    and rewrite sheet.tsv."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    sheets = get_tracked_sheets(cogs_dir)
    sheet_lines = []
    for sheet_title, details in sheets.items():
        ignored = details.get("Ignore")
        if ignored:
            path = re.sub(r"[^A-Za-z0-9]+", "_", sheet_title.lower()) + ".tsv"
            if os.path.exists(path):
                now = datetime.now().strftime("%Y%m%d_%H%M%S")
                path = re.sub(r"[^A-Za-z0-9]+", "_",
                              sheet_title.lower()) + f"_{now}.tsv"
            details["Path"] = path
            logging.info(
                f"Adding ignored sheet '{sheet_title}' to tracked sheets with path '{path}'"
            )
        del details["Ignore"]

        details["Title"] = sheet_title
        sheet_lines.append(details)

    update_sheet(cogs_dir, sheet_lines, [])
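
A usage sketch (the import path mirrors the file name above but is not confirmed by the source):

from cogs.add import add_all  # hypothetical import path

add_all(verbose=True)
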
Example #9
def rm(paths, keep=False, verbose=False):
    """Remove a table (TSV or CSV) from the COGS project.
    This updates sheet.tsv and deletes the corresponding cached file."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    # Make sure the sheets exist
    sheets = get_tracked_sheets(cogs_dir)
    path_to_sheet = {
        os.path.abspath(details["Path"]): sheet_title
        for sheet_title, details in sheets.items()
    }

    # Check for either untracked or ignored sheets in provided paths
    ignore = [x for x, y in sheets.items() if y.get("Ignore") == "True"]
    untracked = []
    ignored = []
    for p in paths:
        abspath = os.path.abspath(p)
        if abspath not in path_to_sheet:
            untracked.append(p)
        elif path_to_sheet[abspath] in ignore:
            ignored.append(p)
    if untracked:
        raise RmError(
            f"unable to remove untracked file(s): {', '.join(untracked)}.")
    if ignored:
        raise RmError(
            f"unable to remove ignored file(s): {', '.join(ignored)}")

    sheets_to_remove = {
        title: sheet
        for title, sheet in sheets.items() if sheet["Path"] in paths
    }

    # Make sure we are not removing every sheet; a Google spreadsheet must keep at least one sheet
    if len(sheets) - len(sheets_to_remove) == 0:
        raise RmError(
            f"unable to remove {len(sheets_to_remove)} tracked sheet(s) - "
            "the spreadsheet must have at least one sheet.")

    # Maybe remove local copies
    if not keep:
        for p in paths:
            if os.path.exists(p):
                os.remove(p)

    # Remove the cached copies
    for sheet_title in sheets_to_remove.keys():
        cached_path = get_cached_path(cogs_dir, sheet_title)
        if os.path.exists(cached_path):
            os.remove(cached_path)

    # Update sheet.tsv
    with open(f"{cogs_dir}/sheet.tsv", "w") as f:
        writer = csv.DictWriter(
            f,
            delimiter="\t",
            lineterminator="\n",
            fieldnames=[
                "ID",
                "Title",
                "Path",
                "Description",
                "Frozen Rows",
                "Frozen Columns",
                "Ignore",
            ],
        )
        writer.writeheader()
        for title, sheet in sheets.items():
            if title not in sheets_to_remove.keys():
                sheet["Title"] = title
                writer.writerow(sheet)

    # Update formats and notes
    sheet_formats = get_sheet_formats(cogs_dir)
    update_format(cogs_dir, sheet_formats, sheets_to_remove.keys())

    sheet_notes = get_sheet_notes(cogs_dir)
    update_note(cogs_dir, sheet_notes, sheets_to_remove.keys())
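
A sketch of removing a tracked table while keeping the local copy (the path and import path are hypothetical):

from cogs.rm import rm  # hypothetical import path

rm(["my_sheet.tsv"], keep=True, verbose=True)
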
Example #10
File: merge.py Project: ontodev/cogs
def merge(verbose=False):
    """Copy cached sheets to their local paths."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    cached_sheets = get_cached_sheets(cogs_dir)
    tracked_sheets = get_tracked_sheets(cogs_dir)
    tracked_cached = [
        re.sub(r"[^A-Za-z0-9]+", "_", x.lower())
        for x in tracked_sheets.keys()
    ]

    # Get the list of ignored sheet titles
    ignore = [
        x for x, y in tracked_sheets.items() if y.get("Ignore") == "True"
    ]

    renamed_sheets = get_renamed_sheets(cogs_dir)
    renamed_local = {
        old: details
        for old, details in renamed_sheets.items()
        if details["where"] == "local"
    }
    renamed_remote = {
        old: details
        for old, details in renamed_sheets.items() if old not in renamed_local
    }
    # Add new remotes to tracked cached
    for details in renamed_remote.values():
        tracked_cached.append(
            re.sub(r"[^A-Za-z0-9]+", "_", details["new"].lower()))

    remove_sheets = [s for s in cached_sheets if s not in tracked_cached]

    for sheet_title, details in tracked_sheets.items():
        if sheet_title in ignore or sheet_title in renamed_remote:
            continue
        cached_path = get_cached_path(cogs_dir, sheet_title)
        local_sheet = details["Path"]
        if os.path.exists(cached_path):
            logging.info(f"Writing '{sheet_title}' to {local_sheet}")
            if local_sheet.endswith(".csv"):
                copy_to_csv(cached_path, local_sheet)
            else:
                shutil.copyfile(cached_path, local_sheet)

    # Handle renamed remote files by replacing their cached copies and adding to sheet.tsv
    for old_title, details in renamed_remote.items():
        new_title = details["new"]
        cached_path = get_cached_path(cogs_dir, old_title)
        logging.info(
            f"Removing '{old_title}' from cached sheets and replacing with '{new_title}'"
        )
        if os.path.exists(cached_path):
            os.remove(cached_path)

        # Write new copy
        local_sheet = details["path"]
        cached_path = get_cached_path(cogs_dir, new_title)
        if os.path.exists(cached_path):
            logging.info(f"Writing '{new_title}' to {local_sheet}")
            if local_sheet.endswith(".csv"):
                copy_to_csv(cached_path, local_sheet)
            else:
                shutil.copyfile(cached_path, local_sheet)

        # Update sheet.tsv
        sheet_details = tracked_sheets[old_title]
        del tracked_sheets[old_title]
        sheet_details["Path"] = local_sheet
        tracked_sheets[new_title] = sheet_details

    for sheet_title in remove_sheets:
        logging.info(f"Removing '{sheet_title}' from cached sheets")
        os.remove(f"{cogs_dir}/tracked/{sheet_title}.tsv")

    with open(f"{cogs_dir}/renamed.tsv", "w") as f:
        for old_title, details in renamed_local.items():
            new_title = details["new"]
            path = details["path"]
            f.write(f"{old_title}\t{new_title}\t{path}\tlocal\n")

    if renamed_remote:
        # We need to update sheet.tsv if anything was renamed remotely
        sheet_details = []
        for title, details in tracked_sheets.items():
            details["Title"] = title
            sheet_details.append(details)
        update_sheet(cogs_dir, sheet_details, [])
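
A usage sketch (the import path mirrors the file name above but is not confirmed by the source):

from cogs.merge import merge  # hypothetical import path

merge(verbose=True)
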
Example #11
def fetch(verbose=False):
    """Fetch all sheets from project spreadsheet to .cogs/ directory."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()

    config = get_config(cogs_dir)
    gc = get_client_from_config(config)
    spreadsheet = gc.open_by_key(config["Spreadsheet ID"])

    # Get the remote sheets from spreadsheet
    sheets = spreadsheet.worksheets()
    remote_sheets = get_remote_sheets(sheets)
    tracked_sheets = get_tracked_sheets(cogs_dir, include_no_id=True)
    id_to_title = {
        int(details["ID"]): sheet_title
        for sheet_title, details in tracked_sheets.items() if details.get("ID")
    }

    # Get details about renamed sheets
    renamed_local = get_renamed_sheets(cogs_dir)
    renamed_remote = {}

    # Map of format ID to format, used for cell formatting
    id_to_format = get_format_dict(cogs_dir)
    if id_to_format:
        # Format to format ID
        format_to_id = {
            json.dumps(v, sort_keys=True): k
            for k, v in id_to_format.items()
        }
        # Next ID for new formats (compare numerically; the keys may be strings from JSON)
        next_fmt_id = max(int(i) for i in id_to_format.keys()) + 1
    else:
        format_to_id = {}
        next_fmt_id = 1

    # Export the sheets as TSV to .cogs/
    # We also collect the formatting and note data for each sheet during this step
    sheet_formats = {}
    sheet_notes = {}
    sheet_dv_rules = {}
    sheet_frozen = {}

    # Lines to add to sheet.tsv of sheets to ignore
    new_ignore = []

    for sheet in sheets:
        remote_title = sheet.title
        if remote_title in tracked_sheets and tracked_sheets[remote_title].get(
                "Ignore") == "True":
            logging.info(f"Skipping ignored sheet '{remote_title}'...")
            continue

        # Download the sheet as the renamed sheet if necessary
        if remote_title in renamed_local:
            st = renamed_local[remote_title]["new"]
            logging.info(
                f"Downloading sheet '{remote_title}' as {st} (renamed locally)"
            )
        else:
            st = remote_title
            if sheet.id in id_to_title:
                local_title = id_to_title[sheet.id]
                if local_title != remote_title:
                    # The sheet title has been changed remotely
                    # This will be updated in tracking but the local sheet will remain
                    old_path = tracked_sheets[local_title]["Path"]
                    logging.warning(
                        f"Local sheet '{local_title}' has been renamed to '{st}' remotely:"
                        f"\n  - '{local_title}' is removed from tracking and replaced with '{st}'"
                        f"\n  - {old_path} will not be updated when running `cogs pull` "
                        f"\n  - changes to {old_path} will not be pushed to the remote spreadsheet"
                    )
                    renamed_remote[local_title] = {
                        "new": st,
                        "path":
                        re.sub(r"[^A-Za-z0-9]+", "_", st.lower()) + ".tsv",
                    }
                    logging.info(
                        f"Downloading sheet '{local_title}' as '{st}' (renamed remotely)"
                    )
                else:
                    logging.info(f"Downloading sheet '{st}'")
            else:
                # Add as an ignored sheet
                new_ignore.append({
                    "ID": sheet.id,
                    "Title": st,
                    "Ignore": True
                })
                logging.info(f"Ignoring new remote sheet '{st}'")
                continue

        # Get frozen rows & columns
        sheet_frozen[st] = {
            "row": sheet.frozen_row_count,
            "col": sheet.frozen_col_count,
        }

        # Get the cells with format, value, and note from remote sheet
        cells = get_cell_data(cogs_dir, sheet)

        # Create a map of rule -> locs for data validation
        dv_rules = {}
        str_to_rule = {}
        for loc, cell_data in cells.items():
            if "data_validation" not in cell_data:
                continue
            data_validation = cell_data["data_validation"]
            condition = data_validation["condition"]
            bc = gf.BooleanCondition(condition, data_validation["value"])
            dv = gf.DataValidationRule(bc)
            if str(dv) not in str_to_rule:
                str_to_rule[str(dv)] = dv
            if str(dv) in dv_rules:
                locs = dv_rules[str(dv)]
            else:
                locs = []
            locs.append(loc)
            dv_rules[str(dv)] = locs

        # Aggregate by location and format for validate.tsv
        dv_rows = clean_data_validation_rules(dv_rules, str_to_rule)
        sheet_dv_rules[st] = dv_rows

        # Get the ending row & col that have values
        # Otherwise we end up with a bunch of empty rows/columns
        # Also get the data validation
        max_row = 0
        max_col = 0

        for c in cells.keys():
            row, col = gspread.utils.a1_to_rowcol(c)
            if row > max_row:
                max_row = row
            if col > max_col:
                max_col = col

        # Cell label to format dict
        cell_to_format = {
            cell: data["format"]
            for cell, data in cells.items() if "format" in data
        }

        # Create a cell to format ID dict based on the format dict for each cell
        cell_to_format_id = {}
        last_fmt = None
        cell_range_start = None
        cell_range_end = None
        for cell, fmt in cell_to_format.items():
            if not fmt:
                if last_fmt:
                    if not cell_range_end or cell_range_start == cell_range_end:
                        cell_to_format_id[cell_range_start] = last_fmt
                    else:
                        cell_to_format_id[
                            f"{cell_range_start}:{cell_range_end}"] = last_fmt
                last_fmt = None
                cell_range_start = None
                cell_range_end = None
                continue

            key = json.dumps(fmt, sort_keys=True)

            if key in format_to_id:
                # Format already exists, assign that ID
                fmt_id = format_to_id[key]
            else:
                # Assign new ID
                fmt_id = next_fmt_id
                format_to_id[key] = fmt_id
                id_to_format[fmt_id] = fmt
                next_fmt_id += 1

            if last_fmt and fmt_id == last_fmt:
                # The last cell had a format and this cell's format is the same,
                # so extend the range
                cell_range_end = cell
            elif last_fmt and fmt_id != last_fmt:
                # The last cell had a format but it was different than the current format
                if cell_range_start == cell_range_end or not cell_range_end:
                    # Not a range, just a single cell (the previous cell)
                    cell_to_format_id[cell_range_start] = last_fmt
                else:
                    cell_to_format_id[
                        f"{cell_range_start}:{cell_range_end}"] = last_fmt
                # Restarting a new range at this cell
                cell_range_start = cell
                cell_range_end = None
            else:
                # No last formatting to compare to, start a new range
                cell_range_start = cell
                cell_range_end = cell
            last_fmt = fmt_id

        # Flush a range that is still open when the loop ends
        if last_fmt:
            if not cell_range_end or cell_range_start == cell_range_end:
                cell_to_format_id[cell_range_start] = last_fmt
            else:
                cell_to_format_id[
                    f"{cell_range_start}:{cell_range_end}"] = last_fmt

        if cell_to_format_id:
            sheet_formats[st] = cell_to_format_id

        # Add the cell to note
        cell_to_note = {
            cell: data["note"]
            for cell, data in cells.items() if "note" in data
        }
        if cell_to_note:
            sheet_notes[st] = cell_to_note

        # Write values to .cogs/tracked/{sheet title}.tsv
        cached_path = get_cached_path(cogs_dir, st)
        with open(cached_path, "w") as f:
            lines = sheet.get_all_values()
            writer = csv.writer(f, delimiter="\t", lineterminator="\n")
            writer.writerows(lines)

    # Write or rewrite formats JSON with new dict
    with open(f"{cogs_dir}/formats.json", "w") as f:
        f.write(json.dumps(id_to_format, sort_keys=True, indent=4))

    # Update local sheets details in sheet.tsv with new IDs & details for current tracked sheets
    all_sheets = get_updated_sheet_details(tracked_sheets, remote_sheets,
                                           sheet_frozen)

    removed_titles = remove_sheets(cogs_dir, sheets, tracked_sheets,
                                   renamed_local, renamed_remote)

    # Add renamed-remote
    for old_title, details in renamed_remote.items():
        with open(f"{cogs_dir}/renamed.tsv", "a") as f:
            new_title = details["new"]
            new_path = details["path"]
            f.write(f"{old_title}\t{new_title}\t{new_path}\tremote\n")

    # Rewrite format.tsv and note.tsv with current remote formats & notes
    update_format(cogs_dir, sheet_formats, removed_titles)
    update_note(cogs_dir, sheet_notes, removed_titles)
    # Remove old data validation rules and rewrite with new
    with open(f"{cogs_dir}/validation.tsv", "w") as f:
        f.write("Sheet\tRange\tCondition\tValue\n")
    update_data_validation(cogs_dir, sheet_dv_rules, removed_titles)

    # Then update sheet.tsv
    all_sheets.extend(new_ignore)
    update_sheet(cogs_dir, all_sheets, removed_titles)
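
A usage sketch; fetch only updates the cached copies under .cogs/, so it is typically followed by merge (Example #10) to copy them to their local paths. The import paths are illustrative:

from cogs.fetch import fetch  # hypothetical import path
from cogs.merge import merge  # hypothetical import path

fetch(verbose=True)
merge()
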
Example #12
File: add.py Project: ontodev/cogs
def add(path,
        title=None,
        description=None,
        freeze_row=0,
        freeze_column=0,
        verbose=False):
    """Add a table (TSV or CSV) to the COGS project. This updates sheet.tsv."""
    set_logging(verbose)
    cogs_dir = validate_cogs_project()
    sheets = get_tracked_sheets(cogs_dir)

    if path in sheets and sheets[path].get("Ignore") == "True":
        # The provided path is just the title of an ignored sheet
        add_ignored(cogs_dir, sheets, path, None, description=description)
        return
    if title in sheets and sheets[title].get("Ignore") == "True":
        # A path to write the ignored sheet to was provided
        add_ignored(cogs_dir, sheets, title, path, description=description)
        return

    if not os.path.exists(path):
        # Create an empty file if the file doesn't exist
        f = open(path, "w")
        f.close()

    if not title:
        # Create the sheet title from file basename
        title = ntpath.basename(path).split(".")[0]

    # Make sure we aren't duplicating a table
    local_sheets = get_tracked_sheets(cogs_dir)
    if title in local_sheets:
        raise AddError(f"'{title}' sheet already exists in this project")

    # Make sure we aren't duplicating a path
    local_paths = {x["Path"]: t for t, x in local_sheets.items()}
    if path in local_paths.keys():
        other_title = local_paths[path]
        raise AddError(f"Local table {path} already exists as '{other_title}'")

    # Maybe get a description
    if not description:
        description = ""

    # Finally, add this TSV to sheet.tsv
    with open(f"{cogs_dir}/sheet.tsv", "a") as f:
        writer = csv.DictWriter(
            f,
            delimiter="\t",
            lineterminator="\n",
            fieldnames=[
                "ID",
                "Title",
                "Path",
                "Description",
                "Frozen Rows",
                "Frozen Columns",
                "Ignore",
            ],
        )
        # ID gets filled in when we add it to the Sheet
        writer.writerow({
            "ID": "",
            "Title": title,
            "Path": path,
            "Description": description,
            "Frozen Rows": freeze_row,
            "Frozen Columns": freeze_column,
            "Ignore": False,
        })

    logging.info(f"{title} successfully added to project")