def test_validate_submission():
    """Validate the example SPR workbooks via `datasets` and via `api`."""
    valid_xlsx = "examples/spr-submission-valid.xlsx"
    invalid_xlsx = "examples/spr-submission-invalid.xlsx"
    expected_errors = [
        "Error in row 2: 'COVIC 1' is not a valid COVIC antibody label "
        + "or control antibody label in column 'Antibody label'",
        "Error in row 2: 'X' is not of type 'non-negative integer' in column 'n'",
        "Error in row 2: '7000O' is not of type 'float_threshold_na'"
        " in column 'Standard deviation in M^-1s^-1'",
        "Error in row 2: 'Positive' is not a valid term in column 'Qualitiative measure'",
    ]

    # Tables read from the "Dataset" sheet and validated directly.
    outcome = datasets.validate("spr", workbooks.read(valid_xlsx, "Dataset"))
    assert succeeded(outcome)

    outcome = datasets.validate("spr", workbooks.read(invalid_xlsx, "Dataset"))
    assert failed(outcome)
    assert outcome["errors"] == expected_errors

    # The same workbooks wrapped as uploads and validated through the API.
    outcome = api.validate("spr", {"file": UploadedFile(valid_xlsx)})
    assert succeeded(outcome)

    outcome = api.validate("spr", {"file": UploadedFile(invalid_xlsx)})
    assert failed(outcome)
    assert outcome["table"][0]["Antibody label"] == "COVIC 1"
def test_validate():
    """Validate the example antibody submissions (TSV directly, XLSX via `api`)."""
    expected_errors = [
        "Error in row 3: Duplicate value 'VD-Crotty 1' is not allowed in column 'Antibody name'",
        "Error in row 4: Missing required value in column 'Antibody name'",
        "Error in row 5: Missing required value in column 'Host'",
        "Error in row 6: 'IggA1' is not a valid term in column 'Isotype'",
        "Error in row 7: 'kapa' is not a valid term in column 'Light chain'",
        "Error in row 8: 'IGVH1-8' is not a valid term in column 'Heavy chain germline'",
        "Error in row 9: 'top' is not of type 'integer' in column 'Structural data'",
    ]

    # TSV fixtures validated directly.
    rows = tables.read_tsv("examples/antibodies-submission-valid.tsv")
    outcome = antibodies.validate(rows)
    assert succeeded(outcome)

    rows = tables.read_tsv("examples/antibodies-submission-invalid.tsv")
    outcome = antibodies.validate(rows)
    assert failed(outcome)
    assert outcome["errors"] == expected_errors

    # XLSX fixtures wrapped as uploads and validated through the API.
    valid_upload = UploadedFile("examples/antibodies-submission-valid.xlsx")
    outcome = api.validate("antibodies", {"file": valid_upload})
    assert succeeded(outcome)

    invalid_upload = UploadedFile("examples/antibodies-submission-invalid.xlsx")
    outcome = api.validate("antibodies", {"file": invalid_upload})
    assert failed(outcome)
    assert outcome["table"][0]["Antibody name"] == "VD-Crotty 1"
def expand(source, sheet=None):
    """Read `source` (optionally from `sheet`) and return a response whose
    "table" is the result of `names.label_table` with the configured labels.

    If reading fails, the failed response is returned unchanged."""
    loaded = read(source, sheet)
    if not failed(loaded):
        labelled = names.label_table(config.labels, loaded["table"])
        return success({"table": labelled})
    return loaded
def submit_assays(name, email, dataset_id, source):
    """Validate `source` for an existing dataset and submit its assays.

    `name` and `email` identify the submitter. A failed validation response
    is returned as-is; otherwise the validated table is handed to
    `datasets.submit` and that function's response is returned."""
    checked = validate(dataset_id, source)
    if failed(checked):
        return checked
    return datasets.submit(name, email, dataset_id, checked["table"])
def submit_antibodies(name, email, organization, source):
    """Validate `source` as an antibodies submission and submit it.

    `name`, `email` and `organization` identify the submitter. A failed
    validation response is returned as-is; otherwise the validated table is
    handed to `antibodies.submit` and that function's response is returned."""
    checked = validate("antibodies", source)
    if failed(checked):
        return checked
    return antibodies.submit(name, email, organization, checked["table"])
def test_pathological():
    """Exercise antibody validation on malformed TSV fixtures."""
    # Tables with the wrong set of columns must be rejected.
    for case in ("missing-columns", "extra-columns"):
        rows = tables.read_tsv(f"tests/invalid-antibodies/{case}.tsv")
        assert failed(antibodies.validate(rows))

    # The blank-rows fixture must validate and yield a 9-row table.
    rows = tables.read_tsv("tests/invalid-antibodies/blank-rows.tsv")
    outcome = antibodies.validate(rows)
    assert succeeded(outcome)
    tables.print_tsv(outcome["table"])
    assert len(outcome["table"]) == 9
def fill(datatype, source=None):
    """Fill the template for `datatype`, optionally with data from `source`.

    Without a (truthy) source, the response of `fill_rows(datatype)` is
    returned. With a source, it is read into a table, converted to a grid,
    and the grid's rows are passed to `fill_rows`; the resulting response
    additionally carries the "table" and "grid". A failed read is returned
    unchanged."""
    if not source:
        return fill_rows(datatype)

    loaded = read(source)
    if failed(loaded):
        return loaded

    table = loaded["table"]
    grid = grids.table_to_grid(config.prefixes, config.fields, table)
    filled = fill_rows(datatype, grid["rows"])
    filled["table"] = table
    filled["grid"] = grid
    return filled
def validate(datatype, source):
    """Read `source` and validate it as the given `datatype`.

    The "Antibodies" sheet is read for the "antibodies" datatype and the
    "Dataset" sheet for everything else. A failed read is returned
    unchanged; otherwise the response of `antibodies.validate` or
    `datasets.validate` is returned."""
    for_antibodies = datatype == "antibodies"
    loaded = read(source, "Antibodies" if for_antibodies else "Dataset")
    if failed(loaded):
        return loaded

    table = loaded["table"]
    if for_antibodies:
        return antibodies.validate(table)
    return datasets.validate(datatype, table)
def read(source, sheet=None):
    """Turn `source` into a response carrying a "table" key.

    Accepted sources, checked in this order:
    - a table: wrapped as-is;
    - a response: its "table" is re-wrapped, or a failure if it has none;
    - a string or an object with a `read` attribute: delegated to `read_path`;
    - a request: its file is read and parsed with `workbooks.read`.

    Raises `Exception` for any other kind of input."""
    if tables.is_table(source):
        return success({"table": source})

    if responses.is_response(source):
        if "table" not in source:
            return failure(f"Response does not have 'table': '{source}'")
        return success({"table": source["table"]})

    path_like = isinstance(source, str) or hasattr(source, "read")
    if path_like:
        return read_path(source, sheet)

    if not requests.is_request(source):
        raise Exception(f"Unknown input '{source}'")

    uploaded = requests.read_file(source)
    if failed(uploaded):
        return uploaded
    return success({"table": workbooks.read(uploaded["content"], sheet)})
def convert(source, destination):
    """Convert `source` to the format named by `destination`.

    `destination` is either a format name ("tsv" or "html", any case) or a
    path whose extension names the format. Returns a success response with
    "content" and "content type" (plus "table", and for HTML also "grid"
    and "html"), or a failure response for a failed read or an unsupported
    format."""
    if grids.is_grid(source):
        table, grid = None, source
    else:
        loaded = read(source)
        if failed(loaded):
            return loaded
        table, grid = loaded["table"], None

    # Accept a bare format name; otherwise fall back to the file extension.
    output_format = destination.lower()
    if output_format not in ["tsv", "html"]:
        _, extension = os.path.splitext(destination)
        output_format = extension.lower().lstrip(".")
    kind = output_format.lower()

    if kind == "tsv":
        # NOTE(review): when `source` is a grid, `table` is None here and is
        # passed to `tables.table_to_tsv_string` as-is — confirm that is intended.
        content = tables.table_to_tsv_string(table)
        return success({
            "table": table,
            "content type": responses.tsv,
            "content": content
        })

    if kind == "html":
        if not grid:
            grid = grids.table_to_grid(config.prefixes, config.fields, table)
        html = grids.grid_to_html(grid)
        content = templates.render_html("templates/grid.html", {"html": html})
        return success({
            "table": table,
            "grid": grid,
            "html": html,
            "content type": responses.html,
            "content": content,
        })

    return failure(f"Unsupported output format for '{destination}'")
def submit(name, email, organization, table):
    """Validate and record a new table of antibodies.

    Steps:
    1. validate the table (the validated table is used from then on),
    2. assign the next IDs and append the identifying rows to the secret
       table,
    3. append the blinded rows to the staging and public tables,
    4. return a success response with the submission "table" (IDs merged
       in) and its "grid".

    `name` and `email` identify the submitter and are used as the commit
    author for the secret and staging repositories; `organization` is
    stored in the secret rows.

    Repositories are written in the order secret, staging, public. Any
    missing configuration, failed write, or failed commit returns a failure
    response immediately; repositories already written are not rolled back.
    """
    response = validate(table)
    if failed(response):
        return response
    table = response["table"]  # blank rows removed

    if not config.secret:
        return failure("CVDB_SECRET directory is not configured")
    secret = []
    path = os.path.join(config.secret.working_tree_dir, "antibodies.tsv")
    if os.path.isfile(path):
        secret = tables.read_tsv(path)
    blind = config.read_blinded_antibodies()

    # The secret and blinded tables must stay row-for-row in step.
    if len(secret) != len(blind):
        return failure(f"Different number of antibody rows: {len(secret)} != {len(blind)}")

    # Continue numbering from the last blinded antibody, if there is one.
    current_id = "COVIC:0"
    if len(blind) > 0:
        current_id = blind[-1]["ab_id"]

    submission = []
    for row in table:
        current_id = names.increment_id(current_id)

        # secrets: write this to the secret repo
        secret_row = OrderedDict()
        secret_row["ab_id"] = current_id
        secret_row["ab_name"] = row["Antibody name"]
        secret_row["ab_details"] = row["Antibody details"]
        secret_row["ab_comment"] = row["Antibody comment"]
        secret_row["org_name"] = organization
        secret_row["submitter_email"] = email
        secret.append(secret_row)

        # blind: write this to staging/public repos
        blind_row = OrderedDict()
        blind_row["ab_id"] = current_id

        # submission: return this to the submitter
        submission_row = OrderedDict()
        submission_row["ab_id"] = current_id
        submission_row["ab_name"] = row["Antibody name"]

        # for each header (skipping the first), add cells to blind and submission
        for header in headers[1:]:
            column = header["value"]
            value = row[header["label"]]
            if column.endswith("_label"):
                # Label columns are stored by ID; only the submission keeps the label.
                id_column = column.replace("_label", "_id")
                i = config.ids.get(value, "")
                blind_row[id_column] = i
                submission_row[id_column] = i
                submission_row[column] = value
            else:
                blind_row[column] = value
                submission_row[column] = value

        blind.append(blind_row)
        submission.append(submission_row)

    author = Actor(name, email)

    # secret
    try:
        path = os.path.join(config.secret.working_tree_dir, "antibodies.tsv")
        tables.write_tsv(secret, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.secret.index.add([path])
        config.secret.index.commit("Submit antibodies", author=author, committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    # staging
    # Check explicitly, as for the secret and public repositories: without
    # this guard a missing staging repo is swallowed by the `except` below
    # and reported against the stale secret path.
    if not config.staging:
        return failure("CVDB_STAGING directory is not configured")
    try:
        path = os.path.join(config.staging.working_tree_dir, "antibodies.tsv")
        tables.write_tsv(blind, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.staging.index.add([path])
        config.staging.index.commit("Submit antibodies", author=author, committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    # public
    if not config.public:
        return failure("CVDB_PUBLIC directory is not configured")
    try:
        path = os.path.join(config.public.working_tree_dir, "antibodies.tsv")
        tables.write_tsv(blind, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.public.index.add([path])
        # The public commit uses config.covic as author instead of the submitter.
        config.public.index.commit("Submit antibodies", author=config.covic, committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    grid = grids.table_to_grid(config.prefixes, config.fields, submission)
    print("Submitted antibodies")
    return success({"table": submission, "grid": grid})
def submit(name, email, dataset_id, table):
    """Validate a table of assays for `dataset_id`, write it to the staging
    repository, and commit.

    `name` and `email` identify the submitter and become the commit author.
    Returns a failure response if validation, configuration, the status
    update, the write, or the commit fails; otherwise a success response
    with the validated "table", its "grid", and the "dataset_id"."""
    validated = validate(dataset_id, table)
    if failed(validated):
        return validated
    table = validated["table"]  # remove blank rows

    # Label -> ID lookup: blinded antibodies first (label is the ID with ":"
    # replaced by "-"), then control antibodies, which override on a clash.
    ab_ids = {
        ab["ab_id"].replace(":", "-"): ab["ab_id"]
        for ab in config.read_blinded_antibodies()
    }
    ab_ids.update(
        (control["label"], control["id"]) for control in config.ab_controls.values()
    )

    assay_headers = get_assay_headers(dataset_id)
    assays = []
    for row in table:
        assay = OrderedDict()
        for header in assay_headers:
            field, label = header["value"], header["label"]
            if field != "ab_label":
                assay[field] = row[label]
                continue
            # The stripped label is written back so the returned table has it too.
            row[label] = row[label].strip()
            assay["ab_id"] = ab_ids[row[label]]
        assays.append(assay)

    author = Actor(name, email)

    # staging
    if not config.staging:
        return failure("CVDB_STAGING directory is not configured")
    dataset_dir = os.path.join(config.staging.working_tree_dir, "datasets", str(dataset_id))
    changed = []

    try:
        set_staging_value(dataset_id, "Dataset status", "submitted")
        target = os.path.join(dataset_dir, "dataset.yml")
        changed.append(target)
    except Exception as e:
        return failure("Failed to update dataset status", {"exception": e})

    try:
        target = os.path.join(dataset_dir, "assays.tsv")
        tables.write_tsv(assays, target)
        changed.append(target)
    except Exception as e:
        return failure(f"Failed to write '{target}'", {"exception": e})

    try:
        staging_index = config.staging.index
        staging_index.add(changed)
        staging_index.commit(
            f"Submit assays to dataset {dataset_id}",
            author=author,
            committer=config.covic,
        )
    except Exception as e:
        return failure(f"Failed to commit '{target}'", {"exception": e})

    grid = grids.table_to_grid(config.prefixes, config.fields, table)
    print(f"Submitted assays to dataset {dataset_id}")
    return success({"table": table, "grid": grid, "dataset_id": dataset_id})