def test_validate():
    """Validate the example antibody submissions in TSV and Excel form."""
    # A valid TSV submission passes validation.
    submission = tables.read_tsv("examples/antibodies-submission-valid.tsv")
    assert succeeded(antibodies.validate(submission))

    # An invalid TSV submission fails, with one error message per bad row.
    submission = tables.read_tsv("examples/antibodies-submission-invalid.tsv")
    response = antibodies.validate(submission)
    assert failed(response)
    expected_errors = [
        "Error in row 3: Duplicate value 'VD-Crotty 1' is not allowed in column 'Antibody name'",
        "Error in row 4: Missing required value in column 'Antibody name'",
        "Error in row 5: Missing required value in column 'Host'",
        "Error in row 6: 'IggA1' is not a valid term in column 'Isotype'",
        "Error in row 7: 'kapa' is not a valid term in column 'Light chain'",
        "Error in row 8: 'IGVH1-8' is not a valid term in column 'Heavy chain germline'",
        "Error in row 9: 'top' is not of type 'integer' in column 'Structural data'",
    ]
    assert response["errors"] == expected_errors

    # Excel uploads submitted through the API behave the same way.
    valid_upload = UploadedFile("examples/antibodies-submission-valid.xlsx")
    assert succeeded(api.validate("antibodies", {"file": valid_upload}))

    invalid_upload = UploadedFile("examples/antibodies-submission-invalid.xlsx")
    response = api.validate("antibodies", {"file": invalid_upload})
    assert failed(response)
    # The parsed table is echoed back alongside the errors.
    assert response["table"][0]["Antibody name"] == "VD-Crotty 1"
def test_pathological():
    """Check handling of structurally broken submission tables."""
    # Tables with missing or extra columns are rejected outright.
    for test in ("missing-columns", "extra-columns"):
        table = tables.read_tsv(f"tests/invalid-antibodies/{test}.tsv")
        assert failed(antibodies.validate(table))

    # Blank rows are tolerated: validation succeeds and the blanks
    # are dropped from the returned table.
    table = tables.read_tsv("tests/invalid-antibodies/blank-rows.tsv")
    response = antibodies.validate(table)
    assert succeeded(response)
    tables.print_tsv(response["table"])
    assert len(response["table"]) == 9
def test_examples():
    """The example antibody TSV and Excel files must stay in sync."""
    # The blank submission template workbook holds no data rows.
    assert workbooks.read("examples/antibodies-submission.xlsx") == []

    # The valid and invalid examples match between TSV and Excel.
    for example in ("antibodies-submission-valid", "antibodies-submission-invalid"):
        from_tsv = tables.read_tsv(f"examples/{example}.tsv")
        from_excel = workbooks.read(f"examples/{example}.xlsx")
        assert from_tsv == from_excel

    # The highlighted workbook carries the same data as the invalid TSV.
    from_tsv = tables.read_tsv("examples/antibodies-submission-invalid.tsv")
    from_excel = workbooks.read(
        "examples/antibodies-submission-invalid-highlighted.xlsx")
    assert from_tsv == from_excel
def read_data(
    antibodies_tsv_path,
    dataset_path,
):
    """Build one combined grid: the antibodies table joined with every
    expanded assay table found under *dataset_path*.

    antibodies_tsv_path: path to the antibodies TSV.
    dataset_path: directory tree searched for '*-valid-expanded.tsv' files.
    Returns the grid dict (with "headers", "rows", and a "message").
    """
    ab_table = tables.read_tsv(antibodies_tsv_path)
    grid = grids.table_to_grid(config.prefixes, config.fields, ab_table)
    # Prepend a super-header row: one blank cell spanning all antibody
    # columns; one named cell per assay is appended to it below.
    cell = grids.value_cell("")
    cell["colspan"] = len(grid["headers"][0])
    grid["headers"].insert(0, [cell])
    for root, dirs, files in os.walk(dataset_path):
        for name in files:
            # Skip antibody tables; only merge assay submission tables.
            if name.startswith("antibodies"):
                continue
            if name.endswith("-valid-expanded.tsv"):
                assays_tsv_path = os.path.join(root, name)
                # Derive a human-readable assay name from the file name.
                assay_name = name.replace("-submission-valid-expanded.tsv",
                                          "").replace("-", " ")
                assay_table = tables.read_tsv(assays_tsv_path)
                # First column is the antibody label; the rest are data.
                columns = len(assay_table[0].keys()) - 1
                assay_grid = grids.table_to_grid(config.prefixes,
                                                 config.fields, assay_table)
                # Map antibody label -> its remaining data cells.
                ab_map = {}
                for row in assay_grid["rows"]:
                    ab_label = row[0]["value"]
                    row.pop(0)
                    ab_map[ab_label] = row
                # Extend both header rows for this assay's columns.
                header = grids.value_cell(assay_name)
                header["colspan"] = columns
                grid["headers"][0].append(header)
                grid["headers"][1] += assay_grid["headers"][0][1:]
                for row in grid["rows"]:
                    # Antibody IDs use ':' in the grid but ' ' in the
                    # assay tables — normalize before the lookup.
                    ab_label = row[0]["value"].replace(":", " ")
                    if ab_label in ab_map:
                        row += ab_map[ab_label]
                    else:
                        # Pad with blanks so every row stays rectangular.
                        for column in range(0, columns):
                            row.append(grids.value_cell(""))
    grid[
        "message"] = "This is the public view with all antibodies (blinded) and assays."
    return grid
def read_blinded_antibodies():
    """Return a list of dicts of blinded antibodies.

    The list is empty when the staging table does not exist yet.
    """
    if not staging:
        raise Exception("CVDB_STAGING directory is not configured")
    path = os.path.join(staging.working_tree_dir, "antibodies.tsv")
    if not os.path.isfile(path):
        return []
    return tables.read_tsv(path)
def read_fields(fields_tsv_path):
    """Read the fields table and return the fields map.

    Each row becomes an entry keyed on its 'field' column, holding only
    the row's non-empty values.
    """
    return {
        row["field"]: {
            key: value
            for key, value in row.items()
            if value is not None and value.strip() != ""
        }
        for row in tables.read_tsv(fields_tsv_path)
    }
def read_labels(labels_tsv_path):
    """Read the labels table and return the labels map (ID -> LABEL).

    Raises Exception when the same ID appears twice.
    """
    labels = {}
    for row in tables.read_tsv(labels_tsv_path):
        key = row["ID"]
        if key in labels:
            raise Exception(f"Duplicate ID {key}")
        labels[key] = row["LABEL"]
    return labels
def read_ids(labels_tsv_path):
    """Read the labels table and return the IDs map (LABEL -> ID).

    Raises Exception when the same LABEL appears twice.
    """
    ids = {}
    for row in tables.read_tsv(labels_tsv_path):
        label = row["LABEL"]
        if label in ids:
            # BUG FIX: the original string lacked the f-prefix, so the
            # message showed the literal text "{label}".
            raise Exception(f"Duplicate label '{label}'")
        ids[label] = row["ID"]
    return ids
def read_data(dataset_id):
    """Read the metadata and data for a dataset.

    Returns a dict with the dataset metadata and its list of assay rows.
    """
    dataset = read_dataset_yml(dataset_id)
    assays_tsv_path = os.path.join(get_staging_path(dataset_id), "assays.tsv")
    assays = list(tables.read_tsv(assays_tsv_path))
    return {"dataset": dataset, "assays": assays}
def read_terms(terms_tsv_path):
    """Read a terms table and return a dictionary with labels for keys.

    The 'notes' column, when present, is stripped from each row.
    """
    terms = {}
    for row in tables.read_tsv(terms_tsv_path):
        # Skip the secondary header row (its 'id' cell holds 'ID').
        if row["id"] == "ID":
            continue
        row.pop("notes", None)
        terms[row["label"]] = row
    return terms
def read_path(path, sheet=None):
    """Read a TSV or Excel from a path and return a response with a "table" key.

    path: the file to read; the format is chosen by its extension
        (.xlsx or .tsv, case-insensitive).
    sheet: optional sheet name, used only for Excel workbooks.
    Returns success({"table": ...}), or failure(...) for any other extension.
    """
    # Cleanup: the original bound an unused 'filename' local and a dead
    # 'table = None' initializer; both are removed.
    extension = os.path.splitext(path)[1].lower()
    if extension == ".xlsx":
        table = workbooks.read(path, sheet)
    elif extension == ".tsv":
        table = tables.read_tsv(path)
    else:
        return failure(f"Unsupported input format for '{path}'")
    return success({"table": table})
def get_secret_value(dataset_id, key=None):
    """Given a dataset ID and an optional key
    return the value or values from the dataset secret metadata.

    dataset_id: matched against the 'ds_id' column of the secret datasets.tsv.
    key: when given, return just that column's value; otherwise the whole row.
    Raises Exception when no row matches dataset_id.
    """
    # NOTE(review): this guard (and its 'cannot be updated' wording) looks
    # copied from set_secret_value(); for a getter it merely refuses to
    # echo the lookup key back — confirm the message/intent.
    if key in ["ds_id"]:
        return failure(f"Key '{key}' cannot be updated")
    path = os.path.join(config.secret.working_tree_dir, "datasets.tsv")
    rows = tables.read_tsv(path)
    for row in rows:
        if row["ds_id"] == dataset_id:
            if key:
                # Raises KeyError if 'key' is not a column of the table.
                return row[key]
            else:
                return row
    raise Exception(f"No row found for dataset '{dataset_id}'")
def test_examples():
    """The example SPR TSV and Excel files must hold the same data."""
    # The blank submission template workbook holds no data rows.
    assert workbooks.read("examples/{0}.xlsx".format("spr-submission")) == []

    # Compare data rows only ([1:] skips the header row) between the
    # TSV and Excel versions of each example.
    for example in ("spr-submission-valid", "spr-submission-invalid"):
        tsv_lists = tables.table_to_lists(
            tables.read_tsv("examples/{0}.tsv".format(example)))
        excel_lists = tables.table_to_lists(
            workbooks.read("examples/{0}.xlsx".format(example)))
        assert tsv_lists[1:] == excel_lists[1:]
def set_secret_value(dataset_id, key, value):
    """Given a dataset ID, key, and value, update the secret `datasets.tsv`.

    Raises Exception when no row matches dataset_id; in that case the
    file is left unwritten.
    """
    if key in ["ds_id"]:
        return failure(f"Key '{key}' cannot be updated")
    path = os.path.join(config.secret.working_tree_dir, "datasets.tsv")
    rows = tables.read_tsv(path)
    matched = False
    for row in rows:
        if row["ds_id"] == dataset_id:
            row[key] = str(value)
            matched = True
        elif key not in row:
            # Keep the table rectangular: add the new column everywhere.
            row[key] = None
    if not matched:
        raise Exception(f"No row found for dataset '{dataset_id}'")
    tables.write_tsv(rows, path)
def read_prefixes(prefixes_tsv_path):
    """Read the prefixes table and return the prefixes map (prefix -> base)."""
    return {
        row["prefix"]: row["base"]
        for row in tables.read_tsv(prefixes_tsv_path)
    }
def submit(name, email, organization, table):
    """Given a new table of antibodies:
    1. validate it
    2. assign IDs and append them to the secrets,
    3. append the blinded antibodies to the staging table,
    4. return a response with merged IDs.

    name, email: the submitter, recorded as the git commit author.
    organization: recorded in the secret table only.
    table: list of submission rows keyed on human-readable column labels.
    Returns a success response with the submitted rows and their grid,
    or a failure response at the first step that goes wrong.
    """
    response = validate(table)
    if failed(response):
        return response
    table = response["table"]  # blank rows removed
    if not config.secret:
        return failure("CVDB_SECRET directory is not configured")
    # Load the existing secret and blinded tables; they must stay in
    # lock-step, one row per assigned antibody ID.
    secret = []
    path = os.path.join(config.secret.working_tree_dir, "antibodies.tsv")
    if os.path.isfile(path):
        secret = tables.read_tsv(path)
    blind = config.read_blinded_antibodies()
    if len(secret) != len(blind):
        return failure(f"Different number of antibody rows: {len(secret)} != {len(blind)}")
    # Continue the ID sequence from the last assigned blinded ID.
    current_id = "COVIC:0"
    if len(blind) > 0:
        current_id = blind[-1]["ab_id"]
    submission = []
    for row in table:
        current_id = names.increment_id(current_id)
        # secrets: write this to the secret repo
        secret_row = OrderedDict()
        secret_row["ab_id"] = current_id
        secret_row["ab_name"] = row["Antibody name"]
        secret_row["ab_details"] = row["Antibody details"]
        secret_row["ab_comment"] = row["Antibody comment"]
        secret_row["org_name"] = organization
        secret_row["submitter_email"] = email
        secret.append(secret_row)
        # blind: write this to staging/public repos
        blind_row = OrderedDict()
        blind_row["ab_id"] = current_id
        # submission: return this to the submitter
        submission_row = OrderedDict()
        submission_row["ab_id"] = current_id
        submission_row["ab_name"] = row["Antibody name"]
        # for each header, add cells to blind and submission
        # NOTE(review): 'headers' is not defined in this function —
        # presumably a module-level antibody-field header list; confirm.
        for header in headers[1:]:
            column = header["value"]
            value = row[header["label"]]
            if column.endswith("_label"):
                # '_label' columns also get a matching '_id' column,
                # looked up from the ontology; '' when unknown.
                i = config.ids.get(value, "")
                blind_row[column.replace("_label", "_id")] = i
                submission_row[column.replace("_label", "_id")] = i
                submission_row[column] = value
            else:
                blind_row[column] = value
                submission_row[column] = value
        blind.append(blind_row)
        submission.append(submission_row)
    author = Actor(name, email)
    # secret: write and commit the full (unblinded) table.
    try:
        path = os.path.join(config.secret.working_tree_dir, "antibodies.tsv")
        tables.write_tsv(secret, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.secret.index.add([path])
        config.secret.index.commit("Submit antibodies",
                                   author=author,
                                   committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})
    # staging: write and commit the blinded table.
    try:
        path = os.path.join(config.staging.working_tree_dir, "antibodies.tsv")
        tables.write_tsv(blind, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.staging.index.add([path])
        config.staging.index.commit("Submit antibodies",
                                    author=author,
                                    committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})
    # public: same blinded table.
    if not config.public:
        return failure("CVDB_PUBLIC directory is not configured")
    try:
        path = os.path.join(config.public.working_tree_dir, "antibodies.tsv")
        tables.write_tsv(blind, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.public.index.add([path])
        # NOTE(review): the public commit uses config.covic as author
        # (not the submitter) — presumably to keep submitters anonymous
        # in the public repo; confirm this is intentional.
        config.public.index.commit("Submit antibodies",
                                   author=config.covic,
                                   committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})
    grid = grids.table_to_grid(config.prefixes, config.fields, submission)
    print("Submitted antibodies")
    return success({"table": submission, "grid": grid})
def label_tsv(labels, tsv_path):
    """Read a TSV table from *tsv_path* and then label it with *labels*."""
    table = tables.read_tsv(tsv_path)
    return label_table(labels, table)
def create(name, email, columns=[]):
    """Create a new dataset: register it in the secret table and create
    its staging directory with a dataset.yml.

    name, email: the submitter, recorded as the git commit author.
    columns: assay column names; each must be a known field or an
        'obi_'/'ontie_' column whose term (optionally with a _stddev,
        _normalized, or _qualitative suffix) is in config.labels.
        (Note: the default list is never mutated, so the shared mutable
        default is harmless here.)
    Returns success({"dataset_id": ...}) or failure(...).
    """
    if not config.staging:
        return failure("CVDB_STAGING directory is not configured")
    # Validate the requested assay columns.
    for column in columns:
        if column in config.fields:
            continue
        if column.startswith("obi_") or column.startswith("ontie_"):
            assay_id = column.replace("obi_", "OBI:").replace("ontie_", "ONTIE:")
            root_id = (assay_id.replace("_stddev", "").replace(
                "_normalized", "").replace("_qualitative", ""))
            if assay_id in config.labels:
                continue
            # Suffixed variants are accepted when the root term is known.
            if root_id in config.labels:
                if column.endswith("_stddev"):
                    continue
                if column.endswith("_normalized"):
                    continue
                if column.endswith("_qualitative"):
                    continue
        return failure(f"Unrecognized column '{column}'")
    datasets_path = os.path.join(config.staging.working_tree_dir, "datasets")
    current_id = 0
    if not os.path.exists(datasets_path):
        os.makedirs(datasets_path)
    if not os.path.isdir(datasets_path):
        return failure(f"'{datasets_path}' is not a directory")
    # Find the highest numbered dataset directory.
    # BUG FIX: the inner loop variable was 'name', shadowing the submitter
    # 'name' parameter, so Actor(name, email) below recorded the last
    # directory name as the commit author instead of the submitter.
    for root, dirs, files in os.walk(datasets_path):
        for dirname in dirs:
            if re.match(r"\d+", dirname):
                current_id = max(current_id, int(dirname))
    dataset_id = current_id + 1
    author = Actor(name, email)
    # secret: append the new dataset row and commit it.
    try:
        path = os.path.join(config.secret.working_tree_dir, "datasets.tsv")
        datasets = []
        if os.path.isfile(path):
            datasets = tables.read_tsv(path)
        datasets.append(
            OrderedDict({
                "ds_id": dataset_id,
                "submitter_email": email
            }))
        tables.write_tsv(datasets, path)
    except Exception as e:
        return failure(f"Failed to update '{path}'", {"exception": e})
    try:
        config.secret.index.add([path])
        config.secret.index.commit(f"Create dataset {dataset_id}",
                                   author=author,
                                   committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})
    # staging: create the dataset directory and its dataset.yml.
    try:
        dataset_path = os.path.join(datasets_path, str(dataset_id))
        os.mkdir(dataset_path)
    except Exception as e:
        return failure(f"Failed to create '{dataset_path}'", {"exception": e})
    try:
        dataset = {
            "Dataset ID": f"ds:{dataset_id}",
            "Dataset status": "configured",
            "Columns": columns,
        }
        path = os.path.join(dataset_path, "dataset.yml")
        with open(path, "w") as outfile:
            yaml.dump(dataset, outfile, sort_keys=False)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.staging.index.add([path])
        config.staging.index.commit(f"Create dataset {dataset_id}",
                                    author=author,
                                    committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})
    print(f"Created dataset {dataset_id}")
    return success({"dataset_id": dataset_id})