Ejemplo n.º 1
0
def test_validate():
    """Check antibody validation via the table API and the upload API.

    TSV fixtures are read with ``tables.read_tsv`` and passed to
    ``antibodies.validate``; XLSX fixtures are wrapped in ``UploadedFile``
    and passed to ``api.validate("antibodies", ...)``.
    """
    # A valid TSV submission must validate cleanly.
    valid_tsv = tables.read_tsv("examples/antibodies-submission-valid.tsv")
    assert succeeded(antibodies.validate(valid_tsv))

    # The invalid TSV must fail with exactly these messages, in this order.
    invalid_tsv = tables.read_tsv("examples/antibodies-submission-invalid.tsv")
    result = antibodies.validate(invalid_tsv)
    assert failed(result)
    expected_errors = [
        "Error in row 3: Duplicate value 'VD-Crotty 1' is not allowed in column 'Antibody name'",
        "Error in row 4: Missing required value in column 'Antibody name'",
        "Error in row 5: Missing required value in column 'Host'",
        "Error in row 6: 'IggA1' is not a valid term in column 'Isotype'",
        "Error in row 7: 'kapa' is not a valid term in column 'Light chain'",
        "Error in row 8: 'IGVH1-8' is not a valid term in column 'Heavy chain germline'",
        "Error in row 9: 'top' is not of type 'integer' in column 'Structural data'",
    ]
    assert result["errors"] == expected_errors

    # The same valid data uploaded as an Excel workbook must also pass.
    valid_upload = UploadedFile("examples/antibodies-submission-valid.xlsx")
    assert succeeded(api.validate("antibodies", {"file": valid_upload}))

    # The invalid workbook fails, but the parsed table is still returned.
    invalid_upload = UploadedFile("examples/antibodies-submission-invalid.xlsx")
    result = api.validate("antibodies", {"file": invalid_upload})
    assert failed(result)
    first_row = result["table"][0]
    assert first_row["Antibody name"] == "VD-Crotty 1"
Ejemplo n.º 2
0
def test_pathological():
    """Validate structurally odd antibody tables from ``tests/invalid-antibodies``.

    Tables with missing or extra columns must fail validation. A table with
    blank rows must succeed, and the table in the response must have 9 rows.
    """
    for case in ("missing-columns", "extra-columns"):
        fixture = f"tests/invalid-antibodies/{case}.tsv"
        outcome = antibodies.validate(tables.read_tsv(fixture))
        assert failed(outcome)

    case = "blank-rows"
    fixture = f"tests/invalid-antibodies/{case}.tsv"
    outcome = antibodies.validate(tables.read_tsv(fixture))
    assert succeeded(outcome)
    # Print the returned table so it shows up in the test output on failure.
    tables.print_tsv(outcome["table"])
    assert len(outcome["table"]) == 9
Ejemplo n.º 3
0
def test_examples():
    """Check that the antibody example workbooks agree with their TSV twins."""

    def load_tsv(stem):
        return tables.read_tsv("examples/{0}.tsv".format(stem))

    def load_xlsx(stem):
        return workbooks.read("examples/{0}.xlsx".format(stem))

    # The base submission workbook must read as an empty list.
    nothing = []
    from_xlsx = load_xlsx("antibodies-submission")
    assert nothing == from_xlsx

    # Each TSV example must equal the workbook with the same stem.
    for stem in ("antibodies-submission-valid", "antibodies-submission-invalid"):
        from_tsv = load_tsv(stem)
        from_xlsx = load_xlsx(stem)
        assert from_tsv == from_xlsx

    # The highlighted workbook must read the same as the plain invalid TSV.
    from_tsv = load_tsv("antibodies-submission-invalid")
    from_xlsx = load_xlsx("antibodies-submission-invalid-highlighted")
    assert from_tsv == from_xlsx
Ejemplo n.º 4
0
def read_data(
    antibodies_tsv_path,
    dataset_path,
):
    """Build one wide grid: the antibodies table plus the columns of every assay.

    The antibodies TSV is converted to a grid, and a new top header row is
    inserted above its existing headers. Every file under ``dataset_path``
    whose name ends in ``-valid-expanded.tsv`` (and does not start with
    ``antibodies``) is then read as an assay table and joined on to the right
    of each antibody row.

    Rows are matched on the value of their first cell; antibody rows with no
    match in an assay are padded with empty cells. Returns the merged grid
    with a ``"message"`` key set.
    """
    antibody_table = tables.read_tsv(antibodies_tsv_path)
    grid = grids.table_to_grid(config.prefixes, config.fields, antibody_table)

    # New first header row: a single blank cell spanning the antibody columns.
    spanning_cell = grids.value_cell("")
    spanning_cell["colspan"] = len(grid["headers"][0])
    grid["headers"].insert(0, [spanning_cell])

    for folder, _subdirs, filenames in os.walk(dataset_path):
        for filename in filenames:
            if filename.startswith("antibodies"):
                continue
            if not filename.endswith("-valid-expanded.tsv"):
                continue

            # Human-readable assay name derived from the file name.
            without_suffix = filename.replace("-submission-valid-expanded.tsv", "")
            assay_name = without_suffix.replace("-", " ")

            assay_table = tables.read_tsv(os.path.join(folder, filename))
            # Every assay column except the first (the antibody label column).
            width = len(assay_table[0]) - 1
            assay_grid = grids.table_to_grid(config.prefixes, config.fields,
                                             assay_table)

            # Index assay rows by their first cell's value, dropping that cell.
            cells_by_label = {}
            for assay_row in assay_grid["rows"]:
                label_cell = assay_row.pop(0)
                cells_by_label[label_cell["value"]] = assay_row

            # Top header gets one cell spanning the assay; the second header
            # row gets the assay's own column headers minus the label column.
            assay_header = grids.value_cell(assay_name)
            assay_header["colspan"] = width
            grid["headers"][0].append(assay_header)
            grid["headers"][1].extend(assay_grid["headers"][0][1:])

            for antibody_row in grid["rows"]:
                lookup = antibody_row[0]["value"].replace(":", " ")
                if lookup not in cells_by_label:
                    # No data for this antibody: keep the columns aligned.
                    antibody_row.extend(
                        grids.value_cell("") for _ in range(width))
                else:
                    antibody_row.extend(cells_by_label[lookup])

    grid["message"] = (
        "This is the public view with all antibodies (blinded) and assays.")
    return grid
Ejemplo n.º 5
0
def read_blinded_antibodies():
    """Return the rows of the staging ``antibodies.tsv``, or ``[]`` if absent.

    Raises:
        Exception: if ``staging`` is not configured (falsy).
    """
    if not staging:
        raise Exception("CVDB_STAGING directory is not configured")
    antibodies_path = os.path.join(staging.working_tree_dir, "antibodies.tsv")
    if not os.path.isfile(antibodies_path):
        # Nothing has been submitted yet.
        return []
    return tables.read_tsv(antibodies_path)
Ejemplo n.º 6
0
def read_fields(fields_tsv_path):
    """Read the fields table and return a map from field name to its settings.

    Each row is keyed by its ``"field"`` value. Cells that are ``None`` or
    blank after stripping whitespace are left out of the row's dictionary.
    """
    result = {}
    for row in tables.read_tsv(fields_tsv_path):
        populated = {}
        for key, value in row.items():
            if value is None or value.strip() == "":
                continue
            populated[key] = value
        result[row["field"]] = populated
    return result
Ejemplo n.º 7
0
def read_labels(labels_tsv_path):
    """Read the labels table and return a map from each ``ID`` to its ``LABEL``.

    Raises:
        Exception: if the same ID appears on more than one row.
    """
    label_by_id = {}
    for row in tables.read_tsv(labels_tsv_path):
        term_id = row["ID"]
        if term_id in label_by_id:
            raise Exception(f"Duplicate ID {term_id}")
        label_by_id[term_id] = row["LABEL"]
    return label_by_id
Ejemplo n.º 8
0
def read_ids(labels_tsv_path):
    """Read the labels table and return a map from each ``LABEL`` to its ``ID``.

    Raises:
        Exception: if the same label appears on more than one row. The
            message names the offending label.
    """
    ids = {}
    for row in tables.read_tsv(labels_tsv_path):
        label = row["LABEL"]
        if label in ids:
            # This must be an f-string; without the prefix the message showed
            # the literal text "{label}" instead of the duplicated label.
            raise Exception(f"Duplicate label '{label}'")
        ids[label] = row["ID"]
    return ids
Ejemplo n.º 9
0
def read_data(dataset_id):
    """Load a dataset's metadata and its assay rows.

    Returns a dict with ``"dataset"`` (the result of ``read_dataset_yml``)
    and ``"assays"`` (a list of the rows of ``assays.tsv`` in the dataset's
    staging directory).
    """
    metadata = read_dataset_yml(dataset_id)
    staging_dir = get_staging_path(dataset_id)
    assay_rows = list(tables.read_tsv(os.path.join(staging_dir, "assays.tsv")))
    return {"dataset": metadata, "assays": assay_rows}
Ejemplo n.º 10
0
def read_terms(terms_tsv_path):
    """Read a terms table and return a dictionary keyed by each row's label.

    Rows whose ``"id"`` cell is the literal string ``"ID"`` are skipped
    (presumably a second header row; confirm against the terms files).
    Any ``"notes"`` key is removed from a row before it is stored.
    """
    by_label = {}
    for row in tables.read_tsv(terms_tsv_path):
        if row["id"] != "ID":
            row.pop("notes", None)
            by_label[row["label"]] = row
    return by_label
Ejemplo n.º 11
0
def read_path(path, sheet=None):
    """Read a TSV or Excel file and return a response with a "table" key.

    The format is chosen from the file extension, case-insensitively.
    ``sheet`` is passed to ``workbooks.read`` for ``.xlsx`` files and ignored
    for ``.tsv`` files. Any other extension yields a failure response.
    """
    suffix = os.path.splitext(path)[1].lower()
    if suffix == ".xlsx":
        return success({"table": workbooks.read(path, sheet)})
    if suffix == ".tsv":
        return success({"table": tables.read_tsv(path)})
    return failure(f"Unsupported input format for '{path}'")
Ejemplo n.º 12
0
def get_secret_value(dataset_id, key=None):
    """Look up secret metadata for a dataset in the secret ``datasets.tsv``.

    With a truthy ``key``, return that single value from the first row whose
    ``ds_id`` equals ``dataset_id``; otherwise return the whole row.

    A ``key`` of ``"ds_id"`` returns a failure response instead.
    NOTE(review): that message says "cannot be updated" although this
    function only reads; confirm whether the guard is intended here.

    Raises:
        Exception: if no row matches ``dataset_id``.
    """
    protected_keys = ["ds_id"]
    if key in protected_keys:
        return failure(f"Key '{key}' cannot be updated")

    datasets_path = os.path.join(config.secret.working_tree_dir, "datasets.tsv")
    for row in tables.read_tsv(datasets_path):
        if row["ds_id"] == dataset_id:
            return row[key] if key else row
    raise Exception(f"No row found for dataset '{dataset_id}'")
Ejemplo n.º 13
0
def test_examples():
    """Check that the SPR example workbooks agree with their TSV twins."""
    # The base submission workbook must read as an empty list.
    for stem in ("spr-submission",):
        nothing = []
        from_xlsx = workbooks.read("examples/{0}.xlsx".format(stem))
        assert nothing == from_xlsx

    # Compare the list form of both sources with the first row dropped.
    for stem in ("spr-submission-valid", "spr-submission-invalid"):
        from_tsv = tables.read_tsv("examples/{0}.tsv".format(stem))
        from_xlsx = workbooks.read("examples/{0}.xlsx".format(stem))
        tsv_body = tables.table_to_lists(from_tsv)[1:]
        xlsx_body = tables.table_to_lists(from_xlsx)[1:]
        assert tsv_body == xlsx_body
Ejemplo n.º 14
0
def set_secret_value(dataset_id, key, value):
    """Set ``key`` to ``str(value)`` for a dataset in the secret ``datasets.tsv``.

    Every row whose ``ds_id`` equals ``dataset_id`` is updated. Other rows
    get ``key`` set to ``None`` if they lack it, so all rows share the same
    columns. The file is rewritten only when at least one row matched.

    Returns a failure response when ``key`` is ``"ds_id"``; otherwise returns
    ``None``.

    Raises:
        Exception: if no row matches ``dataset_id``.
    """
    protected_keys = ["ds_id"]
    if key in protected_keys:
        return failure(f"Key '{key}' cannot be updated")

    datasets_path = os.path.join(config.secret.working_tree_dir, "datasets.tsv")
    rows = tables.read_tsv(datasets_path)

    matched = False
    for row in rows:
        if row["ds_id"] == dataset_id:
            row[key] = str(value)
            matched = True
        else:
            # Add the column as empty where it is missing.
            row.setdefault(key, None)

    if not matched:
        raise Exception(f"No row found for dataset '{dataset_id}'")
    tables.write_tsv(rows, datasets_path)
Ejemplo n.º 15
0
def read_prefixes(prefixes_tsv_path):
    """Read the prefixes table and return a map from ``prefix`` to ``base``.

    If a prefix appears on more than one row, the last row wins.
    """
    base_by_prefix = {}
    for entry in tables.read_tsv(prefixes_tsv_path):
        base = entry["base"]
        base_by_prefix[entry["prefix"]] = base
    return base_by_prefix
def submit(name, email, organization, table):
    """Validate and record a new table of antibodies.

    Steps:
    1. validate the table (the validated table, with blank rows removed,
       is used from then on),
    2. assign IDs and append the identifying columns to the secret table,
    3. append the blinded antibodies to the staging and public tables,
    4. return a response with merged IDs.

    Args:
        name: submitter name, used as the commit author for secret/staging.
        email: submitter email, stored in the secret table and used for the
            commit author.
        organization: stored as ``org_name`` in the secret table.
        table: the submitted antibodies table.

    Returns:
        A success response with ``"table"`` (the submission rows) and
        ``"grid"``, or a failure response if validation fails, a repository
        is not configured, the secret and blinded tables differ in length,
        or a write/commit fails.
    """
    response = validate(table)
    if failed(response):
        return response
    table = response["table"]  # blank rows removed

    # Check every repository we can before writing anything. The public
    # check used to run only after the secret and staging repositories were
    # already committed, which left them out of sync with the public one.
    if not config.secret:
        return failure("CVDB_SECRET directory is not configured")
    if not config.public:
        return failure("CVDB_PUBLIC directory is not configured")

    secret = []
    path = os.path.join(config.secret.working_tree_dir, "antibodies.tsv")
    if os.path.isfile(path):
        secret = tables.read_tsv(path)

    blind = config.read_blinded_antibodies()

    # Secret and blinded rows are appended in lockstep, so they must match.
    if len(secret) != len(blind):
        return failure(f"Different number of antibody rows: {len(secret)} != {len(blind)}")

    # Continue numbering from the last blinded antibody, if there is one.
    current_id = "COVIC:0"
    if len(blind) > 0:
        current_id = blind[-1]["ab_id"]

    submission = []
    for row in table:
        current_id = names.increment_id(current_id)

        # secrets: write this to the secret repo
        secret_row = OrderedDict()
        secret_row["ab_id"] = current_id
        secret_row["ab_name"] = row["Antibody name"]
        secret_row["ab_details"] = row["Antibody details"]
        secret_row["ab_comment"] = row["Antibody comment"]
        secret_row["org_name"] = organization
        secret_row["submitter_email"] = email
        secret.append(secret_row)

        # blind: write this to staging/public repos
        blind_row = OrderedDict()
        blind_row["ab_id"] = current_id

        # submission: return this to the submitter
        submission_row = OrderedDict()
        submission_row["ab_id"] = current_id
        submission_row["ab_name"] = row["Antibody name"]

        # For each header after the first, add cells to blind and submission.
        # `headers` is not defined in this function; presumably a module-level
        # list of cells with "value" and "label" keys (confirm).
        for header in headers[1:]:
            column = header["value"]
            value = row[header["label"]]
            if column.endswith("_label"):
                # Blinded rows keep only the ID; the submitter also gets the
                # label back. Labels with no known ID map to "".
                i = config.ids.get(value, "")
                blind_row[column.replace("_label", "_id")] = i
                submission_row[column.replace("_label", "_id")] = i
                submission_row[column] = value
            else:
                blind_row[column] = value
                submission_row[column] = value

        blind.append(blind_row)
        submission.append(submission_row)

    author = Actor(name, email)

    # Secret and staging commits are authored by the submitter; the public
    # commit is authored by config.covic so the submitter is not exposed.
    targets = (
        (config.secret, secret, author),
        (config.staging, blind, author),
        (config.public, blind, config.covic),
    )
    for repo, rows, commit_author in targets:
        problem = _commit_antibodies_table(repo, rows, commit_author)
        if problem is not None:
            return problem

    grid = grids.table_to_grid(config.prefixes, config.fields, submission)
    print("Submitted antibodies")
    return success({"table": submission, "grid": grid})


def _commit_antibodies_table(repo, rows, author):
    """Write ``rows`` to ``antibodies.tsv`` in ``repo`` and commit the file.

    Returns ``None`` on success, or a failure response describing whether
    the write or the commit failed.
    """
    path = "antibodies.tsv"  # fallback for the message if the join fails
    try:
        path = os.path.join(repo.working_tree_dir, "antibodies.tsv")
        tables.write_tsv(rows, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        repo.index.add([path])
        repo.index.commit("Submit antibodies", author=author, committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})
    return None
Ejemplo n.º 17
0
def label_tsv(labels, tsv_path):
    """Load the TSV at ``tsv_path`` and return it labelled by ``label_table``."""
    table = tables.read_tsv(tsv_path)
    return label_table(labels, table)
Ejemplo n.º 18
0
def create(name, email, columns=None):
    """Create a new dataset with the next free numeric ID.

    Steps:
    1. check that every requested column is recognized,
    2. pick the next dataset ID from the directories under ``datasets``,
    3. append the dataset to the secret ``datasets.tsv`` and commit,
    4. create the dataset directory and ``dataset.yml`` in staging and commit.

    Args:
        name: submitter name, used for the commit author.
        email: submitter email, stored in the secret table and used for the
            commit author.
        columns: optional list of column names for the dataset; defaults to
            an empty list.

    Returns:
        A success response with ``"dataset_id"``, or a failure response if a
        repository is not configured, a column is unrecognized, or a
        write/commit step fails.
    """
    if columns is None:
        columns = []

    if not config.staging:
        return failure("CVDB_STAGING directory is not configured")
    # Without this check a missing secret repo raised inside the `try` below,
    # and the handler then failed on an unbound `path`.
    if not config.secret:
        return failure("CVDB_SECRET directory is not configured")

    suffixes = ("_stddev", "_normalized", "_qualitative")
    for column in columns:
        if column in config.fields:
            continue
        if column.startswith(("obi_", "ontie_")):
            assay_id = column.replace("obi_",
                                      "OBI:").replace("ontie_", "ONTIE:")
            root_id = (assay_id.replace("_stddev", "").replace(
                "_normalized", "").replace("_qualitative", ""))
            if assay_id in config.labels:
                continue
            # A known assay followed by one of the recognized suffixes.
            if root_id in config.labels and column.endswith(suffixes):
                continue
        return failure(f"Unrecognized column '{column}'")

    datasets_path = os.path.join(config.staging.working_tree_dir, "datasets")
    current_id = 0
    if not os.path.exists(datasets_path):
        os.makedirs(datasets_path)
    if not os.path.isdir(datasets_path):
        return failure(f"'{datasets_path}' is not a directory")
    # os.walk descends into subdirectories, so numeric directory names at
    # any depth are considered. The loop variable must not be `name`: that
    # overwrote the submitter name used for the commit author below.
    for _root, dirs, _files in os.walk(datasets_path):
        for dirname in dirs:
            # fullmatch: a name such as "12abc" would pass match() and then
            # crash int().
            if re.fullmatch(r"\d+", dirname):
                current_id = max(current_id, int(dirname))
    dataset_id = current_id + 1

    author = Actor(name, email)

    # secret
    path = os.path.join(config.secret.working_tree_dir, "datasets.tsv")
    try:
        datasets = []
        if os.path.isfile(path):
            datasets = tables.read_tsv(path)
        datasets.append(
            OrderedDict({
                "ds_id": dataset_id,
                "submitter_email": email
            }))
        tables.write_tsv(datasets, path)
    except Exception as e:
        return failure(f"Failed to update '{path}'", {"exception": e})
    try:
        config.secret.index.add([path])
        config.secret.index.commit(f"Create dataset {dataset_id}",
                                   author=author,
                                   committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    # staging
    dataset_path = os.path.join(datasets_path, str(dataset_id))
    try:
        os.mkdir(dataset_path)
    except Exception as e:
        return failure(f"Failed to create '{dataset_path}'", {"exception": e})
    path = os.path.join(dataset_path, "dataset.yml")
    try:
        dataset = {
            "Dataset ID": f"ds:{dataset_id}",
            "Dataset status": "configured",
            "Columns": columns,
        }
        with open(path, "w") as outfile:
            yaml.dump(dataset, outfile, sort_keys=False)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.staging.index.add([path])
        config.staging.index.commit(f"Create dataset {dataset_id}",
                                    author=author,
                                    committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    print(f"Created dataset {dataset_id}")
    return success({"dataset_id": dataset_id})