Beispiel #1
0
def test_validate_submission():
    """Validate the SPR example workbooks as tables and as uploaded files.

    The valid and invalid examples are each checked twice: once by reading
    the "Dataset" sheet and calling `datasets.validate`, and once by passing
    an `UploadedFile` to `api.validate`.
    """
    valid_path = "examples/spr-submission-valid.xlsx"
    invalid_path = "examples/spr-submission-invalid.xlsx"

    # Table-level validation: the valid workbook passes.
    valid_table = workbooks.read(valid_path, "Dataset")
    result = datasets.validate("spr", valid_table)
    assert succeeded(result)

    # Table-level validation: the invalid workbook reports these errors, in order.
    expected_errors = [
        "Error in row 2: 'COVIC 1' is not a valid COVIC antibody label " +
        "or control antibody label in column 'Antibody label'",
        "Error in row 2: 'X' is not of type 'non-negative integer' in column 'n'",
        "Error in row 2: '7000O' is not of type 'float_threshold_na'"
        " in column 'Standard deviation in M^-1s^-1'",
        "Error in row 2: 'Positive' is not a valid term in column 'Qualitiative measure'",
    ]
    invalid_table = workbooks.read(invalid_path, "Dataset")
    result = datasets.validate("spr", invalid_table)
    assert failed(result)
    assert result["errors"] == expected_errors

    # API-level validation: the valid upload passes.
    result = api.validate("spr", {"file": UploadedFile(valid_path)})
    assert succeeded(result)

    # API-level validation: the invalid upload fails but still returns its table.
    result = api.validate("spr", {"file": UploadedFile(invalid_path)})
    assert failed(result)
    first_row = result["table"][0]
    assert first_row["Antibody label"] == "COVIC 1"
Beispiel #2
0
def test_validate():
    """Validate the antibodies example submissions as tables and as uploads.

    The TSV examples go through `antibodies.validate`; the XLSX examples go
    through `api.validate` as `UploadedFile` objects.
    """
    # Table-level validation: the valid TSV passes.
    valid_table = tables.read_tsv("examples/antibodies-submission-valid.tsv")
    result = antibodies.validate(valid_table)
    assert succeeded(result)

    # Table-level validation: the invalid TSV reports these errors, in order.
    expected_errors = [
        "Error in row 3: Duplicate value 'VD-Crotty 1' is not allowed in column 'Antibody name'",
        "Error in row 4: Missing required value in column 'Antibody name'",
        "Error in row 5: Missing required value in column 'Host'",
        "Error in row 6: 'IggA1' is not a valid term in column 'Isotype'",
        "Error in row 7: 'kapa' is not a valid term in column 'Light chain'",
        "Error in row 8: 'IGVH1-8' is not a valid term in column 'Heavy chain germline'",
        "Error in row 9: 'top' is not of type 'integer' in column 'Structural data'",
    ]
    invalid_table = tables.read_tsv("examples/antibodies-submission-invalid.tsv")
    result = antibodies.validate(invalid_table)
    assert failed(result)
    assert result["errors"] == expected_errors

    # API-level validation: the valid workbook upload passes.
    valid_upload = UploadedFile("examples/antibodies-submission-valid.xlsx")
    result = api.validate("antibodies", {"file": valid_upload})
    assert succeeded(result)

    # API-level validation: the invalid upload fails but still returns its table.
    invalid_upload = UploadedFile("examples/antibodies-submission-invalid.xlsx")
    result = api.validate("antibodies", {"file": invalid_upload})
    assert failed(result)
    first_row = result["table"][0]
    assert first_row["Antibody name"] == "VD-Crotty 1"
def expand(source, sheet=None):
    """Read `source` and return a response whose "table" has been labelled.

    The source (and optional sheet name) is handed to `read`. If that fails,
    the failure response is returned as-is; otherwise the table is passed
    through `names.label_table` with `config.labels` and wrapped in a
    success response.
    """
    result = read(source, sheet)
    if not failed(result):
        labelled = names.label_table(config.labels, result["table"])
        return success({"table": labelled})
    return result
def submit_assays(name, email, dataset_id, source):
    """Validate a source of assays and submit it to an existing dataset.

    `source` is validated with `validate(dataset_id, source)`. A failed
    validation response is returned unchanged; otherwise the validated table
    is passed to `datasets.submit` together with the submitter's name and
    email and the dataset ID, and that response is returned.
    """
    validation = validate(dataset_id, source)
    if not failed(validation):
        return datasets.submit(name, email, dataset_id, validation["table"])
    return validation
def submit_antibodies(name, email, organization, source):
    """Validate a source of antibodies and submit it.

    `source` is validated with `validate("antibodies", source)`. A failed
    validation response is returned unchanged; otherwise the validated table
    is passed to `antibodies.submit` together with the submitter's name,
    email and organization, and that response is returned.
    """
    validation = validate("antibodies", source)
    if not failed(validation):
        return antibodies.submit(name, email, organization, validation["table"])
    return validation
Beispiel #6
0
def test_pathological():
    """Check antibody validation against structurally unusual TSV files."""
    # Files with missing or extra columns must fail validation.
    for case in ("missing-columns", "extra-columns"):
        table = tables.read_tsv(f"tests/invalid-antibodies/{case}.tsv")
        assert failed(antibodies.validate(table))

    # A file with blank rows validates, and the response table has 9 rows.
    table = tables.read_tsv("tests/invalid-antibodies/blank-rows.tsv")
    result = antibodies.validate(table)
    assert succeeded(result)
    validated = result["table"]
    tables.print_tsv(validated)
    assert len(result["table"]) == 9
def fill(datatype, source=None):
    """Fill the template for `datatype`, optionally with data from `source`.

    Without a (truthy) source, the result of `fill_rows(datatype)` is
    returned directly. With a source, it is read into a table, converted to a
    grid, and the grid's rows are passed to `fill_rows`; the table and grid
    are then attached to that response under "table" and "grid". A failed
    read is returned unchanged.
    """
    if not source:
        return fill_rows(datatype)

    loaded = read(source)
    if failed(loaded):
        return loaded

    table = loaded["table"]
    grid = grids.table_to_grid(config.prefixes, config.fields, table)
    filled = fill_rows(datatype, grid["rows"])
    filled["table"] = table
    filled["grid"] = grid
    return filled
def validate(datatype, source):
    """Read `source` and validate its table for the given datatype.

    The "Antibodies" sheet is read for the "antibodies" datatype and the
    "Dataset" sheet for anything else. A failed read is returned unchanged;
    otherwise the response of `antibodies.validate` or `datasets.validate`
    is returned.
    """
    is_antibodies = datatype == "antibodies"
    sheet = "Antibodies" if is_antibodies else "Dataset"

    loaded = read(source, sheet)
    if failed(loaded):
        return loaded

    rows = loaded["table"]
    if is_antibodies:
        return antibodies.validate(rows)
    return datasets.validate(datatype, rows)
def read(source, sheet=None):
    """Normalise `source` into a response carrying a "table" key.

    Sources are tried in this order:
    - a table: wrapped as-is;
    - a response: its "table" is re-wrapped, or a failure is returned when
      it has none;
    - a string or an object with a `read` attribute: delegated to
      `read_path` together with `sheet`;
    - a request: its file is read and parsed with `workbooks.read`.

    Raises:
        Exception: if `source` matches none of the cases above.
    """
    if tables.is_table(source):
        return success({"table": source})

    if responses.is_response(source):
        if "table" not in source:
            return failure(f"Response does not have 'table': '{source}'")
        return success({"table": source["table"]})

    if isinstance(source, str) or hasattr(source, "read"):
        return read_path(source, sheet)

    if not requests.is_request(source):
        raise Exception(f"Unknown input '{source}'")

    upload = requests.read_file(source)
    if failed(upload):
        return upload
    parsed = workbooks.read(upload["content"], sheet)
    return success({"table": parsed})
def convert(source, destination):
    """Given a source and a destination (format or path),
    convert the table to that format
    and return a response with a "content" key.

    `destination` is either a bare format name ("tsv" or "html", in any
    case) or a path whose extension selects the format. `source` may be a
    grid, or anything `read` accepts.

    Returns a success response with "content" and "content type" (plus
    "table", and for HTML also "grid" and "html"), or a failure response
    when the source cannot be read, the format is unsupported, or a grid
    source is asked to be rendered as TSV (no table is available then).
    """
    table = None
    grid = None

    if grids.is_grid(source):
        grid = source
    else:
        response = read(source)
        if failed(response):
            return response
        table = response["table"]

    output_format = destination.lower()
    if output_format not in ["tsv", "html"]:
        # Not a bare format name: treat the destination as a path
        # and take the format from its extension.
        _, extension = os.path.splitext(destination)
        output_format = extension.lower().lstrip(".")

    if output_format == "tsv":
        if table is None:
            # The source was a grid, so there is no table to serialise;
            # fail explicitly instead of handing None to the TSV writer.
            return failure(f"Cannot convert a grid to TSV for '{destination}'")
        content = tables.table_to_tsv_string(table)
        return success({
            "table": table,
            "content type": responses.tsv,
            "content": content
        })
    elif output_format == "html":
        if not grid:
            grid = grids.table_to_grid(config.prefixes, config.fields, table)
        html = grids.grid_to_html(grid)
        content = templates.render_html("templates/grid.html", {"html": html})
        return success({
            "table": table,  # None when the source was already a grid
            "grid": grid,
            "html": html,
            "content type": responses.html,
            "content": content,
        })
    else:
        return failure(f"Unsupported output format for '{destination}'")
def _write_and_commit_antibodies(repo, table, author):
    """Write `table` to antibodies.tsv in `repo`'s working tree and commit it.

    Returns None on success, or a failure response naming the step (write or
    commit) that raised. The committer is always `config.covic`.
    """
    path = os.path.join(repo.working_tree_dir, "antibodies.tsv")
    try:
        tables.write_tsv(table, path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        repo.index.add([path])
        repo.index.commit("Submit antibodies", author=author, committer=config.covic)
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})
    return None


def submit(name, email, organization, table):
    """Given a new table of antibodies:
    1. validate it
    2. assign IDs and append them to the secrets,
    3. append the blinded antibodies to the staging table,
    4. return a response with merged IDs.

    The secret and staging repositories must both be configured before
    anything is written, so that a missing staging repository cannot leave
    the secret table ahead of the blinded table. The blinded table is also
    written to the public repository.

    Returns a failure response if validation fails, a repository is not
    configured, the secret and blinded tables differ in length, or a write
    or commit raises. On success the response holds the submitted rows as
    "table" and their grid as "grid".
    """
    response = validate(table)
    if failed(response):
        return response
    table = response["table"]  # blank rows removed

    if not config.secret:
        return failure("CVDB_SECRET directory is not configured")
    if not config.staging:
        return failure("CVDB_STAGING directory is not configured")

    secret = []
    secret_path = os.path.join(config.secret.working_tree_dir, "antibodies.tsv")
    if os.path.isfile(secret_path):
        secret = tables.read_tsv(secret_path)

    blind = config.read_blinded_antibodies()

    # The secret and blinded tables are appended to in lockstep below,
    # so they must already have the same number of rows.
    if len(secret) != len(blind):
        return failure(f"Different number of antibody rows: {len(secret)} != {len(blind)}")

    # Continue numbering from the last blinded antibody, or start from zero.
    current_id = "COVIC:0"
    if blind:
        current_id = blind[-1]["ab_id"]

    submission = []
    for row in table:
        current_id = names.increment_id(current_id)

        # secrets: write this to the secret repo
        secret_row = OrderedDict()
        secret_row["ab_id"] = current_id
        secret_row["ab_name"] = row["Antibody name"]
        secret_row["ab_details"] = row["Antibody details"]
        secret_row["ab_comment"] = row["Antibody comment"]
        secret_row["org_name"] = organization
        secret_row["submitter_email"] = email
        secret.append(secret_row)

        # blind: write this to staging/public repos
        blind_row = OrderedDict()
        blind_row["ab_id"] = current_id

        # submission: return this to the submitter
        submission_row = OrderedDict()
        submission_row["ab_id"] = current_id
        submission_row["ab_name"] = row["Antibody name"]

        # for each header after the first, add cells to blind and submission
        # (`headers` is defined at module level, outside this function)
        for header in headers[1:]:
            column = header["value"]
            value = row[header["label"]]
            if column.endswith("_label"):
                # Label columns are stored by ID; only the submitter's copy
                # keeps the original label alongside the ID.
                id_column = column.replace("_label", "_id")
                i = config.ids.get(value, "")
                blind_row[id_column] = i
                submission_row[id_column] = i
                submission_row[column] = value
            else:
                blind_row[column] = value
                submission_row[column] = value

        blind.append(blind_row)
        submission.append(submission_row)

    author = Actor(name, email)

    # secret
    error = _write_and_commit_antibodies(config.secret, secret, author)
    if error is not None:
        return error

    # staging
    error = _write_and_commit_antibodies(config.staging, blind, author)
    if error is not None:
        return error

    # public
    # NOTE(review): this check runs after the secret and staging commits,
    # as in the original flow; confirm whether it should move up-front too.
    if not config.public:
        return failure("CVDB_PUBLIC directory is not configured")
    # The public commit is authored by config.covic rather than the submitter.
    error = _write_and_commit_antibodies(config.public, blind, config.covic)
    if error is not None:
        return error

    grid = grids.table_to_grid(config.prefixes, config.fields, submission)
    print("Submitted antibodies")
    return success({"table": submission, "grid": grid})
Beispiel #12
0
def submit(name, email, dataset_id, table):
    """Validate a table of assays, save it to the staging repo, and commit.

    The table is validated for `dataset_id`, antibody labels are replaced by
    antibody IDs, the dataset status is set to "submitted", and the assays
    are written to `assays.tsv` in the dataset's staging directory. Both
    files are committed with the submitter as author.

    Returns a failure response if validation fails, staging is not
    configured, or any write/commit step raises; otherwise a success
    response with the validated "table", its "grid", and the "dataset_id".
    """
    validation = validate(dataset_id, table)
    if failed(validation):
        return validation
    table = validation["table"]  # remove blank rows

    # Map antibody labels to IDs: blinded antibodies first (label is the ID
    # with ":" replaced by "-"), then control antibodies.
    label_to_id = {
        ab["ab_id"].replace(":", "-"): ab["ab_id"]
        for ab in config.read_blinded_antibodies()
    }
    for control in config.ab_controls.values():
        label_to_id[control["label"]] = control["id"]

    columns = get_assay_headers(dataset_id)
    assays = []
    for row in table:
        assay = OrderedDict()
        for column in columns:
            key = column["value"]
            label = column["label"]
            if key != "ab_label":
                assay[key] = row[label]
                continue
            # The stripped label is written back into the row, so the table
            # returned to the caller carries the cleaned value too.
            row[label] = row[label].strip()
            assay["ab_id"] = label_to_id[row[label]]
        assays.append(assay)

    author = Actor(name, email)

    # staging
    if not config.staging:
        return failure("CVDB_STAGING directory is not configured")
    dataset_dir = os.path.join(
        config.staging.working_tree_dir, "datasets", str(dataset_id)
    )
    staged_paths = []
    try:
        set_staging_value(dataset_id, "Dataset status", "submitted")
        path = os.path.join(dataset_dir, "dataset.yml")
        staged_paths.append(path)
    except Exception as e:
        return failure("Failed to update dataset status", {"exception": e})
    try:
        path = os.path.join(dataset_dir, "assays.tsv")
        tables.write_tsv(assays, path)
        staged_paths.append(path)
    except Exception as e:
        return failure(f"Failed to write '{path}'", {"exception": e})
    try:
        config.staging.index.add(staged_paths)
        config.staging.index.commit(
            f"Submit assays to dataset {dataset_id}",
            author=author,
            committer=config.covic,
        )
    except Exception as e:
        return failure(f"Failed to commit '{path}'", {"exception": e})

    print(f"Submitted assays to dataset {dataset_id}")
    return success({
        "table": table,
        "grid": grids.table_to_grid(config.prefixes, config.fields, table),
        "dataset_id": dataset_id,
    })